1
# DP: Changes from the ibm/gcc-4_8-branch (20140306)
3
LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@208295 \
4
svn://gcc.gnu.org/svn/gcc/branches/ibm/gcc-4_8-branch@208322 \
5
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/libitm/configure
8
+++ b/src/libitm/configure
13
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
14
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
15
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
16
# Find out which ABI we are using.
17
echo 'int i;' > conftest.$ac_ext
18
@@ -7295,7 +7295,10 @@
22
- ppc64-*linux*|powerpc64-*linux*)
23
+ powerpc64le-*linux*)
24
+ LD="${LD-ld} -m elf32lppclinux"
27
LD="${LD-ld} -m elf32ppclinux"
30
@@ -7314,7 +7317,10 @@
32
LD="${LD-ld} -m elf_x86_64"
34
- ppc*-*linux*|powerpc*-*linux*)
36
+ LD="${LD-ld} -m elf64lppc"
39
LD="${LD-ld} -m elf64ppc"
41
s390*-*linux*|s390*-*tpf*)
42
@@ -11779,7 +11785,7 @@
43
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
44
lt_status=$lt_dlunknown
45
cat > conftest.$ac_ext <<_LT_EOF
46
-#line 11782 "configure"
47
+#line 11788 "configure"
51
@@ -11885,7 +11891,7 @@
52
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
53
lt_status=$lt_dlunknown
54
cat > conftest.$ac_ext <<_LT_EOF
55
-#line 11888 "configure"
56
+#line 11894 "configure"
60
@@ -17401,7 +17407,44 @@
64
+case "${target_cpu}" in
66
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports HTM" >&5
67
+$as_echo_n "checking if the assembler supports HTM... " >&6; }
68
+if test "${libitm_cv_as_htm+set}" = set; then :
69
+ $as_echo_n "(cached) " >&6
72
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
73
+/* end confdefs.h. */
78
+asm("tbegin. 0; tend. 0");
83
+if ac_fn_c_try_compile "$LINENO"; then :
84
+ libitm_cv_as_htm=yes
88
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
91
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libitm_cv_as_htm" >&5
92
+$as_echo "$libitm_cv_as_htm" >&6; }
93
+ if test x$libitm_cv_as_htm = xyes; then
95
+$as_echo "#define HAVE_AS_HTM 1" >>confdefs.h
102
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether weak refs work like ELF" >&5
103
$as_echo_n "checking whether weak refs work like ELF... " >&6; }
104
if test "${ac_cv_have_elf_style_weakref+set}" = set; then :
105
--- a/src/libitm/ChangeLog.ibm
106
+++ b/src/libitm/ChangeLog.ibm
108
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
110
+ Backport from mainline r204808:
112
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
114
+ * config/powerpc/sjlj.S [__powerpc64__ && _CALL_ELF == 2]:
115
+ (FUNC): Define ELFv2 variant.
117
+ (HIDDEN): Likewise.
120
+ (LR_SAVE): Likewise.
122
+2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
124
+ Backport from mainline
125
+ 2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
127
+ * acinclude.m4 (LIBITM_CHECK_AS_HTM): New.
128
+ * configure.ac: Use it.
129
+ (AC_CHECK_HEADERS): Check for sys/auxv.h.
130
+ (AC_CHECK_FUNCS): Check for getauxval.
131
+ * config.h.in, configure: Rebuild.
132
+ * configure.tgt (target_cpu): Add -mhtm to XCFLAGS.
133
+ * config/powerpc/target.h: Include sys/auxv.h and htmintrin.h.
134
+ (USE_HTM_FASTPATH): Define.
135
+ (_TBEGIN_STARTED, _TBEGIN_INDETERMINATE, _TBEGIN_PERSISTENT,
136
+ _HTM_RETRIES) New macros.
137
+ (htm_abort, htm_abort_should_retry, htm_available, htm_begin, htm_init,
138
+ htm_begin_success, htm_commit, htm_transaction_active): New functions.
139
--- a/src/libitm/configure.tgt
140
+++ b/src/libitm/configure.tgt
142
# work out any special compilation flags as necessary.
143
case "${target_cpu}" in
144
alpha*) ARCH=alpha ;;
145
- rs6000 | powerpc*) ARCH=powerpc ;;
147
+ XCFLAGS="${XCFLAGS} -mhtm"
153
--- a/src/libitm/config/powerpc/sjlj.S
154
+++ b/src/libitm/config/powerpc/sjlj.S
159
-#if defined(__powerpc64__) && defined(__ELF__)
160
+#if defined(__powerpc64__) && _CALL_ELF == 2
163
+ .type \name, @function
165
+0: addis 2,12,(.TOC.-0b)@ha
166
+ addi 2,2,(.TOC.-0b)@l
167
+ .localentry \name, . - \name
170
+ .size \name, . - \name
179
+#elif defined(__powerpc64__) && defined(__ELF__)
185
#if defined(_CALL_AIXDESC)
187
# define LR_SAVE 2*WS
188
+#elif _CALL_ELF == 2
190
+# define LR_SAVE 2*WS
191
#elif defined(_CALL_SYSV)
193
# define LR_SAVE 1*WS
194
--- a/src/libitm/config/powerpc/target.h
195
+++ b/src/libitm/config/powerpc/target.h
197
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
198
<http://www.gnu.org/licenses/>. */
200
+#ifdef HAVE_SYS_AUXV_H
201
+#include <sys/auxv.h>
204
namespace GTM HIDDEN {
206
typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
208
__asm volatile ("" : : : "memory");
211
+// Use HTM if it is supported by the system.
212
+// See gtm_thread::begin_transaction for how these functions are used.
213
+#if defined (__linux__) \
214
+ && defined (HAVE_AS_HTM) \
215
+ && defined (HAVE_GETAUXVAL) \
216
+ && defined (AT_HWCAP2) \
217
+ && defined (PPC_FEATURE2_HAS_HTM)
219
+#include <htmintrin.h>
221
+#define USE_HTM_FASTPATH
223
+#define _TBEGIN_STARTED 0
224
+#define _TBEGIN_INDETERMINATE 1
225
+#define _TBEGIN_PERSISTENT 2
227
+/* Number of retries for transient failures. */
228
+#define _HTM_RETRIES 10
231
+htm_available (void)
233
+ return (getauxval (AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) ? true : false;
236
+static inline uint32_t
239
+ // Maximum number of times we try to execute a transaction
240
+ // as a HW transaction.
241
+ return htm_available () ? _HTM_RETRIES : 0;
244
+static inline uint32_t
247
+ if (__builtin_expect (__builtin_tbegin (0), 1))
248
+ return _TBEGIN_STARTED;
250
+ if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ()))
251
+ return _TBEGIN_PERSISTENT;
253
+ return _TBEGIN_INDETERMINATE;
257
+htm_begin_success (uint32_t begin_ret)
259
+ return begin_ret == _TBEGIN_STARTED;
265
+ __builtin_tend (0);
271
+ __builtin_tabort (0);
275
+htm_abort_should_retry (uint32_t begin_ret)
277
+ return begin_ret != _TBEGIN_PERSISTENT;
280
+/* Returns true iff a hardware transaction is currently being executed. */
282
+htm_transaction_active (void)
284
+ return (_HTM_STATE (__builtin_ttest ()) == _HTM_TRANSACTIONAL);
290
--- a/src/libitm/acinclude.m4
291
+++ b/src/libitm/acinclude.m4
296
+dnl Check if as supports HTM instructions.
297
+AC_DEFUN([LIBITM_CHECK_AS_HTM], [
298
+case "${target_cpu}" in
300
+ AC_CACHE_CHECK([if the assembler supports HTM], libitm_cv_as_htm, [
301
+ AC_TRY_COMPILE([], [asm("tbegin. 0; tend. 0");],
302
+ [libitm_cv_as_htm=yes], [libitm_cv_as_htm=no])
304
+ if test x$libitm_cv_as_htm = xyes; then
305
+ AC_DEFINE(HAVE_AS_HTM, 1, [Define to 1 if the assembler supports HTM.])
310
sinclude(../libtool.m4)
311
dnl The lines below arrange for aclocal not to bring an installed
312
dnl libtool.m4 into aclocal.m4, while still arranging for automake to
315
@@ -1220,7 +1220,7 @@
319
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
320
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
321
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
322
# Find out which ABI we are using.
323
echo 'int i;' > conftest.$ac_ext
324
@@ -1241,7 +1241,10 @@
328
- ppc64-*linux*|powerpc64-*linux*)
329
+ powerpc64le-*linux*)
330
+ LD="${LD-ld} -m elf32lppclinux"
333
LD="${LD-ld} -m elf32ppclinux"
336
@@ -1260,7 +1263,10 @@
338
LD="${LD-ld} -m elf_x86_64"
340
- ppc*-*linux*|powerpc*-*linux*)
342
+ LD="${LD-ld} -m elf64lppc"
345
LD="${LD-ld} -m elf64ppc"
347
s390*-*linux*|s390*-*tpf*)
348
--- a/src/libgomp/configure
349
+++ b/src/libgomp/configure
350
@@ -6580,7 +6580,7 @@
354
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
355
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
356
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
357
# Find out which ABI we are using.
358
echo 'int i;' > conftest.$ac_ext
359
@@ -6605,7 +6605,10 @@
363
- ppc64-*linux*|powerpc64-*linux*)
364
+ powerpc64le-*linux*)
365
+ LD="${LD-ld} -m elf32lppclinux"
368
LD="${LD-ld} -m elf32ppclinux"
371
@@ -6624,7 +6627,10 @@
373
LD="${LD-ld} -m elf_x86_64"
375
- ppc*-*linux*|powerpc*-*linux*)
377
+ LD="${LD-ld} -m elf64lppc"
380
LD="${LD-ld} -m elf64ppc"
382
s390*-*linux*|s390*-*tpf*)
383
@@ -11088,7 +11094,7 @@
384
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
385
lt_status=$lt_dlunknown
386
cat > conftest.$ac_ext <<_LT_EOF
387
-#line 11091 "configure"
388
+#line 11097 "configure"
389
#include "confdefs.h"
392
@@ -11194,7 +11200,7 @@
393
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
394
lt_status=$lt_dlunknown
395
cat > conftest.$ac_ext <<_LT_EOF
396
-#line 11197 "configure"
397
+#line 11203 "configure"
398
#include "confdefs.h"
401
--- a/src/libquadmath/configure
402
+++ b/src/libquadmath/configure
403
@@ -6248,7 +6248,7 @@
407
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
408
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
409
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
410
# Find out which ABI we are using.
411
echo 'int i;' > conftest.$ac_ext
412
@@ -6273,7 +6273,10 @@
416
- ppc64-*linux*|powerpc64-*linux*)
417
+ powerpc64le-*linux*)
418
+ LD="${LD-ld} -m elf32lppclinux"
421
LD="${LD-ld} -m elf32ppclinux"
424
@@ -6292,7 +6295,10 @@
426
LD="${LD-ld} -m elf_x86_64"
428
- ppc*-*linux*|powerpc*-*linux*)
430
+ LD="${LD-ld} -m elf64lppc"
433
LD="${LD-ld} -m elf64ppc"
435
s390*-*linux*|s390*-*tpf*)
436
@@ -10521,7 +10527,7 @@
437
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
438
lt_status=$lt_dlunknown
439
cat > conftest.$ac_ext <<_LT_EOF
440
-#line 10524 "configure"
441
+#line 10530 "configure"
442
#include "confdefs.h"
445
@@ -10627,7 +10633,7 @@
446
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
447
lt_status=$lt_dlunknown
448
cat > conftest.$ac_ext <<_LT_EOF
449
-#line 10630 "configure"
450
+#line 10636 "configure"
451
#include "confdefs.h"
454
--- a/src/libsanitizer/configure
455
+++ b/src/libsanitizer/configure
456
@@ -6604,7 +6604,7 @@
460
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
461
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
462
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
463
# Find out which ABI we are using.
464
echo 'int i;' > conftest.$ac_ext
465
@@ -6629,7 +6629,10 @@
469
- ppc64-*linux*|powerpc64-*linux*)
470
+ powerpc64le-*linux*)
471
+ LD="${LD-ld} -m elf32lppclinux"
474
LD="${LD-ld} -m elf32ppclinux"
477
@@ -6648,7 +6651,10 @@
479
LD="${LD-ld} -m elf_x86_64"
481
- ppc*-*linux*|powerpc*-*linux*)
483
+ LD="${LD-ld} -m elf64lppc"
486
LD="${LD-ld} -m elf64ppc"
488
s390*-*linux*|s390*-*tpf*)
489
@@ -11111,7 +11117,7 @@
490
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
491
lt_status=$lt_dlunknown
492
cat > conftest.$ac_ext <<_LT_EOF
493
-#line 11114 "configure"
494
+#line 11120 "configure"
495
#include "confdefs.h"
498
@@ -11217,7 +11223,7 @@
499
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
500
lt_status=$lt_dlunknown
501
cat > conftest.$ac_ext <<_LT_EOF
502
-#line 11220 "configure"
503
+#line 11226 "configure"
504
#include "confdefs.h"
507
--- a/src/libsanitizer/ChangeLog.ibm
508
+++ b/src/libsanitizer/ChangeLog.ibm
510
+2014-03-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
512
+ Backport from mainline r208290
513
+ 2014-03-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
515
+ * configure.tgt: Unsupported for little endian PowerPC for now.
517
--- a/src/libsanitizer/configure.tgt
518
+++ b/src/libsanitizer/configure.tgt
523
+ powerpc*le-*-linux*)
529
--- a/src/zlib/configure
530
+++ b/src/zlib/configure
531
@@ -5853,7 +5853,7 @@
535
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
536
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
537
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
538
# Find out which ABI we are using.
539
echo 'int i;' > conftest.$ac_ext
540
@@ -5878,7 +5878,10 @@
544
- ppc64-*linux*|powerpc64-*linux*)
545
+ powerpc64le-*linux*)
546
+ LD="${LD-ld} -m elf32lppclinux"
549
LD="${LD-ld} -m elf32ppclinux"
552
@@ -5897,7 +5900,10 @@
554
LD="${LD-ld} -m elf_x86_64"
556
- ppc*-*linux*|powerpc*-*linux*)
558
+ LD="${LD-ld} -m elf64lppc"
561
LD="${LD-ld} -m elf64ppc"
563
s390*-*linux*|s390*-*tpf*)
564
@@ -10394,7 +10400,7 @@
565
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
566
lt_status=$lt_dlunknown
567
cat > conftest.$ac_ext <<_LT_EOF
568
-#line 10397 "configure"
569
+#line 10403 "configure"
570
#include "confdefs.h"
573
@@ -10500,7 +10506,7 @@
574
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
575
lt_status=$lt_dlunknown
576
cat > conftest.$ac_ext <<_LT_EOF
577
-#line 10503 "configure"
578
+#line 10509 "configure"
579
#include "confdefs.h"
582
--- a/src/libstdc++-v3/configure
583
+++ b/src/libstdc++-v3/configure
584
@@ -7111,7 +7111,7 @@
588
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
589
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
590
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
591
# Find out which ABI we are using.
592
echo 'int i;' > conftest.$ac_ext
593
@@ -7136,7 +7136,10 @@
597
- ppc64-*linux*|powerpc64-*linux*)
598
+ powerpc64le-*linux*)
599
+ LD="${LD-ld} -m elf32lppclinux"
602
LD="${LD-ld} -m elf32ppclinux"
605
@@ -7155,7 +7158,10 @@
607
LD="${LD-ld} -m elf_x86_64"
609
- ppc*-*linux*|powerpc*-*linux*)
611
+ LD="${LD-ld} -m elf64lppc"
614
LD="${LD-ld} -m elf64ppc"
616
s390*-*linux*|s390*-*tpf*)
617
@@ -11513,7 +11519,7 @@
618
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
619
lt_status=$lt_dlunknown
620
cat > conftest.$ac_ext <<_LT_EOF
621
-#line 11516 "configure"
622
+#line 11522 "configure"
623
#include "confdefs.h"
626
@@ -11619,7 +11625,7 @@
627
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
628
lt_status=$lt_dlunknown
629
cat > conftest.$ac_ext <<_LT_EOF
630
-#line 11622 "configure"
631
+#line 11628 "configure"
632
#include "confdefs.h"
635
@@ -15033,7 +15039,7 @@
637
# Fake what AC_TRY_COMPILE does. XXX Look at redoing this new-style.
638
cat > conftest.$ac_ext << EOF
639
-#line 15036 "configure"
640
+#line 15042 "configure"
644
@@ -15383,7 +15389,7 @@
645
# Fake what AC_TRY_COMPILE does.
647
cat > conftest.$ac_ext << EOF
648
-#line 15386 "configure"
649
+#line 15392 "configure"
652
typedef bool atomic_type;
653
@@ -15418,7 +15424,7 @@
656
cat > conftest.$ac_ext << EOF
657
-#line 15421 "configure"
658
+#line 15427 "configure"
661
typedef short atomic_type;
662
@@ -15453,7 +15459,7 @@
665
cat > conftest.$ac_ext << EOF
666
-#line 15456 "configure"
667
+#line 15462 "configure"
670
// NB: _Atomic_word not necessarily int.
671
@@ -15489,7 +15495,7 @@
674
cat > conftest.$ac_ext << EOF
675
-#line 15492 "configure"
676
+#line 15498 "configure"
679
typedef long long atomic_type;
680
@@ -15568,7 +15574,7 @@
681
# unnecessary for this test.
683
cat > conftest.$ac_ext << EOF
684
-#line 15571 "configure"
685
+#line 15577 "configure"
689
@@ -15610,7 +15616,7 @@
690
# unnecessary for this test.
692
cat > conftest.$ac_ext << EOF
693
-#line 15613 "configure"
694
+#line 15619 "configure"
695
template<typename T1, typename T2>
697
{ typedef T2 type; };
698
@@ -15644,7 +15650,7 @@
701
cat > conftest.$ac_ext << EOF
702
-#line 15647 "configure"
703
+#line 15653 "configure"
704
template<typename T1, typename T2>
706
{ typedef T2 type; };
707
--- a/src/libstdc++-v3/scripts/extract_symvers.in
708
+++ b/src/libstdc++-v3/scripts/extract_symvers.in
710
# present on Solaris.
712
sed -e 's/ \[<other>: [A-Fa-f0-9]*\] //' -e '/\.dynsym/,/^$/p;d' |\
713
+ sed -e 's/ \[<localentry>: [0-9]*\] //' |\
714
egrep -v ' (LOCAL|UND) ' |\
715
egrep -v ' (_DYNAMIC|_GLOBAL_OFFSET_TABLE_|_PROCEDURE_LINKAGE_TABLE_|_edata|_end|_etext)$' |\
716
sed -e 's/ <processor specific>: / <processor_specific>:_/g' |\
717
--- a/src/libstdc++-v3/ChangeLog.ibm
718
+++ b/src/libstdc++-v3/ChangeLog.ibm
720
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
722
+ Backport from mainline r204808:
724
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
726
+ * scripts/extract_symvers.in: Ignore <localentry: > fields
727
+ in readelf --symbols output.
729
+2013-08-04 Peter Bergner <bergner@vnet.ibm.com>
731
+ Backport from mainline
732
+ 2013-08-01 Fabien Chêne <fabien@gcc.gnu.org>
735
+ * include/tr1/cmath: Remove pow(double,double) overload, remove a
736
+ duplicated comment about DR 550. Add a comment to explain the issue.
737
+ * testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc: New.
739
--- a/src/libstdc++-v3/include/tr1/cmath
740
+++ b/src/libstdc++-v3/include/tr1/cmath
742
nexttoward(_Tp __x, long double __y)
743
{ return __builtin_nexttoward(__x, __y); }
745
- // DR 550. What should the return type of pow(float,int) be?
746
- // NB: C++0x and TR1 != C++03.
750
remainder(float __x, float __y)
751
{ return __builtin_remainderf(__x, __y); }
752
@@ -985,10 +981,19 @@
754
// DR 550. What should the return type of pow(float,int) be?
755
// NB: C++0x and TR1 != C++03.
757
- pow(double __x, double __y)
758
- { return std::pow(__x, __y); }
760
+ // The std::tr1::pow(double, double) overload cannot be provided
761
+ // here, because it would clash with ::pow(double,double) declared
762
+ // in <math.h>, if <tr1/math.h> is included at the same time (raised
763
+ // by the fix of PR c++/54537). It is not possible either to use the
764
+ // using-declaration 'using ::pow;' here, because if the user code
765
+ // has a 'using std::pow;', it would bring the pow(*,int) averloads
766
+ // in the tr1 namespace, which is undesirable. Consequently, the
767
+ // solution is to forward std::tr1::pow(double,double) to
768
+ // std::pow(double,double) via the templatized version below. See
769
+ // the discussion about this issue here:
770
+ // http://gcc.gnu.org/ml/gcc-patches/2012-09/msg01278.html
773
pow(float __x, float __y)
774
{ return std::pow(__x, __y); }
775
--- a/src/libstdc++-v3/testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc
776
+++ b/src/libstdc++-v3/testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc
778
+// { dg-do compile }
780
+// Copyright (C) 2013 Free Software Foundation, Inc.
782
+// This file is part of the GNU ISO C++ Library. This library is free
783
+// software; you can redistribute it and/or modify it under the
784
+// terms of the GNU General Public License as published by the
785
+// Free Software Foundation; either version 3, or (at your option)
786
+// any later version.
788
+// This library is distributed in the hope that it will be useful,
789
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
790
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
791
+// GNU General Public License for more details.
793
+// You should have received a copy of the GNU General Public License along
794
+// with this library; see the file COPYING3. If not see
795
+// <http://www.gnu.org/licenses/>.
799
+#include <tr1/cmath>
800
+#include <testsuite_tr1.h>
805
+ using namespace __gnu_test;
807
+ float x = 2080703.375F;
808
+ check_ret_type<float>(std::pow(x, 2));
809
+ check_ret_type<double>(std::tr1::pow(x, 2));
811
--- a/src/libmudflap/configure
812
+++ b/src/libmudflap/configure
813
@@ -6377,7 +6377,7 @@
817
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
818
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
819
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
820
# Find out which ABI we are using.
821
echo 'int i;' > conftest.$ac_ext
822
@@ -6402,7 +6402,10 @@
826
- ppc64-*linux*|powerpc64-*linux*)
827
+ powerpc64le-*linux*)
828
+ LD="${LD-ld} -m elf32lppclinux"
831
LD="${LD-ld} -m elf32ppclinux"
834
@@ -6421,7 +6424,10 @@
836
LD="${LD-ld} -m elf_x86_64"
838
- ppc*-*linux*|powerpc*-*linux*)
840
+ LD="${LD-ld} -m elf64lppc"
843
LD="${LD-ld} -m elf64ppc"
845
s390*-*linux*|s390*-*tpf*)
846
@@ -10615,7 +10621,7 @@
847
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
848
lt_status=$lt_dlunknown
849
cat > conftest.$ac_ext <<_LT_EOF
850
-#line 10618 "configure"
851
+#line 10624 "configure"
852
#include "confdefs.h"
855
@@ -10721,7 +10727,7 @@
856
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
857
lt_status=$lt_dlunknown
858
cat > conftest.$ac_ext <<_LT_EOF
859
-#line 10724 "configure"
860
+#line 10730 "configure"
861
#include "confdefs.h"
864
--- a/src/boehm-gc/configure
865
+++ b/src/boehm-gc/configure
866
@@ -6770,7 +6770,7 @@
870
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
871
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
872
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
873
# Find out which ABI we are using.
874
echo 'int i;' > conftest.$ac_ext
875
@@ -6795,7 +6795,10 @@
879
- ppc64-*linux*|powerpc64-*linux*)
880
+ powerpc64le-*linux*)
881
+ LD="${LD-ld} -m elf32lppclinux"
884
LD="${LD-ld} -m elf32ppclinux"
887
@@ -6814,7 +6817,10 @@
889
LD="${LD-ld} -m elf_x86_64"
891
- ppc*-*linux*|powerpc*-*linux*)
893
+ LD="${LD-ld} -m elf64lppc"
896
LD="${LD-ld} -m elf64ppc"
898
s390*-*linux*|s390*-*tpf*)
899
@@ -11312,7 +11318,7 @@
900
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
901
lt_status=$lt_dlunknown
902
cat > conftest.$ac_ext <<_LT_EOF
903
-#line 11315 "configure"
904
+#line 11321 "configure"
905
#include "confdefs.h"
908
@@ -11418,7 +11424,7 @@
909
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
910
lt_status=$lt_dlunknown
911
cat > conftest.$ac_ext <<_LT_EOF
912
-#line 11421 "configure"
913
+#line 11427 "configure"
914
#include "confdefs.h"
917
--- a/src/lto-plugin/configure
918
+++ b/src/lto-plugin/configure
919
@@ -6044,7 +6044,7 @@
923
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
924
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
925
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
926
# Find out which ABI we are using.
927
echo 'int i;' > conftest.$ac_ext
928
@@ -6069,7 +6069,10 @@
932
- ppc64-*linux*|powerpc64-*linux*)
933
+ powerpc64le-*linux*)
934
+ LD="${LD-ld} -m elf32lppclinux"
937
LD="${LD-ld} -m elf32ppclinux"
940
@@ -6088,7 +6091,10 @@
942
LD="${LD-ld} -m elf_x86_64"
944
- ppc*-*linux*|powerpc*-*linux*)
946
+ LD="${LD-ld} -m elf64lppc"
949
LD="${LD-ld} -m elf64ppc"
951
s390*-*linux*|s390*-*tpf*)
952
@@ -10552,7 +10558,7 @@
953
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
954
lt_status=$lt_dlunknown
955
cat > conftest.$ac_ext <<_LT_EOF
956
-#line 10555 "configure"
957
+#line 10561 "configure"
958
#include "confdefs.h"
961
@@ -10658,7 +10664,7 @@
962
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
963
lt_status=$lt_dlunknown
964
cat > conftest.$ac_ext <<_LT_EOF
965
-#line 10661 "configure"
966
+#line 10667 "configure"
967
#include "confdefs.h"
970
--- a/src/libatomic/configure
971
+++ b/src/libatomic/configure
972
@@ -6505,7 +6505,7 @@
976
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
977
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
978
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
979
# Find out which ABI we are using.
980
echo 'int i;' > conftest.$ac_ext
981
@@ -6530,7 +6530,10 @@
985
- ppc64-*linux*|powerpc64-*linux*)
986
+ powerpc64le-*linux*)
987
+ LD="${LD-ld} -m elf32lppclinux"
990
LD="${LD-ld} -m elf32ppclinux"
993
@@ -6549,7 +6552,10 @@
995
LD="${LD-ld} -m elf_x86_64"
997
- ppc*-*linux*|powerpc*-*linux*)
999
+ LD="${LD-ld} -m elf64lppc"
1002
LD="${LD-ld} -m elf64ppc"
1004
s390*-*linux*|s390*-*tpf*)
1005
@@ -11013,7 +11019,7 @@
1006
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1007
lt_status=$lt_dlunknown
1008
cat > conftest.$ac_ext <<_LT_EOF
1009
-#line 11016 "configure"
1010
+#line 11022 "configure"
1011
#include "confdefs.h"
1014
@@ -11119,7 +11125,7 @@
1015
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1016
lt_status=$lt_dlunknown
1017
cat > conftest.$ac_ext <<_LT_EOF
1018
-#line 11122 "configure"
1019
+#line 11128 "configure"
1020
#include "confdefs.h"
1023
--- a/src/libbacktrace/configure
1024
+++ b/src/libbacktrace/configure
1025
@@ -6842,7 +6842,7 @@
1029
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1030
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1031
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1032
# Find out which ABI we are using.
1033
echo 'int i;' > conftest.$ac_ext
1034
@@ -6867,7 +6867,10 @@
1038
- ppc64-*linux*|powerpc64-*linux*)
1039
+ powerpc64le-*linux*)
1040
+ LD="${LD-ld} -m elf32lppclinux"
1042
+ powerpc64-*linux*)
1043
LD="${LD-ld} -m elf32ppclinux"
1046
@@ -6886,7 +6889,10 @@
1048
LD="${LD-ld} -m elf_x86_64"
1050
- ppc*-*linux*|powerpc*-*linux*)
1051
+ powerpcle-*linux*)
1052
+ LD="${LD-ld} -m elf64lppc"
1055
LD="${LD-ld} -m elf64ppc"
1057
s390*-*linux*|s390*-*tpf*)
1058
@@ -11081,7 +11087,7 @@
1059
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1060
lt_status=$lt_dlunknown
1061
cat > conftest.$ac_ext <<_LT_EOF
1062
-#line 11084 "configure"
1063
+#line 11090 "configure"
1064
#include "confdefs.h"
1067
@@ -11187,7 +11193,7 @@
1068
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1069
lt_status=$lt_dlunknown
1070
cat > conftest.$ac_ext <<_LT_EOF
1071
-#line 11190 "configure"
1072
+#line 11196 "configure"
1073
#include "confdefs.h"
1076
--- a/src/libjava/libltdl/configure
1077
+++ b/src/libjava/libltdl/configure
1078
@@ -4806,7 +4806,7 @@
1082
-x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1083
+x86_64-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1084
# Find out which ABI we are using.
1085
echo 'int i;' > conftest.$ac_ext
1086
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
1087
@@ -4820,7 +4820,10 @@
1089
LD="${LD-ld} -m elf_i386"
1091
- ppc64-*linux*|powerpc64-*linux*)
1092
+ powerpc64le-*linux*)
1093
+ LD="${LD-ld} -m elf32lppclinux"
1095
+ powerpc64-*linux*)
1096
LD="${LD-ld} -m elf32ppclinux"
1099
@@ -4836,7 +4839,10 @@
1101
LD="${LD-ld} -m elf_x86_64"
1103
- ppc*-*linux*|powerpc*-*linux*)
1104
+ powerpcle-*linux*)
1105
+ LD="${LD-ld} -m elf64lppc"
1108
LD="${LD-ld} -m elf64ppc"
1111
@@ -6456,11 +6462,11 @@
1112
-e 's:.*FLAGS}? :&$lt_compiler_flag :; t' \
1113
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
1114
-e 's:$: $lt_compiler_flag:'`
1115
- (eval echo "\"\$as_me:6459: $lt_compile\"" >&5)
1116
+ (eval echo "\"\$as_me:6465: $lt_compile\"" >&5)
1117
(eval "$lt_compile" 2>conftest.err)
1119
cat conftest.err >&5
1120
- echo "$as_me:6463: \$? = $ac_status" >&5
1121
+ echo "$as_me:6469: \$? = $ac_status" >&5
1122
if (exit $ac_status) && test -s "$ac_outfile"; then
1123
# The compiler can only warn and ignore the option if not recognized
1124
# So say no if there are warnings other than the usual output.
1125
@@ -6718,11 +6724,11 @@
1126
-e 's:.*FLAGS}? :&$lt_compiler_flag :; t' \
1127
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
1128
-e 's:$: $lt_compiler_flag:'`
1129
- (eval echo "\"\$as_me:6721: $lt_compile\"" >&5)
1130
+ (eval echo "\"\$as_me:6727: $lt_compile\"" >&5)
1131
(eval "$lt_compile" 2>conftest.err)
1133
cat conftest.err >&5
1134
- echo "$as_me:6725: \$? = $ac_status" >&5
1135
+ echo "$as_me:6731: \$? = $ac_status" >&5
1136
if (exit $ac_status) && test -s "$ac_outfile"; then
1137
# The compiler can only warn and ignore the option if not recognized
1138
# So say no if there are warnings other than the usual output.
1139
@@ -6780,11 +6786,11 @@
1140
-e 's:.*FLAGS}? :&$lt_compiler_flag :; t' \
1141
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
1142
-e 's:$: $lt_compiler_flag:'`
1143
- (eval echo "\"\$as_me:6783: $lt_compile\"" >&5)
1144
+ (eval echo "\"\$as_me:6789: $lt_compile\"" >&5)
1145
(eval "$lt_compile" 2>out/conftest.err)
1147
cat out/conftest.err >&5
1148
- echo "$as_me:6787: \$? = $ac_status" >&5
1149
+ echo "$as_me:6793: \$? = $ac_status" >&5
1150
if (exit $ac_status) && test -s out/conftest2.$ac_objext
1152
# The compiler can only warn and ignore the option if not recognized
1153
@@ -8099,7 +8105,7 @@
1156
x86_64*|s390x*|powerpc64*)
1157
- echo '#line 8102 "configure"' > conftest.$ac_ext
1158
+ echo '#line 8108 "configure"' > conftest.$ac_ext
1159
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
1160
(eval $ac_compile) 2>&5
1162
@@ -8652,7 +8658,7 @@
1163
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1164
lt_status=$lt_dlunknown
1165
cat > conftest.$ac_ext <<EOF
1166
-#line 8655 "configure"
1167
+#line 8661 "configure"
1168
#include "confdefs.h"
1171
@@ -8750,7 +8756,7 @@
1172
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1173
lt_status=$lt_dlunknown
1174
cat > conftest.$ac_ext <<EOF
1175
-#line 8753 "configure"
1176
+#line 8759 "configure"
1177
#include "confdefs.h"
1180
@@ -10591,7 +10597,7 @@
1181
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1182
lt_status=$lt_dlunknown
1183
cat > conftest.$ac_ext <<EOF
1184
-#line 10594 "configure"
1185
+#line 10600 "configure"
1186
#include "confdefs.h"
1189
--- a/src/libjava/libltdl/acinclude.m4
1190
+++ b/src/libjava/libltdl/acinclude.m4
1195
-x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1196
+x86_64-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1197
# Find out which ABI we are using.
1198
echo 'int i;' > conftest.$ac_ext
1199
if AC_TRY_EVAL(ac_compile); then
1200
@@ -529,7 +529,10 @@
1202
LD="${LD-ld} -m elf_i386"
1204
- ppc64-*linux*|powerpc64-*linux*)
1205
+ powerpc64le-*linux*)
1206
+ LD="${LD-ld} -m elf32lppclinux"
1208
+ powerpc64-*linux*)
1209
LD="${LD-ld} -m elf32ppclinux"
1212
@@ -545,7 +548,10 @@
1214
LD="${LD-ld} -m elf_x86_64"
1216
- ppc*-*linux*|powerpc*-*linux*)
1217
+ powerpcle-*linux*)
1218
+ LD="${LD-ld} -m elf64lppc"
1221
LD="${LD-ld} -m elf64ppc"
1224
--- a/src/libjava/classpath/configure
1225
+++ b/src/libjava/classpath/configure
1226
@@ -7577,7 +7577,7 @@
1230
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1231
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1232
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1233
# Find out which ABI we are using.
1234
echo 'int i;' > conftest.$ac_ext
1235
@@ -7602,7 +7602,10 @@
1239
- ppc64-*linux*|powerpc64-*linux*)
1240
+ powerpc64le-*linux*)
1241
+ LD="${LD-ld} -m elf32lppclinux"
1243
+ powerpc64-*linux*)
1244
LD="${LD-ld} -m elf32ppclinux"
1247
@@ -7621,7 +7624,10 @@
1249
LD="${LD-ld} -m elf_x86_64"
1251
- ppc*-*linux*|powerpc*-*linux*)
1252
+ powerpcle-*linux*)
1253
+ LD="${LD-ld} -m elf64lppc"
1256
LD="${LD-ld} -m elf64ppc"
1258
s390*-*linux*|s390*-*tpf*)
1259
@@ -11820,7 +11826,7 @@
1260
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1261
lt_status=$lt_dlunknown
1262
cat > conftest.$ac_ext <<_LT_EOF
1263
-#line 11823 "configure"
1264
+#line 11829 "configure"
1265
#include "confdefs.h"
1268
@@ -11926,7 +11932,7 @@
1269
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1270
lt_status=$lt_dlunknown
1271
cat > conftest.$ac_ext <<_LT_EOF
1272
-#line 11929 "configure"
1273
+#line 11935 "configure"
1274
#include "confdefs.h"
1277
@@ -25300,7 +25306,7 @@
1278
JAVA_TEST=Object.java
1279
CLASS_TEST=Object.class
1280
cat << \EOF > $JAVA_TEST
1281
-/* #line 25303 "configure" */
1282
+/* #line 25309 "configure" */
1286
@@ -25393,7 +25399,7 @@
1287
if uudecode$EXEEXT Test.uue; then
1288
ac_cv_prog_uudecode_base64=yes
1290
- echo "configure: 25396: uudecode had trouble decoding base 64 file 'Test.uue'" >&5
1291
+ echo "configure: 25402: uudecode had trouble decoding base 64 file 'Test.uue'" >&5
1292
echo "configure: failed file was:" >&5
1294
ac_cv_prog_uudecode_base64=no
1295
@@ -25421,7 +25427,7 @@
1296
CLASS_TEST=Test.class
1298
cat << \EOF > $JAVA_TEST
1299
-/* [#]line 25424 "configure" */
1300
+/* [#]line 25430 "configure" */
1302
public static void main (String args[]) {
1304
@@ -25629,7 +25635,7 @@
1306
CLASS_TEST=Test.class
1307
cat << \EOF > $JAVA_TEST
1308
- /* #line 25632 "configure" */
1309
+ /* #line 25638 "configure" */
1312
public static void main(String args)
1313
--- a/src/libjava/configure
1314
+++ b/src/libjava/configure
1315
@@ -8842,7 +8842,7 @@
1319
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1320
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1321
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1322
# Find out which ABI we are using.
1323
echo 'int i;' > conftest.$ac_ext
1324
@@ -8867,7 +8867,10 @@
1328
- ppc64-*linux*|powerpc64-*linux*)
1329
+ powerpc64le-*linux*)
1330
+ LD="${LD-ld} -m elf32lppclinux"
1332
+ powerpc64-*linux*)
1333
LD="${LD-ld} -m elf32ppclinux"
1336
@@ -8886,7 +8889,10 @@
1338
LD="${LD-ld} -m elf_x86_64"
1340
- ppc*-*linux*|powerpc*-*linux*)
1341
+ powerpcle-*linux*)
1342
+ LD="${LD-ld} -m elf64lppc"
1345
LD="${LD-ld} -m elf64ppc"
1347
s390*-*linux*|s390*-*tpf*)
1348
@@ -13382,7 +13388,7 @@
1349
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1350
lt_status=$lt_dlunknown
1351
cat > conftest.$ac_ext <<_LT_EOF
1352
-#line 13385 "configure"
1353
+#line 13391 "configure"
1354
#include "confdefs.h"
1357
@@ -13488,7 +13494,7 @@
1358
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1359
lt_status=$lt_dlunknown
1360
cat > conftest.$ac_ext <<_LT_EOF
1361
-#line 13491 "configure"
1362
+#line 13497 "configure"
1363
#include "confdefs.h"
1366
@@ -19483,7 +19489,7 @@
1367
enableval=$enable_sjlj_exceptions; :
1369
cat > conftest.$ac_ext << EOF
1370
-#line 19486 "configure"
1371
+#line 19492 "configure"
1375
--- a/src/libgcc/config/rs6000/tramp.S
1376
+++ b/src/libgcc/config/rs6000/tramp.S
1377
@@ -116,4 +116,70 @@
1381
+#elif _CALL_ELF == 2
1382
+ .type trampoline_initial,@object
1384
+trampoline_initial:
1385
+ ld r11,.Lchain(r12)
1386
+ ld r12,.Lfunc(r12)
1389
+.Lfunc = .-trampoline_initial
1390
+ .quad 0 /* will be replaced with function address */
1391
+.Lchain = .-trampoline_initial
1392
+ .quad 0 /* will be replaced with static chain */
1394
+trampoline_size = .-trampoline_initial
1395
+ .size trampoline_initial,trampoline_size
1398
+/* R3 = stack address to store trampoline */
1399
+/* R4 = length of trampoline area */
1400
+/* R5 = function address */
1401
+/* R6 = static chain */
1403
+ .pushsection ".toc","aw"
1405
+ .quad trampoline_initial-8
1408
+FUNC_START(__trampoline_setup)
1409
+ addis 7,2,.LC0@toc@ha
1410
+ ld 7,.LC0@toc@l(7) /* trampoline address -8 */
1412
+ li r8,trampoline_size /* verify that the trampoline is big enough */
1414
+ srwi r4,r4,3 /* # doublewords to move */
1415
+ addi r9,r3,-8 /* adjust pointer for stdu */
1419
+ /* Copy the instructions to the stack */
1425
+ /* Store correct function and static chain */
1427
+ std r6,.Lchain(r3)
1429
+ /* Now flush both caches */
1437
+ /* Finally synchronize things & return */
1443
+ bl JUMP_TARGET(abort)
1445
+FUNC_END(__trampoline_setup)
1448
--- a/src/libgcc/config/rs6000/linux-unwind.h
1449
+++ b/src/libgcc/config/rs6000/linux-unwind.h
1457
#define R_VRSAVE 109
1459
+#ifdef __powerpc64__
1461
+#define TOC_SAVE_SLOT 24
1463
+#define TOC_SAVE_SLOT 40
1469
__attribute__ ((vector_size (16))) int vr[32];
1472
else if (pc[1] == 0x380000AC)
1475
+ /* These old kernel versions never supported ELFv2. */
1476
/* This works for 2.4 kernels, but not for 2.6 kernels with vdso
1477
because pc isn't pointing into the stack. Can be removed when
1478
no one is running 2.4.19 or 2.4.20, the first two ppc64
1480
if ((long) frame24->puc != -21 * 8)
1481
return frame24->puc->regs;
1485
/* This works for 2.4.21 and later kernels. */
1486
struct rt_sigframe {
1489
struct gcc_regs *regs = get_regs (context);
1490
struct gcc_vregs *vregs;
1495
@@ -206,11 +220,21 @@
1496
fs->regs.reg[i].loc.offset = (long) ®s->gpr[i] - new_cfa;
1499
+ /* The CR is saved in the low 32 bits of regs->ccr. */
1500
+ cr_offset = (long) ®s->ccr - new_cfa;
1501
+#ifndef __LITTLE_ENDIAN__
1502
+ cr_offset += sizeof (long) - 4;
1504
+ /* In the ELFv1 ABI, CR2 stands in for the whole CR. */
1505
fs->regs.reg[R_CR2].how = REG_SAVED_OFFSET;
1506
- /* CR? regs are always 32-bit and PPC is big-endian, so in 64-bit
1507
- libgcc loc.offset needs to point to the low 32 bits of regs->ccr. */
1508
- fs->regs.reg[R_CR2].loc.offset = (long) ®s->ccr - new_cfa
1509
- + sizeof (long) - 4;
1510
+ fs->regs.reg[R_CR2].loc.offset = cr_offset;
1512
+ /* In the ELFv2 ABI, every CR field has a separate CFI entry. */
1513
+ fs->regs.reg[R_CR3].how = REG_SAVED_OFFSET;
1514
+ fs->regs.reg[R_CR3].loc.offset = cr_offset;
1515
+ fs->regs.reg[R_CR4].how = REG_SAVED_OFFSET;
1516
+ fs->regs.reg[R_CR4].loc.offset = cr_offset;
1519
fs->regs.reg[R_LR].how = REG_SAVED_OFFSET;
1520
fs->regs.reg[R_LR].loc.offset = (long) ®s->link - new_cfa;
1521
@@ -294,9 +318,13 @@
1522
figure out if it was saved. The big problem here is that the
1523
code that does the save/restore is generated by the linker, so
1524
we have no good way to determine at compile time what to do. */
1525
- if (pc[0] == 0xF8410028
1526
+ if (pc[0] == 0xF8410000 + TOC_SAVE_SLOT
1528
+ /* The ELFv2 linker never generates the old PLT stub form. */
1529
|| ((pc[0] & 0xFFFF0000) == 0x3D820000
1530
- && pc[1] == 0xF8410028))
1531
+ && pc[1] == 0xF8410000 + TOC_SAVE_SLOT)
1535
/* We are in a plt call stub or r2 adjusting long branch stub,
1536
before r2 has been saved. Keep REG_UNSAVED. */
1537
@@ -305,10 +333,12 @@
1540
= (unsigned int *) _Unwind_GetGR (context, R_LR);
1541
- if (insn && *insn == 0xE8410028)
1542
- _Unwind_SetGRPtr (context, 2, context->cfa + 40);
1543
+ if (insn && *insn == 0xE8410000 + TOC_SAVE_SLOT)
1544
+ _Unwind_SetGRPtr (context, 2, context->cfa + TOC_SAVE_SLOT);
1546
+ /* ELFv2 does not use this function pointer call sequence. */
1547
else if (pc[0] == 0x4E800421
1548
- && pc[1] == 0xE8410028)
1549
+ && pc[1] == 0xE8410000 + TOC_SAVE_SLOT)
1551
/* We are at the bctrl instruction in a call via function
1552
pointer. gcc always emits the load of the new R2 just
1554
before the bctrl so this is the first and only place
1555
we need to use the stored R2. */
1556
_Unwind_Word sp = _Unwind_GetGR (context, 1);
1557
- _Unwind_SetGRPtr (context, 2, (void *)(sp + 40));
1558
+ _Unwind_SetGRPtr (context, 2, (void *)(sp + TOC_SAVE_SLOT));
1564
--- a/src/libgcc/ChangeLog.ibm
1565
+++ b/src/libgcc/ChangeLog.ibm
1567
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1569
+ Backport from mainline r204808:
1571
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1572
+ Alan Modra <amodra@gmail.com>
1574
+ * config/rs6000/linux-unwind.h (TOC_SAVE_SLOT): Define.
1575
+ (frob_update_context): Use it.
1577
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1578
+ Alan Modra <amodra@gmail.com>
1580
+ * config/rs6000/tramp.S [__powerpc64__ && _CALL_ELF == 2]:
1581
+ (trampoline_initial): Provide ELFv2 variant.
1582
+ (__trampoline_setup): Likewise.
1584
+ * config/rs6000/linux-unwind.h (frob_update_context): Do not
1585
+ check for AIX indirect function call sequence if _CALL_ELF == 2.
1587
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1588
+ Alan Modra <amodra@gmail.com>
1590
+ * config/rs6000/linux-unwind.h (get_regs): Do not support
1591
+ old kernel versions if _CALL_ELF == 2.
1592
+ (frob_update_context): Do not support PLT stub variants only
1593
+ generated by old linkers if _CALL_ELF == 2.
1595
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1597
+ Backport from mainline r204800:
1599
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1600
+ Alan Modra <amodra@gmail.com>
1602
+ * config/rs6000/linux-unwind.h (ppc_fallback_frame_state): Correct
1603
+ location of CR save area for 64-bit little-endian systems.
1605
--- a/src/config.guess
1606
+++ b/src/config.guess
1609
# Attempt to guess a canonical system name.
1610
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
1611
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
1612
-# 2011, 2012, 2013 Free Software Foundation, Inc.
1613
+# Copyright 1992-2013 Free Software Foundation, Inc.
1615
-timestamp='2012-12-30'
1616
+timestamp='2013-06-10'
1618
# This file is free software; you can redistribute it and/or modify it
1619
# under the terms of the GNU General Public License as published by
1621
GNU config.guess ($timestamp)
1623
Originally written by Per Bothner.
1624
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
1625
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
1626
-2012, 2013 Free Software Foundation, Inc.
1627
+Copyright 1992-2013 Free Software Foundation, Inc.
1629
This is free software; see the source for copying conditions. There is NO
1630
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
1631
@@ -136,6 +132,27 @@
1632
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
1633
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
1635
+case "${UNAME_SYSTEM}" in
1637
+ # If the system lacks a compiler, then just pick glibc.
1638
+ # We could probably try harder.
1641
+ eval $set_cc_for_build
1642
+ cat <<-EOF > $dummy.c
1643
+ #include <features.h>
1644
+ #if defined(__UCLIBC__)
1646
+ #elif defined(__dietlibc__)
1652
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
1656
# Note: order is significant - the case branches are not exclusive.
1658
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
1659
@@ -857,21 +874,21 @@
1663
- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
1664
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
1667
# other systems with GNU libc and userland
1668
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
1669
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
1672
echo ${UNAME_MACHINE}-pc-minix
1675
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1676
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1678
aarch64_be:Linux:*:*)
1679
UNAME_MACHINE=aarch64_be
1680
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1681
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1684
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
1685
@@ -884,59 +901,54 @@
1686
EV68*) UNAME_MACHINE=alphaev68 ;;
1688
objdump --private-headers /bin/sh | grep -q ld.so.1
1689
- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
1690
- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
1691
+ if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
1692
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1694
+ arc:Linux:*:* | arceb:Linux:*:*)
1695
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1698
eval $set_cc_for_build
1699
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
1700
| grep -q __ARM_EABI__
1702
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1703
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1705
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
1706
| grep -q __ARM_PCS_VFP
1708
- echo ${UNAME_MACHINE}-unknown-linux-gnueabi
1709
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
1711
- echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
1712
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
1717
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1718
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1721
- echo ${UNAME_MACHINE}-axis-linux-gnu
1722
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
1725
- echo ${UNAME_MACHINE}-axis-linux-gnu
1726
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
1729
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1730
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1733
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1734
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1738
- eval $set_cc_for_build
1739
- sed 's/^ //' << EOF >$dummy.c
1740
- #ifdef __dietlibc__
1744
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
1745
- echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
1746
+ echo ${UNAME_MACHINE}-pc-linux-${LIBC}
1749
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1750
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1753
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1754
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1757
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1758
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1760
mips:Linux:*:* | mips64:Linux:*:*)
1761
eval $set_cc_for_build
1762
@@ -955,54 +967,63 @@
1765
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
1766
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
1767
+ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
1770
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1773
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1774
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1777
- echo sparc-unknown-linux-gnu
1778
+ echo sparc-unknown-linux-${LIBC}
1780
parisc64:Linux:*:* | hppa64:Linux:*:*)
1781
- echo hppa64-unknown-linux-gnu
1782
+ echo hppa64-unknown-linux-${LIBC}
1784
parisc:Linux:*:* | hppa:Linux:*:*)
1785
# Look for CPU level
1786
case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
1787
- PA7*) echo hppa1.1-unknown-linux-gnu ;;
1788
- PA8*) echo hppa2.0-unknown-linux-gnu ;;
1789
- *) echo hppa-unknown-linux-gnu ;;
1790
+ PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
1791
+ PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
1792
+ *) echo hppa-unknown-linux-${LIBC} ;;
1796
- echo powerpc64-unknown-linux-gnu
1797
+ echo powerpc64-unknown-linux-${LIBC}
1800
- echo powerpc-unknown-linux-gnu
1801
+ echo powerpc-unknown-linux-${LIBC}
1803
+ ppc64le:Linux:*:*)
1804
+ echo powerpc64le-unknown-linux-${LIBC}
1807
+ echo powerpcle-unknown-linux-${LIBC}
1809
s390:Linux:*:* | s390x:Linux:*:*)
1810
- echo ${UNAME_MACHINE}-ibm-linux
1811
+ echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
1814
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1815
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1818
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1819
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1821
sparc:Linux:*:* | sparc64:Linux:*:*)
1822
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1823
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1826
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1827
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1830
- echo ${UNAME_MACHINE}-dec-linux-gnu
1831
+ echo ${UNAME_MACHINE}-dec-linux-${LIBC}
1834
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1835
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1838
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1839
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1841
i*86:DYNIX/ptx:4*:*)
1842
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
1843
@@ -1235,19 +1256,21 @@
1846
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
1847
- case $UNAME_PROCESSOR in
1849
- eval $set_cc_for_build
1850
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1851
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
1852
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1853
- grep IS_64BIT_ARCH >/dev/null
1855
- UNAME_PROCESSOR="x86_64"
1858
- unknown) UNAME_PROCESSOR=powerpc ;;
1860
+ eval $set_cc_for_build
1861
+ if test "$UNAME_PROCESSOR" = unknown ; then
1862
+ UNAME_PROCESSOR=powerpc
1864
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1865
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
1866
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1867
+ grep IS_64BIT_ARCH >/dev/null
1869
+ case $UNAME_PROCESSOR in
1870
+ i386) UNAME_PROCESSOR=x86_64 ;;
1871
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
1875
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
1877
*:procnto*:*:* | *:QNX:[0123456789]*:*)
1878
--- a/src/gcc/configure
1879
+++ b/src/gcc/configure
1880
@@ -13589,7 +13589,7 @@
1884
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1885
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1886
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1887
# Find out which ABI we are using.
1888
echo 'int i;' > conftest.$ac_ext
1889
@@ -13614,7 +13614,10 @@
1893
- ppc64-*linux*|powerpc64-*linux*)
1894
+ powerpc64le-*linux*)
1895
+ LD="${LD-ld} -m elf32lppclinux"
1897
+ powerpc64-*linux*)
1898
LD="${LD-ld} -m elf32ppclinux"
1901
@@ -13633,7 +13636,10 @@
1903
LD="${LD-ld} -m elf_x86_64"
1905
- ppc*-*linux*|powerpc*-*linux*)
1906
+ powerpcle-*linux*)
1907
+ LD="${LD-ld} -m elf64lppc"
1910
LD="${LD-ld} -m elf64ppc"
1912
s390*-*linux*|s390*-*tpf*)
1913
--- a/src/gcc/builtins.c
1914
+++ b/src/gcc/builtins.c
1915
@@ -5861,6 +5861,9 @@
1918
CASE_FLT_FN (BUILT_IN_FABS):
1919
+ case BUILT_IN_FABSD32:
1920
+ case BUILT_IN_FABSD64:
1921
+ case BUILT_IN_FABSD128:
1922
target = expand_builtin_fabs (exp, target, subtarget);
1925
@@ -10313,6 +10316,9 @@
1926
return fold_builtin_strlen (loc, type, arg0);
1928
CASE_FLT_FN (BUILT_IN_FABS):
1929
+ case BUILT_IN_FABSD32:
1930
+ case BUILT_IN_FABSD64:
1931
+ case BUILT_IN_FABSD128:
1932
return fold_builtin_fabs (loc, arg0, type);
1935
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc-target-2.c
1936
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc-target-2.c
1938
/* { dg-final { scan-assembler-times "fabs" 3 } } */
1939
/* { dg-final { scan-assembler-times "fnabs" 3 } } */
1940
/* { dg-final { scan-assembler-times "fsel" 3 } } */
1941
-/* { dg-final { scan-assembler-times "fcpsgn" 3 } } */
1942
-/* { dg-final { scan-assembler-times "xscpsgndp" 1 } } */
1943
+/* { dg-final { scan-assembler-times "fcpsgn\|xscpsgndp" 4 } } */
1945
/* fabs/fnabs/fsel */
1946
double normal1 (double a, double b) { return __builtin_copysign (a, b); }
1947
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-1.c
1948
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-1.c
1950
+/* { dg-do compile { target { powerpc*-*-* } } } */
1951
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
1952
+/* { dg-require-effective-target powerpc_p8vector_ok } */
1953
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
1956
+#define TYPE long long
1960
+#define SIGN_TYPE signed TYPE
1964
+#define UNS_TYPE unsigned TYPE
1967
+typedef vector SIGN_TYPE v_sign;
1968
+typedef vector UNS_TYPE v_uns;
1970
+v_sign sign_add (v_sign a, v_sign b)
1975
+v_sign sign_sub (v_sign a, v_sign b)
1980
+v_sign sign_shift_left (v_sign a, v_sign b)
1985
+v_sign sign_shift_right (v_sign a, v_sign b)
1990
+v_uns uns_add (v_uns a, v_uns b)
1995
+v_uns uns_sub (v_uns a, v_uns b)
2000
+v_uns uns_shift_left (v_uns a, v_uns b)
2005
+v_uns uns_shift_right (v_uns a, v_uns b)
2010
+/* { dg-final { scan-assembler-times "vaddudm" 2 } } */
2011
+/* { dg-final { scan-assembler-times "vsubudm" 2 } } */
2012
+/* { dg-final { scan-assembler-times "vsld" 2 } } */
2013
+/* { dg-final { scan-assembler-times "vsrad" 1 } } */
2014
+/* { dg-final { scan-assembler-times "vsrd" 1 } } */
2015
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c
2016
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c
2018
+/* { dg-do compile { target { powerpc*-*-* } } } */
2019
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2020
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2021
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
2032
+#define TYPE long long
2036
+#define SIGN_TYPE signed TYPE
2040
+#define UNS_TYPE unsigned TYPE
2043
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
2045
+SIGN_TYPE sa[SIZE] ALIGN_ATTR;
2046
+SIGN_TYPE sb[SIZE] ALIGN_ATTR;
2047
+SIGN_TYPE sc[SIZE] ALIGN_ATTR;
2049
+UNS_TYPE ua[SIZE] ALIGN_ATTR;
2050
+UNS_TYPE ub[SIZE] ALIGN_ATTR;
2051
+UNS_TYPE uc[SIZE] ALIGN_ATTR;
2058
+ for (i = 0; i < SIZE; i++)
2059
+ sa[i] = sb[i] + sc[i];
2067
+ for (i = 0; i < SIZE; i++)
2068
+ sa[i] = sb[i] - sc[i];
2072
+sign_shift_left (void)
2076
+ for (i = 0; i < SIZE; i++)
2077
+ sa[i] = sb[i] << sc[i];
2081
+sign_shift_right (void)
2085
+ for (i = 0; i < SIZE; i++)
2086
+ sa[i] = sb[i] >> sc[i];
2094
+ for (i = 0; i < SIZE; i++)
2095
+ sa[i] = (sb[i] > sc[i]) ? sb[i] : sc[i];
2103
+ for (i = 0; i < SIZE; i++)
2104
+ sa[i] = (sb[i] < sc[i]) ? sb[i] : sc[i];
2112
+ for (i = 0; i < SIZE; i++)
2113
+ sa[i] = (sb[i] < 0) ? -sb[i] : sb[i]; /* xor, vsubudm, vmaxsd. */
2117
+sign_eq (SIGN_TYPE val1, SIGN_TYPE val2)
2121
+ for (i = 0; i < SIZE; i++)
2122
+ sa[i] = (sb[i] == sc[i]) ? val1 : val2;
2126
+sign_lt (SIGN_TYPE val1, SIGN_TYPE val2)
2130
+ for (i = 0; i < SIZE; i++)
2131
+ sa[i] = (sb[i] < sc[i]) ? val1 : val2;
2139
+ for (i = 0; i < SIZE; i++)
2140
+ ua[i] = ub[i] + uc[i];
2148
+ for (i = 0; i < SIZE; i++)
2149
+ ua[i] = ub[i] - uc[i];
2153
+uns_shift_left (void)
2157
+ for (i = 0; i < SIZE; i++)
2158
+ ua[i] = ub[i] << uc[i];
2162
+uns_shift_right (void)
2166
+ for (i = 0; i < SIZE; i++)
2167
+ ua[i] = ub[i] >> uc[i];
2175
+ for (i = 0; i < SIZE; i++)
2176
+ ua[i] = (ub[i] > uc[i]) ? ub[i] : uc[i];
2184
+ for (i = 0; i < SIZE; i++)
2185
+ ua[i] = (ub[i] < uc[i]) ? ub[i] : uc[i];
2189
+uns_eq (UNS_TYPE val1, UNS_TYPE val2)
2193
+ for (i = 0; i < SIZE; i++)
2194
+ ua[i] = (ub[i] == uc[i]) ? val1 : val2;
2198
+uns_lt (UNS_TYPE val1, UNS_TYPE val2)
2202
+ for (i = 0; i < SIZE; i++)
2203
+ ua[i] = (ub[i] < uc[i]) ? val1 : val2;
2206
+/* { dg-final { scan-assembler-times "\[\t \]vaddudm\[\t \]" 2 } } */
2207
+/* { dg-final { scan-assembler-times "\[\t \]vsubudm\[\t \]" 3 } } */
2208
+/* { dg-final { scan-assembler-times "\[\t \]vmaxsd\[\t \]" 2 } } */
2209
+/* { dg-final { scan-assembler-times "\[\t \]vmaxud\[\t \]" 1 } } */
2210
+/* { dg-final { scan-assembler-times "\[\t \]vminsd\[\t \]" 1 } } */
2211
+/* { dg-final { scan-assembler-times "\[\t \]vminud\[\t \]" 1 } } */
2212
+/* { dg-final { scan-assembler-times "\[\t \]vsld\[\t \]" 2 } } */
2213
+/* { dg-final { scan-assembler-times "\[\t \]vsrad\[\t \]" 1 } } */
2214
+/* { dg-final { scan-assembler-times "\[\t \]vsrd\[\t \]" 1 } } */
2215
+/* { dg-final { scan-assembler-times "\[\t \]vcmpequd\[\t \]" 2 } } */
2216
+/* { dg-final { scan-assembler-times "\[\t \]vcmpgtsd\[\t \]" 1 } } */
2217
+/* { dg-final { scan-assembler-times "\[\t \]vcmpgtud\[\t \]" 1 } } */
2218
--- a/src/gcc/testsuite/gcc.target/powerpc/pr57744.c
2219
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr57744.c
2221
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
2222
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2223
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2224
+/* { dg-options "-mcpu=power8 -O3" } */
2228
+typedef unsigned U_16 __attribute__((mode(TI)));
2230
+extern int libat_compare_exchange_16 (U_16 *, U_16 *, U_16, int, int)
2231
+ __attribute__((__noinline__));
2233
+/* PR 57744: lqarx/stqcx needs even/odd register pairs. The assembler will
2234
+ complain if the compiler gets an odd/even register pair. Create a function
2235
+ which has the 16 byte compare and exchange instructions, but don't actually
2236
+ execute it, so that we can detect these failures on older machines. */
2239
+libat_compare_exchange_16 (U_16 *mptr, U_16 *eptr, U_16 newval,
2240
+ int smodel, int fmodel __attribute__((unused)))
2242
+ if (((smodel) == 0))
2243
+ return __atomic_compare_exchange_n (mptr, eptr, newval, 0, 0, 0);
2244
+ else if (((smodel) != 5))
2245
+ return __atomic_compare_exchange_n (mptr, eptr, newval, 0, 4, 0);
2247
+ return __atomic_compare_exchange_n (mptr, eptr, newval, 0, 5, 0);
2250
+U_16 a = 1, b = 1, c = -2;
2251
+volatile int do_test = 0;
2255
+ if (do_test && !libat_compare_exchange_16 (&a, &b, c, 0, 0))
2260
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-1.c
2261
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-1.c
2263
/* { dg-options "-O2 -mrecip -ffast-math -mcpu=power6" } */
2264
/* { dg-final { scan-assembler-times "frsqrte" 2 } } */
2265
/* { dg-final { scan-assembler-times "fmsub" 2 } } */
2266
-/* { dg-final { scan-assembler-times "fmul" 8 } } */
2267
-/* { dg-final { scan-assembler-times "fnmsub" 4 } } */
2268
+/* { dg-final { scan-assembler-times "fmul" 6 } } */
2269
+/* { dg-final { scan-assembler-times "fnmsub" 3 } } */
2273
--- a/src/gcc/testsuite/gcc.target/powerpc/darwin-longlong.c
2274
+++ b/src/gcc/testsuite/gcc.target/powerpc/darwin-longlong.c
2279
+#ifdef __LITTLE_ENDIAN__
2287
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-p8.c
2288
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-p8.c
2290
+/* { dg-do compile { target { powerpc*-*-* } } } */
2291
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2292
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2293
+/* { dg-options "-O2 -mcpu=power8" } */
2294
+/* { dg-final { scan-assembler-not "\[ \t\]and " } } */
2295
+/* { dg-final { scan-assembler-not "\[ \t\]or " } } */
2296
+/* { dg-final { scan-assembler-not "\[ \t\]xor " } } */
2297
+/* { dg-final { scan-assembler-not "\[ \t\]nor " } } */
2298
+/* { dg-final { scan-assembler-not "\[ \t\]eqv " } } */
2299
+/* { dg-final { scan-assembler-not "\[ \t\]andc " } } */
2300
+/* { dg-final { scan-assembler-not "\[ \t\]orc " } } */
2301
+/* { dg-final { scan-assembler-not "\[ \t\]nand " } } */
2302
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
2303
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
2304
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
2305
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
2306
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
2307
+/* { dg-final { scan-assembler "\[ \t\]xxland " } } */
2308
+/* { dg-final { scan-assembler "\[ \t\]xxlor " } } */
2309
+/* { dg-final { scan-assembler "\[ \t\]xxlxor " } } */
2310
+/* { dg-final { scan-assembler "\[ \t\]xxlnor " } } */
2311
+/* { dg-final { scan-assembler "\[ \t\]xxlandc " } } */
2312
+/* { dg-final { scan-assembler "\[ \t\]xxleqv " } } */
2313
+/* { dg-final { scan-assembler "\[ \t\]xxlorc " } } */
2314
+/* { dg-final { scan-assembler "\[ \t\]xxlnand " } } */
2317
+typedef int v4si __attribute__ ((vector_size (16)));
2322
--- a/src/gcc/testsuite/gcc.target/powerpc/mmfpgpr.c
2323
+++ b/src/gcc/testsuite/gcc.target/powerpc/mmfpgpr.c
2325
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2326
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2327
+/* { dg-require-effective-target powerpc_vsx_ok } */
2328
+/* { dg-options "-O2 -mcpu=power6x -mmfpgpr" } */
2329
+/* { dg-final { scan-assembler "mffgpr" } } */
2330
+/* { dg-final { scan-assembler "mftgpr" } } */
2332
+/* Test that we generate the instructions to move between the GPR and FPR
2333
+ registers under power6x. */
2335
+extern long return_long (void);
2336
+extern double return_double (void);
2338
+double return_double2 (void)
2340
+ return (double) return_long ();
2343
+long return_long2 (void)
2345
+ return (long) return_double ();
2347
--- a/src/gcc/testsuite/gcc.target/powerpc/pr60203.c
2348
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr60203.c
2350
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2351
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2352
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2353
+/* { dg-options "-mcpu=power8 -O3" } */
2355
+union u_ld { long double ld; double d[2]; };
2358
+pack (double a, double aa)
2367
+unpack_0 (long double x)
2375
+unpack_1 (long double x)
2382
+/* { dg-final { scan-assembler-not "stfd" } } */
2383
+/* { dg-final { scan-assembler-not "lfd" } } */
2384
+/* { dg-final { scan-assembler-not "lxsdx" } } */
2385
+/* { dg-final { scan-assembler-not "stxsdx" } } */
2386
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
2387
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
2390
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
2391
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
2393
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
2394
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2395
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
2396
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2397
+/* { dg-options "-mcpu=power8 -O2" } */
2398
+/* { dg-final { scan-assembler "mtvsrd" } } */
2399
+/* { dg-final { scan-assembler "mfvsrd" } } */
2401
+/* Check code generation for direct move for vector types. */
2403
+#define TYPE vector int
2404
+#define VSX_REG_ATTR "wa"
2406
+#include "direct-move.h"
2407
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-av.c
2408
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-av.c
2410
+/* { dg-do compile { target { powerpc*-*-* } } } */
2411
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2412
+/* { dg-require-effective-target powerpc_altivec_ok } */
2413
+/* { dg-options "-O2 -mcpu=power6 -maltivec" } */
2414
+/* { dg-final { scan-assembler-not "\[ \t\]and " } } */
2415
+/* { dg-final { scan-assembler-not "\[ \t\]or " } } */
2416
+/* { dg-final { scan-assembler-not "\[ \t\]xor " } } */
2417
+/* { dg-final { scan-assembler-not "\[ \t\]nor " } } */
2418
+/* { dg-final { scan-assembler-not "\[ \t\]andc " } } */
2419
+/* { dg-final { scan-assembler-not "\[ \t\]eqv " } } */
2420
+/* { dg-final { scan-assembler-not "\[ \t\]orc " } } */
2421
+/* { dg-final { scan-assembler-not "\[ \t\]nand " } } */
2422
+/* { dg-final { scan-assembler "\[ \t\]vand " } } */
2423
+/* { dg-final { scan-assembler "\[ \t\]vandc " } } */
2424
+/* { dg-final { scan-assembler "\[ \t\]vor " } } */
2425
+/* { dg-final { scan-assembler "\[ \t\]vxor " } } */
2426
+/* { dg-final { scan-assembler "\[ \t\]vnor " } } */
2427
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
2428
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
2429
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
2430
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
2431
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
2432
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
2433
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
2434
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
2437
+typedef int v4si __attribute__ ((vector_size (16)));
2442
--- a/src/gcc/testsuite/gcc.target/powerpc/pr43154.c
2443
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr43154.c
2445
/* { dg-do compile { target { powerpc*-*-* } } } */
2446
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2447
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
2448
/* { dg-require-effective-target powerpc_vsx_ok } */
2449
/* { dg-options "-O2 -mcpu=power7" } */
2451
--- a/src/gcc/testsuite/gcc.target/powerpc/pr59054.c
2452
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr59054.c
2454
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2455
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2456
+/* { dg-require-effective-target powerpc_vsx_ok } */
2457
+/* { dg-options "-mcpu=power7 -O0 -m64" } */
2459
+long foo (void) { return 0; }
2461
+/* { dg-final { scan-assembler-not "xxlor" } } */
2462
+/* { dg-final { scan-assembler-not "stfd" } } */
2463
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c
2464
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c
2466
+/* { dg-do compile { target { powerpc*-*-* } } } */
2467
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2468
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2469
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
2471
+#include <altivec.h>
2473
+typedef vector long long v_sign;
2474
+typedef vector unsigned long long v_uns;
2475
+typedef vector bool long long v_bool;
2477
+v_sign sign_add_1 (v_sign a, v_sign b)
2479
+ return __builtin_altivec_vaddudm (a, b);
2482
+v_sign sign_add_2 (v_sign a, v_sign b)
2484
+ return vec_add (a, b);
2487
+v_sign sign_add_3 (v_sign a, v_sign b)
2489
+ return vec_vaddudm (a, b);
2492
+v_sign sign_sub_1 (v_sign a, v_sign b)
2494
+ return __builtin_altivec_vsubudm (a, b);
2497
+v_sign sign_sub_2 (v_sign a, v_sign b)
2499
+ return vec_sub (a, b);
2503
+v_sign sign_sub_3 (v_sign a, v_sign b)
2505
+ return vec_vsubudm (a, b);
2508
+v_sign sign_min_1 (v_sign a, v_sign b)
2510
+ return __builtin_altivec_vminsd (a, b);
2513
+v_sign sign_min_2 (v_sign a, v_sign b)
2515
+ return vec_min (a, b);
2518
+v_sign sign_min_3 (v_sign a, v_sign b)
2520
+ return vec_vminsd (a, b);
2523
+v_sign sign_max_1 (v_sign a, v_sign b)
2525
+ return __builtin_altivec_vmaxsd (a, b);
2528
+v_sign sign_max_2 (v_sign a, v_sign b)
2530
+ return vec_max (a, b);
2533
+v_sign sign_max_3 (v_sign a, v_sign b)
2535
+ return vec_vmaxsd (a, b);
2538
+v_sign sign_abs (v_sign a)
2540
+ return vec_abs (a); /* xor, vsubudm, vmaxsd. */
2543
+v_bool sign_eq (v_sign a, v_sign b)
2545
+ return vec_cmpeq (a, b);
2548
+v_bool sign_lt (v_sign a, v_sign b)
2550
+ return vec_cmplt (a, b);
2553
+v_uns uns_add_2 (v_uns a, v_uns b)
2555
+ return vec_add (a, b);
2558
+v_uns uns_add_3 (v_uns a, v_uns b)
2560
+ return vec_vaddudm (a, b);
2563
+v_uns uns_sub_2 (v_uns a, v_uns b)
2565
+ return vec_sub (a, b);
2568
+v_uns uns_sub_3 (v_uns a, v_uns b)
2570
+ return vec_vsubudm (a, b);
2573
+v_uns uns_min_2 (v_uns a, v_uns b)
2575
+ return vec_min (a, b);
2578
+v_uns uns_min_3 (v_uns a, v_uns b)
2580
+ return vec_vminud (a, b);
2583
+v_uns uns_max_2 (v_uns a, v_uns b)
2585
+ return vec_max (a, b);
2588
+v_uns uns_max_3 (v_uns a, v_uns b)
2590
+ return vec_vmaxud (a, b);
2593
+v_bool uns_eq (v_uns a, v_uns b)
2595
+ return vec_cmpeq (a, b);
2598
+v_bool uns_lt (v_uns a, v_uns b)
2600
+ return vec_cmplt (a, b);
2603
+v_sign sign_rl_1 (v_sign a, v_sign b)
2605
+ return __builtin_altivec_vrld (a, b);
2608
+v_sign sign_rl_2 (v_sign a, v_uns b)
2610
+ return vec_rl (a, b);
2613
+v_uns uns_rl_2 (v_uns a, v_uns b)
2615
+ return vec_rl (a, b);
2618
+v_sign sign_sl_1 (v_sign a, v_sign b)
2620
+ return __builtin_altivec_vsld (a, b);
2623
+v_sign sign_sl_2 (v_sign a, v_uns b)
2625
+ return vec_sl (a, b);
2628
+v_sign sign_sl_3 (v_sign a, v_uns b)
2630
+ return vec_vsld (a, b);
2633
+v_uns uns_sl_2 (v_uns a, v_uns b)
2635
+ return vec_sl (a, b);
2638
+v_uns uns_sl_3 (v_uns a, v_uns b)
2640
+ return vec_vsld (a, b);
2643
+v_sign sign_sra_1 (v_sign a, v_sign b)
2645
+ return __builtin_altivec_vsrad (a, b);
2648
+v_sign sign_sra_2 (v_sign a, v_uns b)
2650
+ return vec_sra (a, b);
2653
+v_sign sign_sra_3 (v_sign a, v_uns b)
2655
+ return vec_vsrad (a, b);
2658
+/* { dg-final { scan-assembler-times "vaddudm" 5 } } */
2659
+/* { dg-final { scan-assembler-times "vsubudm" 6 } } */
2660
+/* { dg-final { scan-assembler-times "vmaxsd" 4 } } */
2661
+/* { dg-final { scan-assembler-times "vminsd" 3 } } */
2662
+/* { dg-final { scan-assembler-times "vmaxud" 2 } } */
2663
+/* { dg-final { scan-assembler-times "vminud" 2 } } */
2664
+/* { dg-final { scan-assembler-times "vcmpequd" 2 } } */
2665
+/* { dg-final { scan-assembler-times "vcmpgtsd" 1 } } */
2666
+/* { dg-final { scan-assembler-times "vcmpgtud" 1 } } */
2667
+/* { dg-final { scan-assembler-times "vrld" 3 } } */
2668
+/* { dg-final { scan-assembler-times "vsld" 5 } } */
2669
+/* { dg-final { scan-assembler-times "vsrad" 3 } } */
2670
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c
2671
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c
2673
+/* { dg-do compile { target { powerpc*-*-* } } } */
2674
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2675
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2676
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model" } */
2678
+#include <stddef.h>
2688
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
2690
+long long sign_ll[SIZE] ALIGN_ATTR;
2691
+int sign_i [SIZE] ALIGN_ATTR;
2693
+void copy_int_to_long_long (void)
2697
+ for (i = 0; i < SIZE; i++)
2698
+ sign_ll[i] = sign_i[i];
2701
+/* { dg-final { scan-assembler "vupkhsw" } } */
2702
+/* { dg-final { scan-assembler "vupklsw" } } */
2703
--- a/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-3.c
2704
+++ b/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-3.c
2706
+/* { dg-do compile } */
2707
+/* { dg-require-effective-target powerpc_altivec_ok } */
2708
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
2709
+/* { dg-options "-O -maltivec -mno-vsx" } */
2711
+typedef unsigned char V __attribute__((vector_size(16)));
2715
+ return __builtin_shuffle(x, y,
2716
+ (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
2722
+ return __builtin_shuffle(x, y,
2723
+ (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 });
2726
+/* { dg-final { scan-assembler-not "vperm" } } */
2727
+/* { dg-final { scan-assembler "vpkuhum" } } */
2728
+/* { dg-final { scan-assembler "vpkuwum" } } */
2729
--- a/src/gcc/testsuite/gcc.target/powerpc/pr58673-1.c
2730
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr58673-1.c
2732
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2733
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2734
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2735
+/* { dg-options "-mcpu=power8 -m64 -O1" } */
2739
+ QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode,
2740
+ DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE
2742
+enum bytecode_opcode
2744
+ neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI,
2745
+ constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI,
2746
+ loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI,
2747
+ storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK,
2748
+ addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI,
2749
+ convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU,
2750
+ convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI,
2751
+ convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF,
2752
+ convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF,
2753
+ convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF,
2754
+ convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI,
2755
+ convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI,
2756
+ convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI,
2757
+ convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT,
2758
+ convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF,
2759
+ addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI,
2760
+ mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF,
2761
+ divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI,
2762
+ xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU,
2763
+ rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI,
2764
+ leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF,
2765
+ geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI,
2766
+ eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI,
2767
+ negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI,
2768
+ predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI,
2769
+ preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF,
2770
+ preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP,
2771
+ postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI,
2772
+ postincSI, postincDI, postincP, postincSF, postincDF, postincXF,
2773
+ postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI,
2774
+ caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE
2776
+struct binary_operator
2778
+ enum bytecode_opcode opcode;
2779
+ enum typecode arg0;
2781
+static struct conversion_recipe
2783
+ unsigned char *opcodes;
2786
+conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int)
2787
+ LAST_AND_UNUSED_TYPECODE)];
2788
+static struct conversion_recipe
2789
+deduce_conversion (from, to)
2790
+ enum typecode from, to;
2792
+ (conversion_recipe[(int) from][(int) to].
2793
+ opcodes ? 0 : (conversion_recipe[(int) from][(int) to] =
2794
+ deduce_conversion (from, to), 0));
2798
+bc_expand_binary_operation (optab, resulttype, arg0, arg1)
2799
+ struct binary_operator optab[];
2801
+ int i, besti, cost, bestcost;
2802
+ enum typecode resultcode, arg0code;
2803
+ for (i = 0; optab[i].opcode != -1; ++i)
2805
+ (conversion_recipe[(int) arg0code][(int) optab[i].arg0].
2806
+ opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] =
2807
+ deduce_conversion (arg0code, optab[i].arg0), 0));
2810
--- a/src/gcc/testsuite/gcc.target/powerpc/no-r11-1.c
2811
+++ b/src/gcc/testsuite/gcc.target/powerpc/no-r11-1.c
2813
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2814
/* { dg-skip-if "" { *-*-darwin* } { "*" } { "" } } */
2815
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
2816
/* { dg-options "-O2 -mno-pointers-to-nested-functions" } */
2819
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-fp.c
2820
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-fp.c
2822
+/* { dg-do compile { target { powerpc*-*-* } } } */
2823
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2824
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2825
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf -fno-math-errno" } */
2827
+float abs_sf (float *p)
2830
+ __asm__ ("# reg %x0" : "+v" (f));
2831
+ return __builtin_fabsf (f);
2834
+float nabs_sf (float *p)
2837
+ __asm__ ("# reg %x0" : "+v" (f));
2838
+ return - __builtin_fabsf (f);
2841
+float neg_sf (float *p)
2844
+ __asm__ ("# reg %x0" : "+v" (f));
2848
+float add_sf (float *p, float *q)
2852
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2856
+float sub_sf (float *p, float *q)
2860
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2864
+float mul_sf (float *p, float *q)
2868
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2872
+float div_sf (float *p, float *q)
2876
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2880
+float sqrt_sf (float *p)
2883
+ __asm__ ("# reg %x0" : "+v" (f));
2884
+ return __builtin_sqrtf (f);
2888
+double abs_df (double *p)
2891
+ __asm__ ("# reg %x0" : "+v" (d));
2892
+ return __builtin_fabs (d);
2895
+double nabs_df (double *p)
2898
+ __asm__ ("# reg %x0" : "+v" (d));
2899
+ return - __builtin_fabs (d);
2902
+double neg_df (double *p)
2905
+ __asm__ ("# reg %x0" : "+v" (d));
2909
+double add_df (double *p, double *q)
2913
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2917
+double sub_df (double *p, double *q)
2921
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2925
+double mul_df (double *p, double *q)
2929
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2933
+double div_df (double *p, double *q)
2937
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2941
+double sqrt_df (float *p)
2944
+ __asm__ ("# reg %x0" : "+v" (d));
2945
+ return __builtin_sqrt (d);
2948
+/* { dg-final { scan-assembler "xsabsdp" } } */
2949
+/* { dg-final { scan-assembler "xsadddp" } } */
2950
+/* { dg-final { scan-assembler "xsaddsp" } } */
2951
+/* { dg-final { scan-assembler "xsdivdp" } } */
2952
+/* { dg-final { scan-assembler "xsdivsp" } } */
2953
+/* { dg-final { scan-assembler "xsmuldp" } } */
2954
+/* { dg-final { scan-assembler "xsmulsp" } } */
2955
+/* { dg-final { scan-assembler "xsnabsdp" } } */
2956
+/* { dg-final { scan-assembler "xsnegdp" } } */
2957
+/* { dg-final { scan-assembler "xssqrtdp" } } */
2958
+/* { dg-final { scan-assembler "xssqrtsp" } } */
2959
+/* { dg-final { scan-assembler "xssubdp" } } */
2960
+/* { dg-final { scan-assembler "xssubsp" } } */
2961
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
2962
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
2964
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
2965
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2966
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
2967
+/* { dg-require-effective-target p8vector_hw } */
2968
+/* { dg-options "-mcpu=power8 -O2" } */
2970
+/* Check whether we get the right bits for direct move at runtime. */
2972
+#define TYPE vector int
2974
+#define VSX_REG_ATTR "wa"
2976
+#include "direct-move.h"
2977
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3-p7.c
2978
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3-p7.c
2980
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2981
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2982
+/* { dg-require-effective-target powerpc_vsx_ok } */
2983
+/* { dg-options "-O2 -mcpu=power7" } */
2984
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
2985
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
2986
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
2987
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
2988
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
2989
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
2990
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
2991
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
2992
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
2993
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
2994
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
2995
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
2996
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
2997
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
2998
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
2999
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
3000
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
3001
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
3003
+/* On power7, for 128-bit types, ORC/ANDC/EQV might not show up, since the
3004
+ vector unit doesn't support these, so the appropriate combine patterns may
3005
+ not be generated. */
3009
+#define TYPE __int128_t
3011
+typedef int v4si __attribute__ ((vector_size (16)));
3017
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c
3018
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c
3020
+/* { dg-do compile { target { powerpc*-*-* } } } */
3021
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3022
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3023
+/* { dg-options "-mcpu=power8 -O3 -ftree-vectorize -fvect-cost-model" } */
3025
+#include <altivec.h>
3027
+typedef vector long long vll_sign;
3028
+typedef vector unsigned long long vll_uns;
3029
+typedef vector bool long long vll_bool;
3031
+typedef vector int vi_sign;
3032
+typedef vector unsigned int vi_uns;
3033
+typedef vector bool int vi_bool;
3035
+typedef vector short vs_sign;
3036
+typedef vector unsigned short vs_uns;
3037
+typedef vector bool short vs_bool;
3039
+typedef vector signed char vc_sign;
3040
+typedef vector unsigned char vc_uns;
3041
+typedef vector bool char vc_bool;
3044
+vi_sign vi_pack_1 (vll_sign a, vll_sign b)
3046
+ return __builtin_altivec_vpkudum (a, b);
3049
+vi_sign vi_pack_2 (vll_sign a, vll_sign b)
3051
+ return vec_pack (a, b);
3054
+vi_sign vi_pack_3 (vll_sign a, vll_sign b)
3056
+ return vec_vpkudum (a, b);
3059
+vs_sign vs_pack_1 (vi_sign a, vi_sign b)
3061
+ return __builtin_altivec_vpkuwum (a, b);
3064
+vs_sign vs_pack_2 (vi_sign a, vi_sign b)
3066
+ return vec_pack (a, b);
3069
+vs_sign vs_pack_3 (vi_sign a, vi_sign b)
3071
+ return vec_vpkuwum (a, b);
3074
+vc_sign vc_pack_1 (vs_sign a, vs_sign b)
3076
+ return __builtin_altivec_vpkuhum (a, b);
3079
+vc_sign vc_pack_2 (vs_sign a, vs_sign b)
3081
+ return vec_pack (a, b);
3084
+vc_sign vc_pack_3 (vs_sign a, vs_sign b)
3086
+ return vec_vpkuhum (a, b);
3089
+vll_sign vll_unpack_hi_1 (vi_sign a)
3091
+ return __builtin_altivec_vupkhsw (a);
3094
+vll_sign vll_unpack_hi_2 (vi_sign a)
3096
+ return vec_unpackh (a);
3099
+vll_sign vll_unpack_hi_3 (vi_sign a)
3101
+ return __builtin_vec_vupkhsw (a);
3104
+vll_sign vll_unpack_lo_1 (vi_sign a)
3106
+ return vec_vupklsw (a);
3109
+vll_sign vll_unpack_lo_2 (vi_sign a)
3111
+ return vec_unpackl (a);
3114
+vll_sign vll_unpack_lo_3 (vi_sign a)
3116
+ return vec_vupklsw (a);
3119
+/* { dg-final { scan-assembler-times "vpkudum" 3 } } */
3120
+/* { dg-final { scan-assembler-times "vpkuwum" 3 } } */
3121
+/* { dg-final { scan-assembler-times "vpkuhum" 3 } } */
3122
+/* { dg-final { scan-assembler-times "vupklsw" 3 } } */
3123
+/* { dg-final { scan-assembler-times "vupkhsw" 3 } } */
3124
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c
3125
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c
3127
+/* { dg-do compile { target { powerpc*-*-* } } } */
3128
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3129
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3130
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model" } */
3132
+#include <stddef.h>
3142
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
3144
+long long sign_ll[SIZE] ALIGN_ATTR;
3145
+int sign_i [SIZE] ALIGN_ATTR;
3147
+void copy_long_long_to_int (void)
3151
+ for (i = 0; i < SIZE; i++)
3152
+ sign_i[i] = sign_ll[i];
3155
+/* { dg-final { scan-assembler "vpkudum" } } */
3156
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move.h
3157
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move.h
3159
+/* Test functions for direct move support. */
3162
+extern void abort (void);
3164
+#ifndef VSX_REG_ATTR
3165
+#define VSX_REG_ATTR "wa"
3168
+void __attribute__((__noinline__))
3169
+copy (TYPE *a, TYPE *b)
3175
+void __attribute__((__noinline__))
3176
+load_gpr (TYPE *a, TYPE *b)
3179
+ __asm__ ("# gpr, reg = %0" : "+b" (c));
3185
+void __attribute__((__noinline__))
3186
+load_fpr (TYPE *a, TYPE *b)
3189
+ __asm__ ("# fpr, reg = %0" : "+d" (c));
3195
+void __attribute__((__noinline__))
3196
+load_altivec (TYPE *a, TYPE *b)
3199
+ __asm__ ("# altivec, reg = %0" : "+v" (c));
3205
+void __attribute__((__noinline__))
3206
+load_vsx (TYPE *a, TYPE *b)
3209
+ __asm__ ("# vsx, reg = %x0" : "+" VSX_REG_ATTR (c));
3214
+#ifndef NO_GPR_TO_VSX
3215
+void __attribute__((__noinline__))
3216
+load_gpr_to_vsx (TYPE *a, TYPE *b)
3220
+ __asm__ ("# gpr, reg = %0" : "+b" (c));
3222
+ __asm__ ("# vsx, reg = %x0" : "+" VSX_REG_ATTR (d));
3227
+#ifndef NO_VSX_TO_GPR
3228
+void __attribute__((__noinline__))
3229
+load_vsx_to_gpr (TYPE *a, TYPE *b)
3233
+ __asm__ ("# vsx, reg = %x0" : "+" VSX_REG_ATTR (c));
3235
+ __asm__ ("# gpr, reg = %0" : "+b" (d));
3241
+typedef void (fn_type (TYPE *, TYPE *));
3243
+struct test_struct {
3248
+const struct test_struct test_functions[] = {
3251
+ { load_gpr, "load_gpr" },
3254
+ { load_fpr, "load_fpr" },
3257
+ { load_altivec, "load_altivec" },
3260
+ { load_vsx, "load_vsx" },
3262
+#ifndef NO_GPR_TO_VSX
3263
+ { load_gpr_to_vsx, "load_gpr_to_vsx" },
3265
+#ifndef NO_VSX_TO_GPR
3266
+ { load_vsx_to_gpr, "load_vsx_to_gpr" },
3270
+/* Test a given value for each of the functions. */
3271
+void __attribute__((__noinline__))
3272
+test_value (TYPE a)
3276
+ for (i = 0; i < sizeof (test_functions) / sizeof (test_functions[0]); i++)
3280
+ test_functions[i].func (&a, &b);
3281
+ if (memcmp ((void *)&a, (void *)&b, sizeof (TYPE)) != 0)
3286
+/* Main program. */
3293
+ unsigned char bytes[sizeof (TYPE)];
3297
+ TYPE value = (TYPE)-5;
3298
+ for (i = 0; i < 12; i++)
3300
+ test_value (value);
3304
+ for (i = 0; i < 8*sizeof (TYPE); i++)
3305
+ test_value (((TYPE)1) << i);
3308
+ TYPE value = (TYPE)0;
3309
+ for (i = 0; i < 10; i++)
3311
+ test_value (value);
3312
+ test_value (~ value);
3316
+ for (i = 0; i < 8*sizeof (TYPE); i++)
3317
+ test_value (((TYPE)1) << i);
3320
+ TYPE value = (TYPE)-5;
3321
+ for (i = 0; i < 12; i++)
3323
+ test_value (value);
3327
+ test_value ((TYPE)3.1415926535);
3328
+ test_value ((TYPE)1.23456);
3329
+ test_value ((TYPE)(-0.0));
3330
+ test_value ((TYPE)NAN);
3331
+ test_value ((TYPE)+INFINITY);
3332
+ test_value ((TYPE)-INFINITY);
3335
+ for (j = 0; j < 10; j++)
3337
+ for (i = 0; i < sizeof (TYPE); i++)
3338
+ u.bytes[i] = (unsigned char) (random () >> 4);
3340
+ test_value (u.value);
3347
--- a/src/gcc/testsuite/gcc.target/powerpc/sd-vsx.c
3348
+++ b/src/gcc/testsuite/gcc.target/powerpc/sd-vsx.c
3350
+/* { dg-do compile { target { powerpc*-*-* } } } */
3351
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3352
+/* { dg-require-effective-target powerpc_vsx_ok } */
3353
+/* { dg-options "-O2 -mcpu=power7 -mhard-dfp" } */
3354
+/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */
3355
+/* { dg-final { scan-assembler-times "stfiwx" 1 } } */
3356
+/* { dg-final { scan-assembler-not "lfd" } } */
3357
+/* { dg-final { scan-assembler-not "stfd" } } */
3358
+/* { dg-final { scan-assembler-times "dctdp" 2 } } */
3359
+/* { dg-final { scan-assembler-times "dadd" 1 } } */
3360
+/* { dg-final { scan-assembler-times "drsp" 1 } } */
3362
+/* Test that power7 can directly load/store SDmode variables without using a
3366
+void inc_dec32 (void)
3368
+ a += (_Decimal32) 1.0;
3370
--- a/src/gcc/testsuite/gcc.target/powerpc/pr58673-2.c
3371
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr58673-2.c
3373
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3374
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3375
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3376
+/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */
3378
+#include <stddef.h>
3379
+#include <stdlib.h>
3381
+#include <string.h>
3383
+typedef long unsigned int size_t;
3384
+typedef struct _IO_FILE FILE;
3385
+typedef float real;
3386
+typedef real rvec[3];
3387
+typedef real matrix[3][3];
3388
+typedef real tensor[3][3];
3391
+ F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC,
3392
+ F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ,
3393
+ F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES,
3394
+ F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE,
3395
+ F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD,
3396
+ F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES,
3397
+ F_DVDL, F_DVDLKIN, F_NRE
3407
+ real rA, krA, rB, krB;
3414
+ t_iparams *iparams;
3439
+ eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy,
3440
+ eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory =
3441
+ eoObsNR, eoInter, eoUseVirial, eoNR
3443
+extern char *eoNames[eoNR];
3457
+ real act_value[eoObsNR];
3458
+ real av_value[eoObsNR];
3459
+ real ref_value[eoObsNR];
3460
+ int bObsUsed[eoObsNR];
3461
+ int nLJ, nBU, nQ, nIP;
3466
+pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile,
3469
+ static FILE *prop;
3470
+ static FILE **out = ((void *) 0);
3471
+ static FILE **qq = ((void *) 0);
3472
+ static FILE **ip = ((void *) 0);
3479
+ if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0))
3480
+ && (ip == ((void *) 0)))
3482
+ for (i = j = 0; (i < eoObsNR); i++)
3484
+ if (tcr->bObsUsed[i])
3488
+ (__builtin_constant_p (eoNames[i])
3489
+ && ((size_t) (const void *) ((eoNames[i]) + 1) -
3490
+ (size_t) (const void *) (eoNames[i]) ==
3491
+ 1) ? (((const char *) (eoNames[i]))[0] ==
3492
+ '\0' ? (char *) calloc ((size_t) 1,
3511
+ )): __strdup (eoNames[i])));
3514
+ (__builtin_constant_p (buf)
3515
+ && ((size_t) (const void *) ((buf) + 1) -
3516
+ (size_t) (const void *) (buf) ==
3517
+ 1) ? (((const char *) (buf))[0] ==
3518
+ '\0' ? (char *) calloc ((size_t) 1,
3536
+ )): __strdup (buf)));
3541
+ for (i = 0; (i < tcr->nLJ); i++)
3543
+ if (tcr->tcLJ[i].bPrint)
3545
+ xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])),
3554
+do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t,
3555
+ int step, real ener[], t_forcerec * fr, t_inputrec * ir,
3556
+ int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver,
3557
+ int nmols, t_commrec * cr, matrix box, tensor virial,
3558
+ tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt)
3560
+ int i, j, ati, atj, atnr2, type, ftype;
3561
+ real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf;
3562
+ real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind;
3563
+ int bTest, bPrint;
3564
+ t_coupl_iparams *tip;
3567
+ pr_ff (tcr, t, idef, cr, nfile, fnm);
3569
+ for (i = 0; (i < eoObsNR); i++)
3572
+ calc_deviation (tcr->av_value[i], tcr->act_value[i],
3573
+ tcr->ref_value[i]);
3574
+ prdev[i] = tcr->ref_value[i] - tcr->act_value[i];
3577
+ pr_dev (tcr, t, prdev, cr, nfile, fnm);
3578
+ for (i = 0; (i < atnr2); i++)
3580
+ factor = dt * deviation[tip->eObs];
3584
+ if (fabs (tip->xi.harmonic.krA) > 1.2e-38)
3585
+ idef->iparams[type].harmonic.krA *=
3586
+ (1 + factor / tip->xi.harmonic.krA);
3590
--- a/src/gcc/testsuite/gcc.target/powerpc/atomic-p7.c
3591
+++ b/src/gcc/testsuite/gcc.target/powerpc/atomic-p7.c
3593
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3594
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3595
+/* { dg-require-effective-target powerpc_vsx_ok } */
3596
+/* { dg-options "-mcpu=power7 -O2" } */
3597
+/* { dg-final { scan-assembler-not "lbarx" } } */
3598
+/* { dg-final { scan-assembler-not "lharx" } } */
3599
+/* { dg-final { scan-assembler-times "lwarx" 18 } } */
3600
+/* { dg-final { scan-assembler-times "ldarx" 6 } } */
3601
+/* { dg-final { scan-assembler-not "lqarx" } } */
3602
+/* { dg-final { scan-assembler-not "stbcx" } } */
3603
+/* { dg-final { scan-assembler-not "sthcx" } } */
3604
+/* { dg-final { scan-assembler-times "stwcx" 18 } } */
3605
+/* { dg-final { scan-assembler-times "stdcx" 6 } } */
3606
+/* { dg-final { scan-assembler-not "stqcx" } } */
3607
+/* { dg-final { scan-assembler-times "bl __atomic" 6 } } */
3608
+/* { dg-final { scan-assembler-times "isync" 12 } } */
3609
+/* { dg-final { scan-assembler-times "lwsync" 8 } } */
3610
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
3611
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
3612
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
3613
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
3614
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
3616
+/* Test for the byte atomic operations on power8 using lbarx/stbcx. */
3618
+char_fetch_add_relaxed (char *ptr, int value)
3620
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3624
+char_fetch_sub_consume (char *ptr, int value)
3626
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3630
+char_fetch_and_acquire (char *ptr, int value)
3632
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3636
+char_fetch_ior_release (char *ptr, int value)
3638
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3642
+char_fetch_xor_acq_rel (char *ptr, int value)
3644
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3648
+char_fetch_nand_seq_cst (char *ptr, int value)
3650
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3653
+/* Test for the half word atomic operations on power8 using lharx/sthcx. */
3655
+short_fetch_add_relaxed (short *ptr, int value)
3657
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3661
+short_fetch_sub_consume (short *ptr, int value)
3663
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3667
+short_fetch_and_acquire (short *ptr, int value)
3669
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3673
+short_fetch_ior_release (short *ptr, int value)
3675
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3679
+short_fetch_xor_acq_rel (short *ptr, int value)
3681
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3685
+short_fetch_nand_seq_cst (short *ptr, int value)
3687
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3690
+/* Test for the word atomic operations on power8 using lwarx/stwcx. */
3692
+int_fetch_add_relaxed (int *ptr, int value)
3694
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3698
+int_fetch_sub_consume (int *ptr, int value)
3700
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3704
+int_fetch_and_acquire (int *ptr, int value)
3706
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3710
+int_fetch_ior_release (int *ptr, int value)
3712
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3716
+int_fetch_xor_acq_rel (int *ptr, int value)
3718
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3722
+int_fetch_nand_seq_cst (int *ptr, int value)
3724
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3727
+/* Test for the double word atomic operations on power8 using ldarx/stdcx. */
3729
+long_fetch_add_relaxed (long *ptr, long value)
3731
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3735
+long_fetch_sub_consume (long *ptr, long value)
3737
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3741
+long_fetch_and_acquire (long *ptr, long value)
3743
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3747
+long_fetch_ior_release (long *ptr, long value)
3749
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3753
+long_fetch_xor_acq_rel (long *ptr, long value)
3755
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3759
+long_fetch_nand_seq_cst (long *ptr, long value)
3761
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3764
+/* Test for the quad word atomic operations on power8 using ldarx/stdcx. */
3766
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
3768
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3772
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
3774
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3778
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
3780
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3784
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
3786
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3790
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
3792
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3796
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
3798
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3800
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-3.c
3801
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-3.c
3803
/* { dg-do compile { target { { powerpc*-*-* } && { ! powerpc*-apple-darwin* } } } } */
3804
/* { dg-require-effective-target powerpc_fprs } */
3805
/* { dg-options "-O2 -mrecip -ffast-math -mcpu=power7" } */
3806
-/* { dg-final { scan-assembler-times "xsrsqrtedp" 1 } } */
3807
+/* { dg-final { scan-assembler-times "xsrsqrtedp\|frsqrte\ " 1 } } */
3808
/* { dg-final { scan-assembler-times "xsmsub.dp\|fmsub\ " 1 } } */
3809
-/* { dg-final { scan-assembler-times "xsmuldp" 4 } } */
3810
+/* { dg-final { scan-assembler-times "xsmuldp\|fmul\ " 4 } } */
3811
/* { dg-final { scan-assembler-times "xsnmsub.dp\|fnmsub\ " 2 } } */
3812
-/* { dg-final { scan-assembler-times "frsqrtes" 1 } } */
3813
-/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
3814
-/* { dg-final { scan-assembler-times "fmuls" 4 } } */
3815
-/* { dg-final { scan-assembler-times "fnmsubs" 2 } } */
3816
+/* { dg-final { scan-assembler-times "xsrsqrtesp\|frsqrtes" 1 } } */
3817
+/* { dg-final { scan-assembler-times "xsmsub.sp\|fmsubs" 1 } } */
3818
+/* { dg-final { scan-assembler-times "xsmulsp\|fmuls" 2 } } */
3819
+/* { dg-final { scan-assembler-times "xsnmsub.sp\|fnmsubs" 1 } } */
3823
--- a/src/gcc/testsuite/gcc.target/powerpc/no-r11-2.c
3824
+++ b/src/gcc/testsuite/gcc.target/powerpc/no-r11-2.c
3826
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3827
/* { dg-skip-if "" { *-*-darwin* } { "*" } { "" } } */
3828
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
3829
/* { dg-options "-O2 -mpointers-to-nested-functions" } */
3832
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
3833
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
3835
+/* { dg-do compile { target { powerpc*-*-* } } } */
3836
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3837
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3838
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */
3840
+float load_sf (float *p)
3843
+ __asm__ ("# reg %x0" : "+v" (f));
3847
+double load_df (double *p)
3850
+ __asm__ ("# reg %x0" : "+v" (d));
3854
+double load_dfsf (float *p)
3856
+ double d = (double) *p;
3857
+ __asm__ ("# reg %x0" : "+v" (d));
3861
+void store_sf (float *p, float f)
3863
+ __asm__ ("# reg %x0" : "+v" (f));
3867
+void store_df (double *p, double d)
3869
+ __asm__ ("# reg %x0" : "+v" (d));
3873
+/* { dg-final { scan-assembler "lxsspx" } } */
3874
+/* { dg-final { scan-assembler "lxsdx" } } */
3875
+/* { dg-final { scan-assembler "stxsspx" } } */
3876
+/* { dg-final { scan-assembler "stxsdx" } } */
3877
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3-p8.c
3878
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3-p8.c
3880
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3881
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3882
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3883
+/* { dg-options "-O2 -mcpu=power8" } */
3884
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
3885
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
3886
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
3887
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
3888
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
3889
+/* { dg-final { scan-assembler "\[ \t\]eqv " } } */
3890
+/* { dg-final { scan-assembler "\[ \t\]orc " } } */
3891
+/* { dg-final { scan-assembler "\[ \t\]nand " } } */
3892
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
3893
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
3894
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
3895
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
3896
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
3897
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
3898
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
3899
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
3900
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
3901
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
3902
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
3903
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
3904
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
3908
+#define TYPE __int128_t
3910
+typedef int v4si __attribute__ ((vector_size (16)));
3916
--- a/src/gcc/testsuite/gcc.target/powerpc/htm-xl-intrin-1.c
3917
+++ b/src/gcc/testsuite/gcc.target/powerpc/htm-xl-intrin-1.c
3919
+/* This checks the availability of the XL compiler intrinsics for
3920
+ transactional execution with the expected prototypes. */
3922
+/* { dg-do compile { target { powerpc*-*-* } } } */
3923
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3924
+/* { dg-require-effective-target powerpc_htm_ok } */
3925
+/* { dg-options "-O2 -mhtm" } */
3927
+#include <htmxlintrin.h>
3930
+foo (void *TM_buff, long *result, unsigned char *code)
3932
+ *result++ = __TM_simple_begin ();
3933
+ *result++ = __TM_begin (TM_buff);
3934
+ *result++ = __TM_end ();
3936
+ __TM_named_abort (*code);
3939
+ *result++ = __TM_is_user_abort (TM_buff);
3940
+ *result++ = __TM_is_named_user_abort (TM_buff, code);
3941
+ *result++ = __TM_is_illegal (TM_buff);
3942
+ *result++ = __TM_is_footprint_exceeded (TM_buff);
3943
+ *result++ = __TM_nesting_depth (TM_buff);
3944
+ *result++ = __TM_is_nested_too_deep (TM_buff);
3945
+ *result++ = __TM_is_conflict (TM_buff);
3946
+ *result++ = __TM_is_failure_persistent (TM_buff);
3947
+ *result++ = __TM_failure_address (TM_buff);
3948
+ *result++ = __TM_failure_code (TM_buff);
3951
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c
3952
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c
3954
+/* { dg-do compile { target { powerpc*-*-* } } } */
3955
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3956
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3957
+/* { dg-options "-mcpu=power8 -O3 -ftree-vectorize -fvect-cost-model" } */
3959
+#include <altivec.h>
3961
+typedef vector long long vll_sign;
3962
+typedef vector unsigned long long vll_uns;
3963
+typedef vector bool long long vll_bool;
3965
+typedef vector int vi_sign;
3966
+typedef vector unsigned int vi_uns;
3967
+typedef vector bool int vi_bool;
3969
+typedef vector short vs_sign;
3970
+typedef vector unsigned short vs_uns;
3971
+typedef vector bool short vs_bool;
3973
+typedef vector signed char vc_sign;
3974
+typedef vector unsigned char vc_uns;
3975
+typedef vector bool char vc_bool;
3977
+vll_sign vll_clz_1 (vll_sign a)
3979
+ return __builtin_altivec_vclzd (a);
3982
+vll_sign vll_clz_2 (vll_sign a)
3984
+ return vec_vclz (a);
3987
+vll_sign vll_clz_3 (vll_sign a)
3989
+ return vec_vclzd (a);
3992
+vll_uns vll_clz_4 (vll_uns a)
3994
+ return vec_vclz (a);
3997
+vll_uns vll_clz_5 (vll_uns a)
3999
+ return vec_vclzd (a);
4002
+vi_sign vi_clz_1 (vi_sign a)
4004
+ return __builtin_altivec_vclzw (a);
4007
+vi_sign vi_clz_2 (vi_sign a)
4009
+ return vec_vclz (a);
4012
+vi_sign vi_clz_3 (vi_sign a)
4014
+ return vec_vclzw (a);
4017
+vi_uns vi_clz_4 (vi_uns a)
4019
+ return vec_vclz (a);
4022
+vi_uns vi_clz_5 (vi_uns a)
4024
+ return vec_vclzw (a);
4027
+vs_sign vs_clz_1 (vs_sign a)
4029
+ return __builtin_altivec_vclzh (a);
4032
+vs_sign vs_clz_2 (vs_sign a)
4034
+ return vec_vclz (a);
4037
+vs_sign vs_clz_3 (vs_sign a)
4039
+ return vec_vclzh (a);
4042
+vs_uns vs_clz_4 (vs_uns a)
4044
+ return vec_vclz (a);
4047
+vs_uns vs_clz_5 (vs_uns a)
4049
+ return vec_vclzh (a);
4052
+vc_sign vc_clz_1 (vc_sign a)
4054
+ return __builtin_altivec_vclzb (a);
4057
+vc_sign vc_clz_2 (vc_sign a)
4059
+ return vec_vclz (a);
4062
+vc_sign vc_clz_3 (vc_sign a)
4064
+ return vec_vclzb (a);
4067
+vc_uns vc_clz_4 (vc_uns a)
4069
+ return vec_vclz (a);
4072
+vc_uns vc_clz_5 (vc_uns a)
4074
+ return vec_vclzb (a);
4077
+vll_sign vll_popcnt_1 (vll_sign a)
4079
+ return __builtin_altivec_vpopcntd (a);
4082
+vll_sign vll_popcnt_2 (vll_sign a)
4084
+ return vec_vpopcnt (a);
4087
+vll_sign vll_popcnt_3 (vll_sign a)
4089
+ return vec_vpopcntd (a);
4092
+vll_uns vll_popcnt_4 (vll_uns a)
4094
+ return vec_vpopcnt (a);
4097
+vll_uns vll_popcnt_5 (vll_uns a)
4099
+ return vec_vpopcntd (a);
4102
+vi_sign vi_popcnt_1 (vi_sign a)
4104
+ return __builtin_altivec_vpopcntw (a);
4107
+vi_sign vi_popcnt_2 (vi_sign a)
4109
+ return vec_vpopcnt (a);
4112
+vi_sign vi_popcnt_3 (vi_sign a)
4114
+ return vec_vpopcntw (a);
4117
+vi_uns vi_popcnt_4 (vi_uns a)
4119
+ return vec_vpopcnt (a);
4122
+vi_uns vi_popcnt_5 (vi_uns a)
4124
+ return vec_vpopcntw (a);
4127
+vs_sign vs_popcnt_1 (vs_sign a)
4129
+ return __builtin_altivec_vpopcnth (a);
4132
+vs_sign vs_popcnt_2 (vs_sign a)
4134
+ return vec_vpopcnt (a);
4137
+vs_sign vs_popcnt_3 (vs_sign a)
4139
+ return vec_vpopcnth (a);
4142
+vs_uns vs_popcnt_4 (vs_uns a)
4144
+ return vec_vpopcnt (a);
4147
+vs_uns vs_popcnt_5 (vs_uns a)
4149
+ return vec_vpopcnth (a);
4152
+vc_sign vc_popcnt_1 (vc_sign a)
4154
+ return __builtin_altivec_vpopcntb (a);
4157
+vc_sign vc_popcnt_2 (vc_sign a)
4159
+ return vec_vpopcnt (a);
4162
+vc_sign vc_popcnt_3 (vc_sign a)
4164
+ return vec_vpopcntb (a);
4167
+vc_uns vc_popcnt_4 (vc_uns a)
4169
+ return vec_vpopcnt (a);
4172
+vc_uns vc_popcnt_5 (vc_uns a)
4174
+ return vec_vpopcntb (a);
4177
+vc_uns vc_gbb_1 (vc_uns a)
4179
+ return __builtin_altivec_vgbbd (a);
4182
+vc_sign vc_gbb_2 (vc_sign a)
4184
+ return vec_vgbbd (a);
4187
+vc_uns vc_gbb_3 (vc_uns a)
4189
+ return vec_vgbbd (a);
4192
+/* { dg-final { scan-assembler-times "vclzd" 5 } } */
4193
+/* { dg-final { scan-assembler-times "vclzw" 5 } } */
4194
+/* { dg-final { scan-assembler-times "vclzh" 5 } } */
4195
+/* { dg-final { scan-assembler-times "vclzb" 5 } } */
4197
+/* { dg-final { scan-assembler-times "vpopcntd" 5 } } */
4198
+/* { dg-final { scan-assembler-times "vpopcntw" 5 } } */
4199
+/* { dg-final { scan-assembler-times "vpopcnth" 5 } } */
4200
+/* { dg-final { scan-assembler-times "vpopcntb" 5 } } */
4202
+/* { dg-final { scan-assembler-times "vgbbd" 3 } } */
4203
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3-av.c
4204
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3-av.c
4206
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
4207
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4208
+/* { dg-require-effective-target powerpc_altivec_ok } */
4209
+/* { dg-options "-O2 -mcpu=power6 -mabi=altivec -maltivec -mno-vsx" } */
4210
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
4211
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
4212
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
4213
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
4214
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
4215
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
4216
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
4217
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
4218
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
4219
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
4220
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
4221
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
4222
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
4223
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
4224
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
4225
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
4226
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
4227
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
4229
+/* On altivec, for 128-bit types, ORC/ANDC/EQV might not show up, since the
4230
+ vector unit doesn't support these, so the appropriate combine patterns may
4231
+ not be generated. */
4235
+#define TYPE __int128_t
4237
+typedef int v4si __attribute__ ((vector_size (16)));
4243
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c
4244
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c
4246
+/* { dg-do compile { target { powerpc*-*-* } } } */
4247
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4248
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4249
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4259
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
4261
+#define DO_BUILTIN(PREFIX, TYPE, CLZ, POPCNT) \
4262
+TYPE PREFIX ## _a[SIZE] ALIGN_ATTR; \
4263
+TYPE PREFIX ## _b[SIZE] ALIGN_ATTR; \
4266
+PREFIX ## _clz (void) \
4268
+ unsigned long i; \
4270
+ for (i = 0; i < SIZE; i++) \
4271
+ PREFIX ## _a[i] = CLZ (PREFIX ## _b[i]); \
4275
+PREFIX ## _popcnt (void) \
4277
+ unsigned long i; \
4279
+ for (i = 0; i < SIZE; i++) \
4280
+ PREFIX ## _a[i] = POPCNT (PREFIX ## _b[i]); \
4283
+#if !defined(DO_LONG_LONG) && !defined(DO_LONG) && !defined(DO_INT) && !defined(DO_SHORT) && !defined(DO_CHAR)
4288
+/* At the moment, only int is auto vectorized. */
4289
+DO_BUILTIN (sll, long long, __builtin_clzll, __builtin_popcountll)
4290
+DO_BUILTIN (ull, unsigned long long, __builtin_clzll, __builtin_popcountll)
4293
+#if defined(_ARCH_PPC64) && DO_LONG
4294
+DO_BUILTIN (sl, long, __builtin_clzl, __builtin_popcountl)
4295
+DO_BUILTIN (ul, unsigned long, __builtin_clzl, __builtin_popcountl)
4299
+DO_BUILTIN (si, int, __builtin_clz, __builtin_popcount)
4300
+DO_BUILTIN (ui, unsigned int, __builtin_clz, __builtin_popcount)
4304
+DO_BUILTIN (ss, short, __builtin_clz, __builtin_popcount)
4305
+DO_BUILTIN (us, unsigned short, __builtin_clz, __builtin_popcount)
4309
+DO_BUILTIN (sc, signed char, __builtin_clz, __builtin_popcount)
4310
+DO_BUILTIN (uc, unsigned char, __builtin_clz, __builtin_popcount)
4313
+/* { dg-final { scan-assembler-times "vclzw" 2 } } */
4314
+/* { dg-final { scan-assembler-times "vpopcntw" 2 } } */
4315
--- a/src/gcc/testsuite/gcc.target/powerpc/pr57949-1.c
4316
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr57949-1.c
4318
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
4319
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4320
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
4321
+/* { dg-options "-O2 -mcpu=power7 -mno-compat-align-parm" } */
4323
+/* Verify that vs is 16-byte aligned with -mcompat-align-parm. */
4325
+typedef float v4sf __attribute__ ((vector_size (16)));
4326
+struct s { long m; v4sf v; };
4330
+void pr57949 (long d1, long d2, long d3, long d4, long d5, long d6,
4331
+ long d7, long d8, long d9, struct s vs) {
4336
+/* { dg-final { scan-assembler "li \.\*,144" } } */
4337
+/* { dg-final { scan-assembler "ld \.\*,128\\(1\\)" } } */
4338
--- a/src/gcc/testsuite/gcc.target/powerpc/atomic-p8.c
4339
+++ b/src/gcc/testsuite/gcc.target/powerpc/atomic-p8.c
4341
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
4342
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4343
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4344
+/* { dg-options "-mcpu=power8 -O2" } */
4345
+/* { dg-final { scan-assembler-times "lbarx" 7 } } */
4346
+/* { dg-final { scan-assembler-times "lharx" 7 } } */
4347
+/* { dg-final { scan-assembler-times "lwarx" 7 } } */
4348
+/* { dg-final { scan-assembler-times "ldarx" 7 } } */
4349
+/* { dg-final { scan-assembler-times "lqarx" 7 } } */
4350
+/* { dg-final { scan-assembler-times "stbcx" 7 } } */
4351
+/* { dg-final { scan-assembler-times "sthcx" 7 } } */
4352
+/* { dg-final { scan-assembler-times "stwcx" 7 } } */
4353
+/* { dg-final { scan-assembler-times "stdcx" 7 } } */
4354
+/* { dg-final { scan-assembler-times "stqcx" 7 } } */
4355
+/* { dg-final { scan-assembler-not "bl __atomic" } } */
4356
+/* { dg-final { scan-assembler-times "isync" 20 } } */
4357
+/* { dg-final { scan-assembler-times "lwsync" 10 } } */
4358
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
4359
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
4360
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
4361
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
4362
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
4364
+/* Test for the byte atomic operations on power8 using lbarx/stbcx. */
4366
+char_fetch_add_relaxed (char *ptr, int value)
4368
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4372
+char_fetch_sub_consume (char *ptr, int value)
4374
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4378
+char_fetch_and_acquire (char *ptr, int value)
4380
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4384
+char_fetch_ior_release (char *ptr, int value)
4386
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4390
+char_fetch_xor_acq_rel (char *ptr, int value)
4392
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4396
+char_fetch_nand_seq_cst (char *ptr, int value)
4398
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4402
+char_val_compare_and_swap (char *p, int i, int j, char *q)
4404
+ *q = __sync_val_compare_and_swap (p, i, j);
4407
+/* Test for the half word atomic operations on power8 using lharx/sthcx. */
4409
+short_fetch_add_relaxed (short *ptr, int value)
4411
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4415
+short_fetch_sub_consume (short *ptr, int value)
4417
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4421
+short_fetch_and_acquire (short *ptr, int value)
4423
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4427
+short_fetch_ior_release (short *ptr, int value)
4429
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4433
+short_fetch_xor_acq_rel (short *ptr, int value)
4435
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4439
+short_fetch_nand_seq_cst (short *ptr, int value)
4441
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4445
+short_val_compare_and_swap (short *p, int i, int j, short *q)
4447
+ *q = __sync_val_compare_and_swap (p, i, j);
4450
+/* Test for the word atomic operations on power8 using lwarx/stwcx. */
4452
+int_fetch_add_relaxed (int *ptr, int value)
4454
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4458
+int_fetch_sub_consume (int *ptr, int value)
4460
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4464
+int_fetch_and_acquire (int *ptr, int value)
4466
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4470
+int_fetch_ior_release (int *ptr, int value)
4472
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4476
+int_fetch_xor_acq_rel (int *ptr, int value)
4478
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4482
+int_fetch_nand_seq_cst (int *ptr, int value)
4484
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4488
+int_val_compare_and_swap (int *p, int i, int j, int *q)
4490
+ *q = __sync_val_compare_and_swap (p, i, j);
4493
+/* Test for the double word atomic operations on power8 using ldarx/stdcx. */
4495
+long_fetch_add_relaxed (long *ptr, long value)
4497
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4501
+long_fetch_sub_consume (long *ptr, long value)
4503
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4507
+long_fetch_and_acquire (long *ptr, long value)
4509
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4513
+long_fetch_ior_release (long *ptr, long value)
4515
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4519
+long_fetch_xor_acq_rel (long *ptr, long value)
4521
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4525
+long_fetch_nand_seq_cst (long *ptr, long value)
4527
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4531
+long_val_compare_and_swap (long *p, long i, long j, long *q)
4533
+ *q = __sync_val_compare_and_swap (p, i, j);
4536
+/* Test for the quad word atomic operations on power8 using ldarx/stdcx. */
4538
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
4540
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4544
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
4546
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4550
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
4552
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4556
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
4558
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4562
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
4564
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4568
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
4570
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4574
+quad_val_compare_and_swap (__int128_t *p, __int128_t i, __int128_t j, __int128_t *q)
4576
+ *q = __sync_val_compare_and_swap (p, i, j);
4578
--- a/src/gcc/testsuite/gcc.target/powerpc/sd-pwr6.c
4579
+++ b/src/gcc/testsuite/gcc.target/powerpc/sd-pwr6.c
4581
+/* { dg-do compile { target { powerpc*-*-* } } } */
4582
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4583
+/* { dg-require-effective-target powerpc_vsx_ok } */
4584
+/* { dg-options "-O2 -mcpu=power6 -mhard-dfp" } */
4585
+/* { dg-final { scan-assembler-not "lfiwzx" } } */
4586
+/* { dg-final { scan-assembler-times "lfd" 2 } } */
4587
+/* { dg-final { scan-assembler-times "dctdp" 2 } } */
4588
+/* { dg-final { scan-assembler-times "dadd" 1 } } */
4589
+/* { dg-final { scan-assembler-times "drsp" 1 } } */
4591
+/* Test that for power6 we need to use a bounce buffer on the stack to load
4592
+ SDmode variables because the power6 does not have a way to directly load
4593
+ 32-bit values from memory. */
4596
+void inc_dec32 (void)
4598
+ a += (_Decimal32) 1.0;
4600
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-4.c
4601
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-4.c
4603
/* { dg-final { scan-assembler-times "xvnmsub.dp" 2 } } */
4604
/* { dg-final { scan-assembler-times "xvrsqrtesp" 1 } } */
4605
/* { dg-final { scan-assembler-times "xvmsub.sp" 1 } } */
4606
-/* { dg-final { scan-assembler-times "xvmulsp" 4 } } */
4607
-/* { dg-final { scan-assembler-times "xvnmsub.sp" 2 } } */
4608
+/* { dg-final { scan-assembler-times "xvmulsp" 2 } } */
4609
+/* { dg-final { scan-assembler-times "xvnmsub.sp" 1 } } */
4613
--- a/src/gcc/testsuite/gcc.target/powerpc/no-r11-3.c
4614
+++ b/src/gcc/testsuite/gcc.target/powerpc/no-r11-3.c
4616
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
4617
/* { dg-skip-if "" { *-*-darwin* } { "*" } { "" } } */
4618
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
4619
/* { dg-options "-O2 -mno-pointers-to-nested-functions" } */
4621
extern void ext_call (int (func) (void));
4622
--- a/src/gcc/testsuite/gcc.target/powerpc/crypto-builtin-1.c
4623
+++ b/src/gcc/testsuite/gcc.target/powerpc/crypto-builtin-1.c
4625
+/* { dg-do compile { target { powerpc*-*-* } } } */
4626
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4627
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4628
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4630
+typedef vector unsigned long long crypto_t;
4631
+typedef vector unsigned long long v2di_t;
4632
+typedef vector unsigned int v4si_t;
4633
+typedef vector unsigned short v8hi_t;
4634
+typedef vector unsigned char v16qi_t;
4636
+crypto_t crpyto1 (crypto_t a)
4638
+ return __builtin_crypto_vsbox (a);
4641
+crypto_t crypto2 (crypto_t a, crypto_t b)
4643
+ return __builtin_crypto_vcipher (a, b);
4646
+crypto_t crypto3 (crypto_t a, crypto_t b)
4648
+ return __builtin_crypto_vcipherlast (a, b);
4651
+crypto_t crypto4 (crypto_t a, crypto_t b)
4653
+ return __builtin_crypto_vncipher (a, b);
4656
+crypto_t crypto5 (crypto_t a, crypto_t b)
4658
+ return __builtin_crypto_vncipherlast (a, b);
4661
+v16qi_t crypto6a (v16qi_t a, v16qi_t b, v16qi_t c)
4663
+ return __builtin_crypto_vpermxor (a, b, c);
4666
+v8hi_t crypto6b (v8hi_t a, v8hi_t b, v8hi_t c)
4668
+ return __builtin_crypto_vpermxor (a, b, c);
4671
+v4si_t crypto6c (v4si_t a, v4si_t b, v4si_t c)
4673
+ return __builtin_crypto_vpermxor (a, b, c);
4676
+v2di_t crypto6d (v2di_t a, v2di_t b, v2di_t c)
4678
+ return __builtin_crypto_vpermxor (a, b, c);
4681
+v16qi_t crypto7a (v16qi_t a, v16qi_t b)
4683
+ return __builtin_crypto_vpmsumb (a, b);
4686
+v16qi_t crypto7b (v16qi_t a, v16qi_t b)
4688
+ return __builtin_crypto_vpmsum (a, b);
4691
+v8hi_t crypto7c (v8hi_t a, v8hi_t b)
4693
+ return __builtin_crypto_vpmsumh (a, b);
4696
+v8hi_t crypto7d (v8hi_t a, v8hi_t b)
4698
+ return __builtin_crypto_vpmsum (a, b);
4701
+v4si_t crypto7e (v4si_t a, v4si_t b)
4703
+ return __builtin_crypto_vpmsumw (a, b);
4706
+v4si_t crypto7f (v4si_t a, v4si_t b)
4708
+ return __builtin_crypto_vpmsum (a, b);
4711
+v2di_t crypto7g (v2di_t a, v2di_t b)
4713
+ return __builtin_crypto_vpmsumd (a, b);
4716
+v2di_t crypto7h (v2di_t a, v2di_t b)
4718
+ return __builtin_crypto_vpmsum (a, b);
4721
+v2di_t crypto8a (v2di_t a)
4723
+ return __builtin_crypto_vshasigmad (a, 0, 8);
4726
+v2di_t crypto8b (v2di_t a)
4728
+ return __builtin_crypto_vshasigma (a, 0, 8);
4731
+v4si_t crypto8c (v4si_t a)
4733
+ return __builtin_crypto_vshasigmaw (a, 1, 15);
4736
+v4si_t crypto8d (v4si_t a)
4738
+ return __builtin_crypto_vshasigma (a, 1, 15);
4741
+/* Note space is used after the instruction so that vcipherlast does not match
4743
+/* { dg-final { scan-assembler-times "vcipher " 1 } } */
4744
+/* { dg-final { scan-assembler-times "vcipherlast " 1 } } */
4745
+/* { dg-final { scan-assembler-times "vncipher " 1 } } */
4746
+/* { dg-final { scan-assembler-times "vncipherlast " 1 } } */
4747
+/* { dg-final { scan-assembler-times "vpermxor " 4 } } */
4748
+/* { dg-final { scan-assembler-times "vpmsumb " 2 } } */
4749
+/* { dg-final { scan-assembler-times "vpmsumd " 2 } } */
4750
+/* { dg-final { scan-assembler-times "vpmsumh " 2 } } */
4751
+/* { dg-final { scan-assembler-times "vpmsumw " 2 } } */
4752
+/* { dg-final { scan-assembler-times "vsbox " 1 } } */
4753
+/* { dg-final { scan-assembler-times "vshasigmad " 2 } } */
4754
+/* { dg-final { scan-assembler-times "vshasigmaw " 2 } } */
4755
--- a/src/gcc/testsuite/gcc.target/powerpc/pr42747.c
4756
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr42747.c
4759
double foo (double x) { return __builtin_sqrt (x); }
4761
-/* { dg-final { scan-assembler "xssqrtdp" } } */
4762
+/* { dg-final { scan-assembler "xssqrtdp\|fsqrt" } } */
4763
--- a/src/gcc/testsuite/gcc.target/powerpc/dfp-dd-2.c
4764
+++ b/src/gcc/testsuite/gcc.target/powerpc/dfp-dd-2.c
4766
+/* Test generation of DFP instructions for POWER6. */
4767
+/* { dg-do compile { target { powerpc*-*-linux* && powerpc_fprs } } } */
4768
+/* { dg-options "-std=gnu99 -O1 -mcpu=power6" } */
4770
+/* { dg-final { scan-assembler-times "fneg" 1 } } */
4771
+/* { dg-final { scan-assembler-times "fabs" 1 } } */
4772
+/* { dg-final { scan-assembler-times "fnabs" 1 } } */
4773
+/* { dg-final { scan-assembler-times "fmr" 0 } } */
4776
+func1 (_Decimal64 a, _Decimal64 b)
4782
+func2 (_Decimal64 a, _Decimal64 b)
4784
+ return __builtin_fabsd64 (b);
4788
+func3 (_Decimal64 a, _Decimal64 b)
4790
+ return - __builtin_fabsd64 (b);
4792
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
4793
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
4795
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
4796
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4797
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
4798
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4799
+/* { dg-options "-mcpu=power8 -O2" } */
4800
+/* { dg-final { scan-assembler "mtvsrd" } } */
4801
+/* { dg-final { scan-assembler "mfvsrd" } } */
4802
+/* { dg-final { scan-assembler "xscvdpspn" } } */
4803
+/* { dg-final { scan-assembler "xscvspdpn" } } */
4805
+/* Check code generation for direct move for float types. */
4809
+#define NO_ALTIVEC 1
4810
+#define VSX_REG_ATTR "ww"
4812
+#include "direct-move.h"
4813
--- a/src/gcc/testsuite/gcc.target/powerpc/dfp-td-2.c
4814
+++ b/src/gcc/testsuite/gcc.target/powerpc/dfp-td-2.c
4816
+/* Test generation of DFP instructions for POWER6. */
4817
+/* { dg-do compile { target { powerpc*-*-linux* && powerpc_fprs } } } */
4818
+/* { dg-options "-std=gnu99 -O1 -mcpu=power6" } */
4820
+/* { dg-final { scan-assembler-times "fneg" 1 } } */
4821
+/* { dg-final { scan-assembler-times "fabs" 1 } } */
4822
+/* { dg-final { scan-assembler-times "fnabs" 1 } } */
4823
+/* { dg-final { scan-assembler-times "fmr" 0 } } */
4825
+/* These tests verify we only generate fneg, fabs and fnabs
4826
+ instructions and no fmr's since these are done in place. */
4829
+func1 (_Decimal128 a)
4835
+func2 (_Decimal128 a)
4837
+ return __builtin_fabsd128 (a);
4841
+func3 (_Decimal128 a)
4843
+ return - __builtin_fabsd128 (a);
4845
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c
4846
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c
4848
+/* { dg-do compile { target { powerpc*-*-* } } } */
4849
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4850
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4851
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4853
+#include <altivec.h>
4864
+#define ATTR_ALIGN __attribute__((__aligned__(ALIGN)))
4867
+#define DOIT(TYPE, PREFIX) \
4868
+TYPE PREFIX ## _eqv_builtin (TYPE a, TYPE b) \
4870
+ return vec_eqv (a, b); \
4873
+TYPE PREFIX ## _eqv_arith (TYPE a, TYPE b) \
4875
+ return ~(a ^ b); \
4878
+TYPE PREFIX ## _nand_builtin (TYPE a, TYPE b) \
4880
+ return vec_nand (a, b); \
4883
+TYPE PREFIX ## _nand_arith1 (TYPE a, TYPE b) \
4885
+ return ~(a & b); \
4888
+TYPE PREFIX ## _nand_arith2 (TYPE a, TYPE b) \
4890
+ return (~a) | (~b); \
4893
+TYPE PREFIX ## _orc_builtin (TYPE a, TYPE b) \
4895
+ return vec_orc (a, b); \
4898
+TYPE PREFIX ## _orc_arith1 (TYPE a, TYPE b) \
4900
+ return (~ a) | b; \
4903
+TYPE PREFIX ## _orc_arith2 (TYPE a, TYPE b) \
4905
+ return a | (~ b); \
4908
+#define DOIT_FLOAT(TYPE, PREFIX) \
4909
+TYPE PREFIX ## _eqv_builtin (TYPE a, TYPE b) \
4911
+ return vec_eqv (a, b); \
4914
+TYPE PREFIX ## _nand_builtin (TYPE a, TYPE b) \
4916
+ return vec_nand (a, b); \
4919
+TYPE PREFIX ## _orc_builtin (TYPE a, TYPE b) \
4921
+ return vec_orc (a, b); \
4924
+typedef vector signed char sign_char_vec;
4925
+typedef vector short sign_short_vec;
4926
+typedef vector int sign_int_vec;
4927
+typedef vector long long sign_llong_vec;
4929
+typedef vector unsigned char uns_char_vec;
4930
+typedef vector unsigned short uns_short_vec;
4931
+typedef vector unsigned int uns_int_vec;
4932
+typedef vector unsigned long long uns_llong_vec;
4934
+typedef vector float float_vec;
4935
+typedef vector double double_vec;
4937
+DOIT(sign_char_vec, sign_char)
4938
+DOIT(sign_short_vec, sign_short)
4939
+DOIT(sign_int_vec, sign_int)
4940
+DOIT(sign_llong_vec, sign_llong)
4942
+DOIT(uns_char_vec, uns_char)
4943
+DOIT(uns_short_vec, uns_short)
4944
+DOIT(uns_int_vec, uns_int)
4945
+DOIT(uns_llong_vec, uns_llong)
4947
+DOIT_FLOAT(float_vec, float)
4948
+DOIT_FLOAT(double_vec, double)
4950
+/* { dg-final { scan-assembler-times "xxleqv" 18 } } */
4951
+/* { dg-final { scan-assembler-times "xxlnand" 26 } } */
4952
+/* { dg-final { scan-assembler-times "xxlorc" 26 } } */
4953
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c
4954
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c
4956
+/* { dg-do compile { target { powerpc*-*-* } } } */
4957
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4958
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4959
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4970
+#define ATTR_ALIGN __attribute__((__aligned__(ALIGN)))
4974
+#define TYPE unsigned int
4977
+TYPE in1 [SIZE] ATTR_ALIGN;
4978
+TYPE in2 [SIZE] ATTR_ALIGN;
4979
+TYPE eqv [SIZE] ATTR_ALIGN;
4980
+TYPE nand1[SIZE] ATTR_ALIGN;
4981
+TYPE nand2[SIZE] ATTR_ALIGN;
4982
+TYPE orc1 [SIZE] ATTR_ALIGN;
4983
+TYPE orc2 [SIZE] ATTR_ALIGN;
4990
+ for (i = 0; i < SIZE; i++)
4992
+ eqv[i] = ~(in1[i] ^ in2[i]);
5001
+ for (i = 0; i < SIZE; i++)
5003
+ nand1[i] = ~(in1[i] & in2[i]);
5012
+ for (i = 0; i < SIZE; i++)
5014
+ nand2[i] = (~in1[i]) | (~in2[i]);
5023
+ for (i = 0; i < SIZE; i++)
5025
+ orc1[i] = (~in1[i]) | in2[i];
5034
+ for (i = 0; i < SIZE; i++)
5036
+ orc1[i] = in1[i] | (~in2[i]);
5040
+/* { dg-final { scan-assembler-times "xxleqv" 1 } } */
5041
+/* { dg-final { scan-assembler-times "xxlnand" 2 } } */
5042
+/* { dg-final { scan-assembler-times "xxlorc" 2 } } */
5043
--- a/src/gcc/testsuite/gcc.target/powerpc/pr57949-2.c
5044
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr57949-2.c
5046
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
5047
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5048
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
5049
+/* { dg-options "-O2 -mcpu=power7" } */
5051
+/* Verify that vs is not 16-byte aligned in the absence of -mno-compat-align-parm. */
5053
+typedef float v4sf __attribute__ ((vector_size (16)));
5054
+struct s { long m; v4sf v; };
5058
+void pr57949 (long d1, long d2, long d3, long d4, long d5, long d6,
5059
+ long d7, long d8, long d9, struct s vs) {
5064
+/* { dg-final { scan-assembler "ld .\*,136\\(1\\)" } } */
5065
+/* { dg-final { scan-assembler "ld .\*,120\\(1\\)" } } */
5066
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-5.c
5067
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-5.c
5069
/* { dg-options "-O3 -ftree-vectorize -mrecip=all -ffast-math -mcpu=power7 -fno-unroll-loops" } */
5070
/* { dg-final { scan-assembler-times "xvredp" 4 } } */
5071
/* { dg-final { scan-assembler-times "xvresp" 5 } } */
5072
-/* { dg-final { scan-assembler-times "xsredp" 2 } } */
5073
-/* { dg-final { scan-assembler-times "fres" 2 } } */
5074
+/* { dg-final { scan-assembler-times "xsredp\|fre\ " 2 } } */
5075
+/* { dg-final { scan-assembler-times "xsresp\|fres" 2 } } */
5076
+/* { dg-final { scan-assembler-times "xsmulsp\|fmuls" 2 } } */
5077
+/* { dg-final { scan-assembler-times "xsnmsub.sp\|fnmsubs" 2 } } */
5078
+/* { dg-final { scan-assembler-times "xsmuldp\|fmul\ " 2 } } */
5079
+/* { dg-final { scan-assembler-times "xsnmsub.dp\|fnmsub\ " 4 } } */
5080
+/* { dg-final { scan-assembler-times "xvmulsp" 7 } } */
5081
+/* { dg-final { scan-assembler-times "xvnmsub.sp" 5 } } */
5082
+/* { dg-final { scan-assembler-times "xvmuldp" 6 } } */
5083
+/* { dg-final { scan-assembler-times "xvnmsub.dp" 8 } } */
5085
#include <altivec.h>
5087
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
5088
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
5100
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
5101
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
5103
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5104
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5105
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5106
+/* { dg-require-effective-target p8vector_hw } */
5107
+/* { dg-options "-mcpu=power8 -O2" } */
5109
+/* Check whether we get the right bits for direct move at runtime. */
5113
+#define NO_ALTIVEC 1
5115
+#define VSX_REG_ATTR "ww"
5117
+#include "direct-move.h"
5118
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
5119
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
5121
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
5122
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5123
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5124
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5125
+/* { dg-options "-mcpu=power8 -O2" } */
5126
+/* { dg-final { scan-assembler "mtvsrd" } } */
5127
+/* { dg-final { scan-assembler "mfvsrd" } } */
5129
+/* Check code generation for direct move for double types. */
5131
+#define TYPE double
5133
+#define NO_ALTIVEC 1
5134
+#define VSX_REG_ATTR "ws"
5136
+#include "direct-move.h"
5137
--- a/src/gcc/testsuite/gcc.target/powerpc/dfp-td-3.c
5138
+++ b/src/gcc/testsuite/gcc.target/powerpc/dfp-td-3.c
5140
+/* Test generation of DFP instructions for POWER6. */
5141
+/* { dg-do compile { target { powerpc*-*-linux* && powerpc_fprs } } } */
5142
+/* { dg-options "-std=gnu99 -O1 -mcpu=power6" } */
5144
+/* { dg-final { scan-assembler-times "fneg" 1 } } */
5145
+/* { dg-final { scan-assembler-times "fabs" 1 } } */
5146
+/* { dg-final { scan-assembler-times "fnabs" 1 } } */
5147
+/* { dg-final { scan-assembler-times "fmr" 3 } } */
5149
+/* These tests verify we generate fneg, fabs and fnabs and
5150
+ associated fmr's since these are not done in place. */
5153
+func1 (_Decimal128 a, _Decimal128 b)
5159
+func2 (_Decimal128 a, _Decimal128 b)
5161
+ return __builtin_fabsd128 (b);
5165
+func3 (_Decimal128 a, _Decimal128 b)
5167
+ return - __builtin_fabsd128 (b);
5169
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c
5170
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c
5172
+/* { dg-do compile { target { powerpc*-*-* } } } */
5173
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5174
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5175
+/* { dg-options "-mcpu=power8 -O2" } */
5177
+vector float dbl_to_float_p8 (double x) { return __builtin_vsx_xscvdpspn (x); }
5178
+double float_to_dbl_p8 (vector float x) { return __builtin_vsx_xscvspdpn (x); }
5180
+/* { dg-final { scan-assembler "xscvdpspn" } } */
5181
+/* { dg-final { scan-assembler "xscvspdpn" } } */
5182
--- a/src/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
5183
+++ b/src/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
5185
/* { dg-final { scan-assembler "xvrspiz" } } */
5186
/* { dg-final { scan-assembler "xsrdpi" } } */
5187
/* { dg-final { scan-assembler "xsrdpic" } } */
5188
-/* { dg-final { scan-assembler "xsrdpim" } } */
5189
-/* { dg-final { scan-assembler "xsrdpip" } } */
5190
-/* { dg-final { scan-assembler "xsrdpiz" } } */
5191
+/* { dg-final { scan-assembler "xsrdpim\|frim" } } */
5192
+/* { dg-final { scan-assembler "xsrdpip\|frip" } } */
5193
+/* { dg-final { scan-assembler "xsrdpiz\|friz" } } */
5194
/* { dg-final { scan-assembler "xsmaxdp" } } */
5195
/* { dg-final { scan-assembler "xsmindp" } } */
5196
/* { dg-final { scan-assembler "xxland" } } */
5197
--- a/src/gcc/testsuite/gcc.target/powerpc/htm-builtin-1.c
5198
+++ b/src/gcc/testsuite/gcc.target/powerpc/htm-builtin-1.c
5200
+/* { dg-do compile { target { powerpc*-*-* } } } */
5201
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5202
+/* { dg-require-effective-target powerpc_htm_ok } */
5203
+/* { dg-options "-O2 -mhtm" } */
5205
+/* { dg-final { scan-assembler-times "tbegin\\." 1 } } */
5206
+/* { dg-final { scan-assembler-times "tend\\." 2 } } */
5207
+/* { dg-final { scan-assembler-times "tabort\\." 2 } } */
5208
+/* { dg-final { scan-assembler-times "tabortdc\\." 1 } } */
5209
+/* { dg-final { scan-assembler-times "tabortdci\\." 1 } } */
5210
+/* { dg-final { scan-assembler-times "tabortwc\\." 1 } } */
5211
+/* { dg-final { scan-assembler-times "tabortwci\\." 2 } } */
5212
+/* { dg-final { scan-assembler-times "tcheck\\." 1 } } */
5213
+/* { dg-final { scan-assembler-times "trechkpt\\." 1 } } */
5214
+/* { dg-final { scan-assembler-times "treclaim\\." 1 } } */
5215
+/* { dg-final { scan-assembler-times "tsr\\." 3 } } */
5216
+/* { dg-final { scan-assembler-times "mfspr" 4 } } */
5217
+/* { dg-final { scan-assembler-times "mtspr" 4 } } */
5219
+void use_builtins (long *p, char code, long *a, long *b)
5221
+ p[0] = __builtin_tbegin (0);
5222
+ p[1] = __builtin_tend (0);
5223
+ p[2] = __builtin_tendall ();
5224
+ p[3] = __builtin_tabort (0);
5225
+ p[4] = __builtin_tabort (code);
5227
+ p[5] = __builtin_tabortdc (0xf, a[5], b[5]);
5228
+ p[6] = __builtin_tabortdci (0xf, a[6], 13);
5229
+ p[7] = __builtin_tabortwc (0xf, a[7], b[7]);
5230
+ p[8] = __builtin_tabortwci (0xf, a[8], 13);
5232
+ p[9] = __builtin_tcheck (5);
5233
+ p[10] = __builtin_trechkpt ();
5234
+ p[11] = __builtin_treclaim (0);
5235
+ p[12] = __builtin_tresume ();
5236
+ p[13] = __builtin_tsuspend ();
5237
+ p[14] = __builtin_tsr (0);
5238
+ p[15] = __builtin_ttest (); /* This expands to a tabortwci. */
5241
+ p[16] = __builtin_get_texasr ();
5242
+ p[17] = __builtin_get_texasru ();
5243
+ p[18] = __builtin_get_tfhar ();
5244
+ p[19] = __builtin_get_tfiar ();
5246
+ __builtin_set_texasr (a[20]);
5247
+ __builtin_set_texasru (a[21]);
5248
+ __builtin_set_tfhar (a[22]);
5249
+ __builtin_set_tfiar (a[23]);
5251
--- a/src/gcc/testsuite/gcc.target/powerpc/bool.c
5252
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool.c
5254
+/* { dg-do compile { target { powerpc*-*-* } } } */
5255
+/* { dg-options "-O2" } */
5256
+/* { dg-final { scan-assembler "eqv" } } */
5257
+/* { dg-final { scan-assembler "nand" } } */
5258
+/* { dg-final { scan-assembler "nor" } } */
5261
+#define TYPE unsigned long
5264
+TYPE op1 (TYPE a, TYPE b) { return ~(a ^ b); } /* eqv */
5265
+TYPE op2 (TYPE a, TYPE b) { return ~(a & b); } /* nand */
5266
+TYPE op3 (TYPE a, TYPE b) { return ~(a | b); } /* nor */
5268
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-p5.c
5269
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-p5.c
5271
+/* { dg-do compile { target { powerpc*-*-* } } } */
5272
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5273
+/* { dg-require-effective-target powerpc_altivec_ok } */
5274
+/* { dg-options "-O2 -mcpu=power5 -mabi=altivec -mno-altivec -mno-vsx" } */
5275
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
5276
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
5277
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
5278
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
5279
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
5280
+/* { dg-final { scan-assembler "\[ \t\]eqv " } } */
5281
+/* { dg-final { scan-assembler "\[ \t\]orc " } } */
5282
+/* { dg-final { scan-assembler "\[ \t\]nand " } } */
5283
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
5284
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
5285
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
5286
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
5287
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
5288
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
5289
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
5290
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
5291
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
5292
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
5293
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
5294
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
5295
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
5298
+typedef int v4si __attribute__ ((vector_size (16)));
5303
--- a/src/gcc/testsuite/gcc.target/powerpc/fusion.c
5304
+++ b/src/gcc/testsuite/gcc.target/powerpc/fusion.c
5306
+/* { dg-do compile { target { powerpc*-*-* } } } */
5307
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5308
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
5309
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5310
+/* { dg-options "-mcpu=power7 -mtune=power8 -O3" } */
5312
+#define LARGE 0x12345
5314
+int fusion_uchar (unsigned char *p){ return p[LARGE]; }
5315
+int fusion_schar (signed char *p){ return p[LARGE]; }
5316
+int fusion_ushort (unsigned short *p){ return p[LARGE]; }
5317
+int fusion_short (short *p){ return p[LARGE]; }
5318
+int fusion_int (int *p){ return p[LARGE]; }
5319
+unsigned fusion_uns (unsigned *p){ return p[LARGE]; }
5321
+vector double fusion_vector (vector double *p) { return p[2]; }
5323
+/* { dg-final { scan-assembler-times "gpr load fusion" 6 } } */
5324
+/* { dg-final { scan-assembler-times "vector load fusion" 1 } } */
5325
+/* { dg-final { scan-assembler-times "lbz" 2 } } */
5326
+/* { dg-final { scan-assembler-times "extsb" 1 } } */
5327
+/* { dg-final { scan-assembler-times "lhz" 2 } } */
5328
+/* { dg-final { scan-assembler-times "extsh" 1 } } */
5329
+/* { dg-final { scan-assembler-times "lwz" 2 } } */
5330
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
5331
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
5332
@@ -107,8 +107,10 @@
5343
@@ -119,6 +121,12 @@
5347
+#ifdef __LITTLE_ENDIAN__
5348
+#define MAKE_SLOT(x, y) ((long)x | ((long)y << 32))
5350
+#define MAKE_SLOT(x, y) ((long)y | ((long)x << 32))
5353
/* Paramter passing.
5357
sp = __builtin_frame_address(0);
5360
- if (sp->slot[2].l != 0x100000002ULL
5361
- || sp->slot[4].l != 0x500000006ULL)
5362
+ if (sp->slot[2].l != MAKE_SLOT (1, 2)
5363
+ || sp->slot[4].l != MAKE_SLOT (5, 6))
5368
sp = __builtin_frame_address(0);
5371
- if (sp->slot[4].l != 0x100000002ULL
5372
- || sp->slot[6].l != 0x500000006ULL)
5373
+ if (sp->slot[4].l != MAKE_SLOT (1, 2)
5374
+ || sp->slot[6].l != MAKE_SLOT (5, 6))
5379
sp = __builtin_frame_address(0);
5382
- if (sp->slot[4].l != 0x100000002ULL
5383
- || sp->slot[6].l != 0x500000006ULL)
5384
+ if (sp->slot[4].l != MAKE_SLOT (1, 2)
5385
+ || sp->slot[6].l != MAKE_SLOT (5, 6))
5389
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c
5390
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c
5392
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
5393
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5394
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5395
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5396
+/* { dg-options "-mcpu=power8 -O2" } */
5397
+/* { dg-final { scan-assembler "mtvsrd" } } */
5398
+/* { dg-final { scan-assembler "mfvsrd" } } */
5400
+/* Check code generation for direct move for long types. */
5404
+#define NO_ALTIVEC 1
5405
+#define VSX_REG_ATTR "d"
5407
+#include "direct-move.h"
5408
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
5409
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
5411
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5412
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5413
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5414
+/* { dg-require-effective-target p8vector_hw } */
5415
+/* { dg-options "-mcpu=power8 -O2" } */
5417
+/* Check whether we get the right bits for direct move at runtime. */
5419
+#define TYPE double
5421
+#define NO_ALTIVEC 1
5423
+#define VSX_REG_ATTR "ws"
5425
+#include "direct-move.h"
5426
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c
5427
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c
5429
+/* { dg-do compile { target { powerpc*-*-* } } } */
5430
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5431
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5432
+/* { dg-options "-mcpu=power8 -O2" } */
5434
+#include <altivec.h>
5436
+typedef vector int v_sign;
5437
+typedef vector unsigned int v_uns;
5439
+v_sign even_sign (v_sign a, v_sign b)
5441
+ return vec_vmrgew (a, b);
5444
+v_uns even_uns (v_uns a, v_uns b)
5446
+ return vec_vmrgew (a, b);
5449
+v_sign odd_sign (v_sign a, v_sign b)
5451
+ return vec_vmrgow (a, b);
5454
+v_uns odd_uns (v_uns a, v_uns b)
5456
+ return vec_vmrgow (a, b);
5459
+/* { dg-final { scan-assembler-times "vmrgew" 2 } } */
5460
+/* { dg-final { scan-assembler-times "vmrgow" 2 } } */
5461
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2.h
5462
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2.h
5464
+/* Test various logical operations. */
5466
+TYPE arg1 (TYPE p, TYPE q) { return p & q; } /* AND */
5467
+TYPE arg2 (TYPE p, TYPE q) { return p | q; } /* OR */
5468
+TYPE arg3 (TYPE p, TYPE q) { return p ^ q; } /* XOR */
5469
+TYPE arg4 (TYPE p) { return ~ p; } /* NOR */
5470
+TYPE arg5 (TYPE p, TYPE q) { return ~(p & q); } /* NAND */
5471
+TYPE arg6 (TYPE p, TYPE q) { return ~(p | q); } /* NOR */
5472
+TYPE arg7 (TYPE p, TYPE q) { return ~(p ^ q); } /* EQV */
5473
+TYPE arg8 (TYPE p, TYPE q) { return (~p) & q; } /* ANDC */
5474
+TYPE arg9 (TYPE p, TYPE q) { return (~p) | q; } /* ORC */
5475
+TYPE arg10(TYPE p, TYPE q) { return (~p) ^ q; } /* EQV */
5476
+TYPE arg11(TYPE p, TYPE q) { return p & (~q); } /* ANDC */
5477
+TYPE arg12(TYPE p, TYPE q) { return p | (~q); } /* ORC */
5478
+TYPE arg13(TYPE p, TYPE q) { return p ^ (~q); } /* EQV */
5480
+void ptr1 (TYPE *p) { p[0] = p[1] & p[2]; } /* AND */
5481
+void ptr2 (TYPE *p) { p[0] = p[1] | p[2]; } /* OR */
5482
+void ptr3 (TYPE *p) { p[0] = p[1] ^ p[2]; } /* XOR */
5483
+void ptr4 (TYPE *p) { p[0] = ~p[1]; } /* NOR */
5484
+void ptr5 (TYPE *p) { p[0] = ~(p[1] & p[2]); } /* NAND */
5485
+void ptr6 (TYPE *p) { p[0] = ~(p[1] | p[2]); } /* NOR */
5486
+void ptr7 (TYPE *p) { p[0] = ~(p[1] ^ p[2]); } /* EQV */
5487
+void ptr8 (TYPE *p) { p[0] = ~(p[1]) & p[2]; } /* ANDC */
5488
+void ptr9 (TYPE *p) { p[0] = (~p[1]) | p[2]; } /* ORC */
5489
+void ptr10(TYPE *p) { p[0] = (~p[1]) ^ p[2]; } /* EQV */
5490
+void ptr11(TYPE *p) { p[0] = p[1] & (~p[2]); } /* ANDC */
5491
+void ptr12(TYPE *p) { p[0] = p[1] | (~p[2]); } /* ORC */
5492
+void ptr13(TYPE *p) { p[0] = p[1] ^ (~p[2]); } /* EQV */
5493
--- a/src/gcc/testsuite/gcc.target/powerpc/pr48258-1.c
5494
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr48258-1.c
5496
/* { dg-do compile } */
5497
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5498
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
5499
/* { dg-require-effective-target powerpc_vsx_ok } */
5500
/* { dg-options "-O3 -mcpu=power7 -mabi=altivec -ffast-math -fno-unroll-loops" } */
5501
/* { dg-final { scan-assembler-times "xvaddsp" 3 } } */
5502
--- a/src/gcc/testsuite/gcc.target/powerpc/quad-atomic.c
5503
+++ b/src/gcc/testsuite/gcc.target/powerpc/quad-atomic.c
5505
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5506
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5507
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5508
+/* { dg-require-effective-target p8vector_hw } */
5509
+/* { dg-options "-mcpu=power8 -O2" } */
5511
+/* Test whether we get the right bits for quad word atomic instructions. */
5512
+#include <stdlib.h>
5514
+static __int128_t quad_fetch_and (__int128_t *, __int128_t value) __attribute__((__noinline__));
5515
+static __int128_t quad_fetch_or (__int128_t *, __int128_t value) __attribute__((__noinline__));
5516
+static __int128_t quad_fetch_add (__int128_t *, __int128_t value) __attribute__((__noinline__));
5519
+quad_fetch_and (__int128_t *ptr, __int128_t value)
5521
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
5525
+quad_fetch_or (__int128_t *ptr, __int128_t value)
5527
+ return __atomic_fetch_or (ptr, value, __ATOMIC_ACQUIRE);
5531
+quad_fetch_add (__int128_t *ptr, __int128_t value)
5533
+ return __atomic_fetch_add (ptr, value, __ATOMIC_ACQUIRE);
5539
+ __int128_t result;
5541
+ __int128_t and_input = ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
5542
+ __int128_t and_value = ((((__int128_t) 0xfffffffffffffff0ULL) << 64) | ((__int128_t) 0xfffffffffffffff0ULL));
5543
+ __int128_t and_exp = ((((__int128_t) 0x1234567890abcde0ULL) << 64) | ((__int128_t) 0xfedcba0987654320ULL));
5545
+ __int128_t or_input = ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
5546
+ __int128_t or_value = ((((__int128_t) 0x0000000000000010ULL) << 64) | ((__int128_t) 0x000000000000000eULL));
5547
+ __int128_t or_exp = ((((__int128_t) 0x1234567890abcdffULL) << 64) | ((__int128_t) 0xfedcba098765432fULL));
5549
+ __int128_t add_input = ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
5550
+ __int128_t add_value = ((((__int128_t) 0x0000000001000000ULL) << 64) | ((__int128_t) 0x0000001000000000ULL));
5551
+ __int128_t add_exp = ((((__int128_t) 0x1234567891abcdefULL) << 64) | ((__int128_t) 0xfedcba1987654321ULL));
5554
+ value = and_input;
5555
+ result = quad_fetch_and (&value, and_value);
5556
+ if (result != and_input || value != and_exp)
5560
+ result = quad_fetch_or (&value, or_value);
5561
+ if (result != or_input || value != or_exp)
5564
+ value = add_input;
5565
+ result = quad_fetch_add (&value, add_value);
5566
+ if (result != add_input || value != add_exp)
5572
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c
5573
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c
5577
/* Wrapper to save the GPRs and FPRs and then jump to the real function. */
5579
+#define FUNC_START(NAME) \
5580
+ "\t.globl\t" NAME "\n\t" \
5581
+ ".section \".opd\",\"aw\"\n\t" \
5584
+ ".quad .L." NAME ",.TOC.@tocbase,0\n\t" \
5586
+ ".type " NAME ", @function\n" \
5587
+ ".L." NAME ":\n\t"
5589
+#define FUNC_START(NAME) \
5590
+ "\t.globl\t" NAME "\n\t" \
5593
+ "0:\taddis 2,12,(.TOC.-0b)@ha\n\t" \
5594
+ "addi 2,2,(.TOC.-0b)@l\n\t" \
5595
+ ".localentry " NAME ",.-" NAME "\n\t"
5597
#define WRAPPER(NAME) \
5598
-__asm__ ("\t.globl\t" #NAME "_asm\n\t" \
5599
- ".section \".opd\",\"aw\"\n\t" \
5601
- #NAME "_asm:\n\t" \
5602
- ".quad .L." #NAME "_asm,.TOC.@tocbase,0\n\t" \
5604
- ".type " #NAME "_asm, @function\n" \
5605
- ".L." #NAME "_asm:\n\t" \
5606
+__asm__ (FUNC_START (#NAME "_asm") \
5607
"ld 11,gparms@got(2)\n\t" \
5618
unsigned long slot[100];
5621
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c
5622
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c
5624
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5625
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5626
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5627
+/* { dg-require-effective-target p8vector_hw } */
5628
+/* { dg-options "-mcpu=power8 -O2" } */
5630
+/* Check whether we get the right bits for direct move at runtime. */
5634
+#define NO_ALTIVEC 1
5636
+#define VSX_REG_ATTR "d"
5638
+#include "direct-move.h"
5639
--- a/src/gcc/testsuite/gcc.target/powerpc/vsx-float0.c
5640
+++ b/src/gcc/testsuite/gcc.target/powerpc/vsx-float0.c
5642
+/* { dg-do compile { target { powerpc*-*-* } } } */
5643
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5644
+/* { dg-require-effective-target powerpc_vsx_ok } */
5645
+/* { dg-options "-O2 -mcpu=power7" } */
5646
+/* { dg-final { scan-assembler "xxlxor" } } */
5648
+/* Test that we generate xxlor to clear a SFmode register. */
5650
+float sum (float *p, unsigned long n)
5652
+ float sum = 0.0f; /* generate xxlxor instead of load */
5658
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc-target-1.c
5659
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc-target-1.c
5661
/* { dg-final { scan-assembler-times "fabs" 3 } } */
5662
/* { dg-final { scan-assembler-times "fnabs" 3 } } */
5663
/* { dg-final { scan-assembler-times "fsel" 3 } } */
5664
-/* { dg-final { scan-assembler-times "fcpsgn" 3 } } */
5665
-/* { dg-final { scan-assembler-times "xscpsgndp" 1 } } */
5666
+/* { dg-final { scan-assembler-times "fcpsgn\|xscpsgndp" 4 } } */
5668
double normal1 (double, double);
5669
double power5 (double, double) __attribute__((__target__("cpu=power5")));
5670
--- a/src/gcc/testsuite/gcc.target/powerpc/pr60137.c
5671
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr60137.c
5673
+/* { dg-do compile { target { powerpc*-*-* } } } */
5674
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5675
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5676
+/* { dg-options "-mcpu=power8 -O3 -mno-vsx" } */
5678
+/* target/60137, compiler got a 'could not split insn error'. */
5680
+extern int target_flags;
5681
+extern char fixed_regs[53];
5682
+extern char call_used_regs[53];
5684
+void init_reg_sets_1(void)
5687
+ for (i = 0; i < 53; i++)
5688
+ fixed_regs[i] = call_used_regs[i] = (call_used_regs[i] &((target_flags & 0x02000000) ? 2 : 1)) != 0;
5690
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3.h
5691
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3.h
5693
+/* Test forcing 128-bit logical types into GPR registers. */
5695
+#if defined(NO_ASM)
5696
+#define FORCE_REG1(X)
5697
+#define FORCE_REG2(X,Y)
5700
+#if defined(USE_ALTIVEC)
5701
+#define REG_CLASS "+v"
5702
+#define PRINT_REG1 "# altivec reg %0"
5703
+#define PRINT_REG2 "# altivec reg %0, %1"
5705
+#elif defined(USE_FPR)
5706
+#define REG_CLASS "+d"
5707
+#define PRINT_REG1 "# fpr reg %0"
5708
+#define PRINT_REG2 "# fpr reg %0, %1"
5710
+#elif defined(USE_VSX)
5711
+#define REG_CLASS "+wa"
5712
+#define PRINT_REG1 "# vsx reg %x0"
5713
+#define PRINT_REG2 "# vsx reg %x0, %x1"
5716
+#define REG_CLASS "+r"
5717
+#define PRINT_REG1 "# gpr reg %0"
5718
+#define PRINT_REG2 "# gpr reg %0, %1"
5721
+#define FORCE_REG1(X) __asm__ (PRINT_REG1 : REG_CLASS (X))
5722
+#define FORCE_REG2(X,Y) __asm__ (PRINT_REG2 : REG_CLASS (X), REG_CLASS (Y))
5725
+void ptr1 (TYPE *p)
5731
+ FORCE_REG2 (a, b);
5732
+ c = a & b; /* AND */
5737
+void ptr2 (TYPE *p)
5743
+ FORCE_REG2 (a, b);
5744
+ c = a | b; /* OR */
5749
+void ptr3 (TYPE *p)
5755
+ FORCE_REG2 (a, b);
5756
+ c = a ^ b; /* XOR */
5761
+void ptr4 (TYPE *p)
5772
+void ptr5 (TYPE *p)
5778
+ FORCE_REG2 (a, b);
5779
+ c = ~(a & b); /* NAND */
5784
+void ptr6 (TYPE *p)
5790
+ FORCE_REG2 (a, b);
5791
+ c = ~(a | b); /* AND */
5796
+void ptr7 (TYPE *p)
5802
+ FORCE_REG2 (a, b);
5803
+ c = ~(a ^ b); /* EQV */
5808
+void ptr8 (TYPE *p)
5814
+ FORCE_REG2 (a, b);
5815
+ c = (~a) & b; /* ANDC */
5820
+void ptr9 (TYPE *p)
5826
+ FORCE_REG2 (a, b);
5827
+ c = (~a) | b; /* ORC */
5832
+void ptr10 (TYPE *p)
5838
+ FORCE_REG2 (a, b);
5839
+ c = (~a) ^ b; /* EQV */
5844
+void ptr11 (TYPE *p)
5850
+ FORCE_REG2 (a, b);
5851
+ c = a & (~b); /* ANDC */
5856
+void ptr12 (TYPE *p)
5862
+ FORCE_REG2 (a, b);
5863
+ c = a | (~b); /* ORC */
5868
+void ptr13 (TYPE *p)
5874
+ FORCE_REG2 (a, b);
5875
+ c = a ^ (~b); /* AND */
5879
--- a/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
5880
+++ b/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
5882
return __builtin_shuffle(x, (V){ 4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7, });
5887
- return __builtin_shuffle(x, y,
5888
- (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
5894
- return __builtin_shuffle(x, y,
5895
- (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 });
5900
return __builtin_shuffle(x, y,
5902
/* { dg-final { scan-assembler "vspltb" } } */
5903
/* { dg-final { scan-assembler "vsplth" } } */
5904
/* { dg-final { scan-assembler "vspltw" } } */
5905
-/* { dg-final { scan-assembler "vpkuhum" } } */
5906
-/* { dg-final { scan-assembler "vpkuwum" } } */
5907
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-p7.c
5908
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-p7.c
5910
+/* { dg-do compile { target { powerpc*-*-* } } } */
5911
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5912
+/* { dg-require-effective-target powerpc_vsx_ok } */
5913
+/* { dg-options "-O2 -mcpu=power7" } */
5914
+/* { dg-final { scan-assembler-not "\[ \t\]and " } } */
5915
+/* { dg-final { scan-assembler-not "\[ \t\]or " } } */
5916
+/* { dg-final { scan-assembler-not "\[ \t\]xor " } } */
5917
+/* { dg-final { scan-assembler-not "\[ \t\]nor " } } */
5918
+/* { dg-final { scan-assembler-not "\[ \t\]eqv " } } */
5919
+/* { dg-final { scan-assembler-not "\[ \t\]andc " } } */
5920
+/* { dg-final { scan-assembler-not "\[ \t\]orc " } } */
5921
+/* { dg-final { scan-assembler-not "\[ \t\]nand " } } */
5922
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
5923
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
5924
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
5925
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
5926
+/* { dg-final { scan-assembler "\[ \t\]xxland " } } */
5927
+/* { dg-final { scan-assembler "\[ \t\]xxlor " } } */
5928
+/* { dg-final { scan-assembler "\[ \t\]xxlxor " } } */
5929
+/* { dg-final { scan-assembler "\[ \t\]xxlnor " } } */
5930
+/* { dg-final { scan-assembler "\[ \t\]xxlandc " } } */
5931
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
5932
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
5933
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
5936
+typedef int v4si __attribute__ ((vector_size (16)));
5941
--- a/src/gcc/testsuite/ChangeLog.ibm
5942
+++ b/src/gcc/testsuite/ChangeLog.ibm
5944
+2014-03-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5946
+ Backport from mainline 208321
5947
+ 2014-03-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5949
+ * gcc.dg/vmx/extract-vsx.c: Replace "vector long" with "vector
5950
+ long long" throughout.
5951
+ * gcc.dg/vmx/extract-vsx-be-order.c: Likewise.
5952
+ * gcc.dg/vmx/insert-vsx.c: Likewise.
5953
+ * gcc.dg/vmx/insert-vsx-be-order.c: Likewise.
5954
+ * gcc.dg/vmx/ld-vsx.c: Likewise.
5955
+ * gcc.dg/vmx/ld-vsx-be-order.c: Likewise.
5956
+ * gcc.dg/vmx/ldl-vsx.c: Likewise.
5957
+ * gcc.dg/vmx/ldl-vsx-be-order.c: Likewise.
5958
+ * gcc.dg/vmx/merge-vsx.c: Likewise.
5959
+ * gcc.dg/vmx/merge-vsx-be-order.c: Likewise.
5960
+ * gcc.dg/vmx/st-vsx.c: Likewise.
5961
+ * gcc.dg/vmx/st-vsx-be-order.c: Likewise.
5962
+ * gcc.dg/vmx/stl-vsx.c: Likewise.
5963
+ * gcc.dg/vmx/stl-vsx-be-order.c: Likewise.
5965
+2014-02-25 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5967
+ Backport from mainline 208120
5968
+ 2014-02-25 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5970
+ * gcc.dg/vmx/ld-vsx.c: Don't use vec_all_eq.
5971
+ * gcc.dg/vmx/ld-vsx-be-order.c: Likewise.
5972
+ * gcc.dg/vmx/ldl-vsx.c: Likewise.
5973
+ * gcc.dg/vmx/ldl-vsx-be-order.c: Likewise.
5974
+ * gcc.dg/vmx/merge-vsx.c: Likewise.
5975
+ * gcc.dg/vmx/merge-vsx-be-order.c: Likewise.
5977
+2014-02-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5979
+ Backport from mainline 208049
5980
+ 2014-02-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5982
+ * gcc.dg/vmx/lde.c: New test.
5983
+ * gcc.dg/vmx/lde-be-order.c: New test.
5984
+ * gcc.dg/vmx/ste.c: New test.
5985
+ * gcc.dg/vmx/ste-be-order.c: New test.
5987
+2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5989
+ Backport from mainline 208021
5990
+ 2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5992
+ * gcc.dg/vmx/vsums.c: Check entire result vector.
5993
+ * gcc.dg/vmx/vsums-be-order.c: Likewise.
5995
+2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5997
+ Backport from mainline 208019
5998
+ 2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6000
+ * gcc.dg/vmx/ld.c: New test.
6001
+ * gcc.dg/vmx/ld-be-order.c: New test.
6002
+ * gcc.dg/vmx/ld-vsx.c: New test.
6003
+ * gcc.dg/vmx/ld-vsx-be-order.c: New test.
6004
+ * gcc.dg/vmx/ldl.c: New test.
6005
+ * gcc.dg/vmx/ldl-be-order.c: New test.
6006
+ * gcc.dg/vmx/ldl-vsx.c: New test.
6007
+ * gcc.dg/vmx/ldl-vsx-be-order.c: New test.
6008
+ * gcc.dg/vmx/st.c: New test.
6009
+ * gcc.dg/vmx/st-be-order.c: New test.
6010
+ * gcc.dg/vmx/st-vsx.c: New test.
6011
+ * gcc.dg/vmx/st-vsx-be-order.c: New test.
6012
+ * gcc.dg/vmx/stl.c: New test.
6013
+ * gcc.dg/vmx/stl-be-order.c: New test.
6014
+ * gcc.dg/vmx/stl-vsx.c: New test.
6015
+ * gcc.dg/vmx/stl-vsx-be-order.c: New test.
6017
+2014-02-15 Michael Meissner <meissner@linux.vnet.ibm.com>
6019
+ Backport from mainline r207808.
6020
+ 2014-02-15 Michael Meissner <meissner@linux.vnet.ibm.com>
6023
+ * gcc.target/powerpc/pr60203.c: New testsuite.
6025
+2014-02-11 Michael Meissner <meissner@linux.vnet.ibm.com>
6027
+ Backport from mainline r207699.
6028
+ 2014-02-11 Michael Meissner <meissner@linux.vnet.ibm.com>
6031
+ * gcc.target/powerpc/pr60137.c: New file.
6033
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6035
+ Backport from mainline r207521
6036
+ 2014-02-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6038
+ * gcc.dg/vmx/sum2s.c: New.
6039
+ * gcc.dg/vmx/sum2s-be-order.c: New.
6041
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6043
+ Backport from mainline r207520
6044
+ 2014-02-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6046
+ * gcc.dg/vmx/pack.c: New.
6047
+ * gcc.dg/vmx/pack-be-order.c: New.
6048
+ * gcc.dg/vmx/unpack.c: New.
6049
+ * gcc.dg/vmx/unpack-be-order.c: New.
6051
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6053
+ Backport from mainline r207415
6054
+ 2014-02-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6056
+ * gcc.dg/vmx/3b-15.c: Remove special handling for little endian.
6057
+ * gcc.dg/vmx/perm.c: New.
6058
+ * gcc.dg/vmx/perm-be-order.c: New.
6060
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6062
+ Backport from mainline r207414
6063
+ 2014-02-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6065
+ * gcc.dg/vmx/vsums.c: New.
6066
+ * gcc.dg/vmx/vsums-be-order.c: New.
6068
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6070
+ Backport from mainline r207318
6071
+ 2014-01-30 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6073
+ * gcc.dg/vmx/splat.c: New.
6074
+ * gcc.dg/vmx/splat-vsx.c: New.
6075
+ * gcc.dg/vmx/splat-be-order.c: New.
6076
+ * gcc.dg/vmx/splat-vsx-be-order.c: New.
6077
+ * gcc.dg/vmx/eg-5.c: Remove special casing for little endian.
6078
+ * gcc.dg/vmx/sn7153.c: Add special casing for little endian.
6080
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6082
+ Backport from mainline r207262
6083
+ 2014-01-29 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6085
+ * gcc.dg/vmx/merge-be-order.c: New.
6086
+ * gcc.dg/vmx/merge.c: New.
6087
+ * gcc.dg/vmx/merge-vsx-be-order.c: New.
6088
+ * gcc.dg/vmx/merge-vsx.c: New.
6090
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6092
+ Backport from mainline r206926
6093
+ 2014-01-22 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6095
+ * gcc.dg/vmx/insert-vsx-be-order.c: New.
6096
+ * gcc.dg/vmx/extract-vsx.c: New.
6097
+ * gcc.dg/vmx/extract-vsx-be-order.c: New.
6098
+ * gcc.dg/vmx/insert-vsx.c: New.
6100
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6102
+ Backport from mainline r206641
6103
+ 2014-01-15 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
6105
+ * gcc.dg/vmx/mult-even-odd.c: New.
6106
+ * gcc.dg/vmx/mult-even-odd-be-order.c: New.
6108
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6110
+ Backport from mainline r206590
6111
+ 2014-01-13 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6113
+ * gcc.dg/vmx/insert.c: New.
6114
+ * gcc.dg/vmx/insert-be-order.c: New.
6115
+ * gcc.dg/vmx/extract.c: New.
6116
+ * gcc.dg/vmx/extract-be-order.c: New.
6118
+2014-01-23 Michael Meissner <meissner@linux.vnet.ibm.com>
6120
+ Back port from mainline
6121
+ 2014-01-23 Michael Meissner <meissner@linux.vnet.ibm.com>
6124
+ * gcc.target/powerpc/quad-atomic.c: New file to test power8 quad
6125
+ word atomic functions at runtime.
6127
+2014-01-14 Michael Meissner <meissner@linux.vnet.ibm.com>
6129
+ Backport from mainline
6131
+ 2013-10-23 Pat Haugen <pthaugen@us.ibm.com>
6133
+ * gcc.target/powerpc/direct-move.h: Fix header for executable tests.
6135
+2013-12-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6137
+ Backport from mainline r205638
6138
+ 2013-12-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6140
+ * gcc.dg/vect/costmodel/ppc/costmodel-slp-34.c: Skip for little
6143
+2013-11-27 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6145
+ Backport from mainline r205464
6146
+ 2013-11-27 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6148
+ * gfortran.dg/nan_7.f90: Disable for little endian PowerPC.
6150
+2013-11-22 Michael Meissner <meissner@linux.vnet.ibm.com>
6152
+ Backport from mainline
6153
+ 2013-11-22 Michael Meissner <meissner@linux.vnet.ibm.com>
6156
+ * gcc.target/powerpc/direct-move.h (VSX_REG_ATTR): Allow test to
6157
+ specify an appropriate register class for VSX operations.
6158
+ (load_vsx): Use it.
6159
+ (load_gpr_to_vsx): Likewise.
6160
+ (load_vsx_to_gpr): Likewise.
6161
+ * gcc.target/powerpc/direct-move-vint1.c: Use an appropriate
6162
+ register class for VSX registers that the type can handle. Remove
6163
+ checks for explicit number of instructions generated, just check
6164
+ if the instruction is generated.
6165
+ * gcc.target/powerpc/direct-move-vint2.c: Likewise.
6166
+ * gcc.target/powerpc/direct-move-float1.c: Likewise.
6167
+ * gcc.target/powerpc/direct-move-float2.c: Likewise.
6168
+ * gcc.target/powerpc/direct-move-double1.c: Likewise.
6169
+ * gcc.target/powerpc/direct-move-double2.c: Likewise.
6170
+ * gcc.target/powerpc/direct-move-long1.c: Likewise.
6171
+ * gcc.target/powerpc/direct-move-long2.c: Likewise.
6173
+ * gcc.target/powerpc/bool3-av.c: Limit to 64-bit mode for now.
6174
+ * gcc.target/powerpc/bool3-p7.c: Likewise.
6175
+ * gcc.target/powerpc/bool3-p8.c: Likewise.
6177
+ * gcc.target/powerpc/p8vector-ldst.c: Just check that the
6178
+ appropriate instructions are generated, don't check the count.
6180
+ 2013-11-12 Michael Meissner <meissner@linux.vnet.ibm.com>
6183
+ * gcc.target/powerpc/pr59054.c: New test.
6185
+2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6187
+ Backport from mainline r205146
6188
+ 2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6190
+ * gcc.target/powerpc/pr48258-1.c: Skip for little endian.
6192
+2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6194
+ Backport from mainline r205106:
6196
+ 2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6198
+ * gcc.target/powerpc/darwin-longlong.c (msw): Make endian-safe.
6200
+2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6202
+ Backport from mainline r205046:
6204
+ 2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6206
+ * gcc.target/powerpc/ppc64-abi-2.c (MAKE_SLOT): New macro to
6207
+ construct parameter slot value in endian-independent way.
6208
+ (fcevv, fciievv, fcvevv): Use it.
6210
+2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6212
+ Backport from mainline r204862
6213
+ 2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6215
+ * gcc.dg/vmx/3b-15.c: Revise for little endian.
6217
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6219
+ Backport from mainline r204808:
6221
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6223
+ * gcc.target/powerpc/ppc64-abi-1.c (stack_frame_t): Remove
6224
+ compiler and linker field if _CALL_ELF == 2.
6225
+ * gcc.target/powerpc/ppc64-abi-2.c (stack_frame_t): Likewise.
6226
+ * gcc.target/powerpc/ppc64-abi-dfp-1.c (stack_frame_t): Likewise.
6227
+ * gcc.dg/stack-usage-1.c (SIZE): Update value for _CALL_ELF == 2.
6229
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6231
+ * gcc.target/powerpc/ppc64-abi-dfp-1.c (FUNC_START): New macro.
6232
+ (WRAPPER): Use it.
6233
+ * gcc.target/powerpc/no-r11-1.c: Skip on powerpc_elfv2.
6234
+ * gcc.target/powerpc/no-r11-2.c: Skip on powerpc_elfv2.
6235
+ * gcc.target/powerpc/no-r11-3.c: Skip on powerpc_elfv2.
6237
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6239
+ * lib/target-supports.exp (check_effective_target_powerpc_elfv2):
6241
+ * gcc.target/powerpc/pr57949-1.c: Disable for powerpc_elfv2.
6242
+ * gcc.target/powerpc/pr57949-2.c: Likewise.
6244
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6246
+ Backport from mainline r204799:
6248
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6250
+ * g++.dg/eh/ppc64-sighandle-cr.C: New test.
6252
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6254
+ Backport from mainline r201750.
6255
+ Note: Default setting of -mcompat-align-parm inverted!
6257
+ 2013-08-14 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6260
+ * gcc.target/powerpc/pr57949-1.c: New.
6261
+ * gcc.target/powerpc/pr57949-2.c: New.
6263
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6265
+ Backport from mainline r201040 and r201929:
6267
+ 2013-08-22 Michael Meissner <meissner@linux.vnet.ibm.com>
6269
+ * gcc.target/powerpc/pr57744.c: Declare abort.
6271
+ 2013-07-18 Pat Haugen <pthaugen@us.ibm.com>
6273
+ * gcc.target/powerpc/pr57744.c: Fix typo.
6275
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6277
+ Backport from mainline r204321
6278
+ 2013-11-02 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
6280
+ * gcc.dg/vmx/vec-set.c: New.
6282
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6284
+ Backport from mainline r204138
6285
+ 2013-10-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6287
+ * gcc.dg/vmx/gcc-bug-i.c: Add little endian variant.
6288
+ * gcc.dg/vmx/eg-5.c: Likewise.
6290
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6292
+ Backport from mainline r203930
6293
+ 2013-10-22 Bill Schmidt <wschmidt@vnet.ibm.com>
6295
+ * gcc.target/powerpc/altivec-perm-1.c: Move the two vector pack
6297
+ * gcc.target/powerpc/altivec-perm-3.c: ...this new test, which is
6298
+ restricted to big-endian targets.
6300
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6302
+ Backport from mainline r203246
6303
+ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6305
+ * gcc.target/powerpc/pr43154.c: Skip for ppc64 little endian.
6306
+ * gcc.target/powerpc/fusion.c: Likewise.
6308
+2013-10-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6310
+ Backport from mainline
6311
+ 2013-04-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6314
+ * gcc.target/powerpc/recip-1.c: Modify expected output.
6315
+ * gcc.target/powerpc/recip-3.c: Likewise.
6316
+ * gcc.target/powerpc/recip-4.c: Likewise.
6317
+ * gcc.target/powerpc/recip-5.c: Add expected output for iterations.
6319
+2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
6321
+ Back port from mainline
6322
+ 2013-10-03 Michael Meissner <meissner@linux.vnet.ibm.com>
6324
+ * gcc.target/powerpc/p8vector-fp.c: New test for floating point
6325
+ scalar operations when using -mupper-regs-sf and -mupper-regs-df.
6326
+ * gcc.target/powerpc/ppc-target-1.c: Update tests to allow either
6327
+ VSX scalar operations or the traditional floating point form of
6329
+ * gcc.target/powerpc/ppc-target-2.c: Likewise.
6330
+ * gcc.target/powerpc/recip-3.c: Likewise.
6331
+ * gcc.target/powerpc/recip-5.c: Likewise.
6332
+ * gcc.target/powerpc/pr72747.c: Likewise.
6333
+ * gcc.target/powerpc/vsx-builtin-3.c: Likewise.
6335
+ Back port from mainline
6336
+ 2013-09-27 Michael Meissner <meissner@linux.vnet.ibm.com>
6338
+ * gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf
6339
+ and -mupper-regs-df.
6341
+ Back port from mainline
6342
+ 2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
6345
+ * gcc.target/powerpc/pr58673-1.c: New file to test whether
6346
+ -mquad-word + -mno-vsx-timode causes errors.
6347
+ * gcc.target/powerpc/pr58673-2.c: Likewise.
6349
+2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
6351
+ Back port from mainline
6352
+ 2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
6354
+ * gcc.target/powerpc/dfp-dd-2.c: New test.
6355
+ * gcc.target/powerpc/dfp-td-2.c: Likewise.
6356
+ * gcc.target/powerpc/dfp-td-3.c: Likewise.
6358
+2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
6360
+ Backport from trunk.
6361
+ 2013-07-23 Michael Meissner <meissner@linux.vnet.ibm.com>
6363
+ * gcc.target/powerpc/bool2.h: New file, test the code generation
6364
+ of logical operations for power5, altivec, power7, and power8 systems.
6365
+ * gcc.target/powerpc/bool2-p5.c: Likewise.
6366
+ * gcc.target/powerpc/bool2-av.c: Likewise.
6367
+ * gcc.target/powerpc/bool2-p7.c: Likewise.
6368
+ * gcc.target/powerpc/bool2-p8.c: Likewise.
6369
+ * gcc.target/powerpc/bool3.h: Likewise.
6370
+ * gcc.target/powerpc/bool3-av.c: Likewise.
6371
+ * gcc.target/powerpc/bool2-p7.c: Likewise.
6372
+ * gcc.target/powerpc/bool2-p8.c: Likewise.
6374
+2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
6376
+ Backport from trunk.
6377
+ 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
6379
+ * gcc.target/powerpc/fusion.c: New file, test power8 fusion support.
6381
+2013-08-05 Michael Meissner <meissner@linux.vnet.ibm.com>
6383
+ Back port from mainline:
6384
+ 2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6385
+ Pat Haugen <pthaugen@us.ibm.com>
6386
+ Peter Bergner <bergner@vnet.ibm.com>
6388
+ * lib/target-supports.exp (check_p8vector_hw_available) Add power8
6390
+ (check_effective_target_powerpc_p8vector_ok): Likewise.
6391
+ (is-effective-target): Likewise.
6392
+ (check_vect_support_and_set_flags): Likewise.
6394
+2013-08-04 Peter Bergner <bergner@vnet.ibm.com>
6396
+ Back port from mainline
6397
+ 2013-08-01 Fabien Chêne <fabien@gcc.gnu.org>
6398
+ Peter Bergner <bergner@vnet.ibm.com>
6401
+ * g++.dg/overload/using3.C: New.
6402
+ * g++.dg/overload/using2.C: Adjust.
6403
+ * g++.dg/lookup/using9.C: Likewise.
6405
+2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
6407
+ Back port from mainline
6408
+ 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
6410
+ * gcc.target/powerpc/fusion.c: New file, test power8 fusion
6413
+2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
6415
+ Back port from mainline
6416
+ 2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
6418
+ * lib/target-supports.exp (check_effective_target_powerpc_htm_ok): New
6419
+ function to test if HTM is available.
6420
+ * gcc.target/powerpc/htm-xl-intrin-1.c: New test.
6421
+ * gcc.target/powerpc/htm-builtin-1.c: New test.
6423
+2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
6425
+ Back port from the trunk
6426
+ 2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
6429
+ * gcc.target/powerpc/pr57744.c: New test to make sure lqarx and
6430
+ stqcx. get even registers.
6432
+2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
6434
+ Back port from the trunk
6436
+ 2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
6437
+ Pat Haugen <pthaugen@us.ibm.com>
6438
+ Peter Bergner <bergner@vnet.ibm.com>
6440
+ * gcc.target/powerpc/atomic-p7.c: New file, add tests for atomic
6441
+ load/store instructions on power7, power8.
6442
+ * gcc.target/powerpc/atomic-p8.c: Likewise.
6444
+2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
6446
+ Back port from the trunk
6448
+ 2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
6449
+ Pat Haugen <pthaugen@us.ibm.com>
6450
+ Peter Bergner <bergner@vnet.ibm.com>
6452
+ * gcc.target/powerpc/atomic-p7.c: New file, add tests for atomic
6453
+ load/store instructions on power7, power8.
6454
+ * gcc.target/powerpc/atomic-p8.c: Likewise.
6456
+ Back port from the trunk
6458
+ 2013-06-10 Michael Meissner <meissner@linux.vnet.ibm.com>
6459
+ Pat Haugen <pthaugen@us.ibm.com>
6460
+ Peter Bergner <bergner@vnet.ibm.com>
6462
+ * gcc.target/powerpc/direct-move-vint1.c: New tests for power8
6463
+ direct move instructions.
6464
+ * gcc.target/powerpc/direct-move-vint2.c: Likewise.
6465
+ * gcc.target/powerpc/direct-move.h: Likewise.
6466
+ * gcc.target/powerpc/direct-move-float1.c: Likewise.
6467
+ * gcc.target/powerpc/direct-move-float2.c: Likewise.
6468
+ * gcc.target/powerpc/direct-move-double1.c: Likewise.
6469
+ * gcc.target/powerpc/direct-move-double2.c: Likewise.
6470
+ * gcc.target/powerpc/direct-move-long1.c: Likewise.
6471
+ * gcc.target/powerpc/direct-move-long2.c: Likewise.
6473
+2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6475
+ Backport from the trunk
6477
+ 2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6478
+ Pat Haugen <pthaugen@us.ibm.com>
6479
+ Peter Bergner <bergner@vnet.ibm.com>
6481
+ * gcc.target/powerpc/p8vector-builtin-1.c: New test to test
6482
+ power8 builtin functions.
6483
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c: Likewise.
6484
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c: Likewise.
6485
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c: Likewise.
6486
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c: Likewise.
6487
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c: Likewise.
6488
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c: Likewise.
6489
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c: New
6490
+ tests to test power8 auto-vectorization.
6491
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c: Likewise.
6492
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c: Likewise.
6493
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c: Likewise.
6494
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c: Likewise.
6496
+ * gcc.target/powerpc/crypto-builtin-1.c: Use effective target
6497
+ powerpc_p8vector_ok instead of powerpc_vsx_ok.
6499
+ * gcc.target/powerpc/bool.c: New file, add eqv, nand, nor tests.
6501
+ * lib/target-supports.exp (check_p8vector_hw_available) Add power8
6503
+ (check_effective_target_powerpc_p8vector_ok): Likewise.
6504
+ (is-effective-target): Likewise.
6505
+ (check_vect_support_and_set_flags): Likewise.
6507
+2013-06-06 Peter Bergner <bergner@vnet.ibm.com>
6509
+ Backport from trunk
6511
+ 2013-05-22 Michael Meissner <meissner@linux.vnet.ibm.com>
6512
+ Pat Haugen <pthaugen@us.ibm.com>
6513
+ Peter Bergner <bergner@vnet.ibm.com>
6515
+ * gcc.target/powerpc/crypto-builtin-1.c: New file, test for power8
6518
+2013-05-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6520
+ Backport from trunk
6521
+ 2013-05-03 Michael Meissner <meissner@linux.vnet.ibm.com>
6524
+ * gcc.target/powerpc/pr57150.c: New file.
6526
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
6528
+ Backport from mainline
6529
+ 2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
6531
+ * gcc.target/powerpc/mmfpgpr.c: New test.
6532
+ * gcc.target/powerpc/sd-vsx.c: Likewise.
6533
+ * gcc.target/powerpc/sd-pwr6.c: Likewise.
6534
+ * gcc.target/powerpc/vsx-float0.c: Likewise.
6536
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
6538
+ Clone branch from gcc-4_8-branch, subversion id 196835.
6539
--- a/src/gcc/testsuite/lib/target-supports.exp
6540
+++ b/src/gcc/testsuite/lib/target-supports.exp
6541
@@ -1311,6 +1311,32 @@
6545
+# Return 1 if the target supports executing power8 vector instructions, 0
6546
+# otherwise. Cache the result.
6548
+proc check_p8vector_hw_available { } {
6549
+ return [check_cached_effective_target p8vector_hw_available {
6550
+ # Some simulators are known to not support VSX/power8 instructions.
6551
+ # For now, disable on Darwin
6552
+ if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} {
6555
+ set options "-mpower8-vector"
6556
+ check_runtime_nocache p8vector_hw_available {
6560
+ asm volatile ("xxlorc vs0,vs0,vs0");
6562
+ asm volatile ("xxlorc 0,0,0");
6571
# Return 1 if the target supports executing VSX instructions, 0
6572
# otherwise. Cache the result.
6574
@@ -2672,6 +2698,33 @@
6578
+# Return 1 if this is a PowerPC target supporting -mpower8-vector
6580
+proc check_effective_target_powerpc_p8vector_ok { } {
6581
+ if { ([istarget powerpc*-*-*]
6582
+ && ![istarget powerpc-*-linux*paired*])
6583
+ || [istarget rs6000-*-*] } {
6584
+ # AltiVec is not supported on AIX before 5.3.
6585
+ if { [istarget powerpc*-*-aix4*]
6586
+ || [istarget powerpc*-*-aix5.1*]
6587
+ || [istarget powerpc*-*-aix5.2*] } {
6590
+ return [check_no_compiler_messages powerpc_p8vector_ok object {
6593
+ asm volatile ("xxlorc vs0,vs0,vs0");
6595
+ asm volatile ("xxlorc 0,0,0");
6599
+ } "-mpower8-vector"]
6605
# Return 1 if this is a PowerPC target supporting -mvsx
6607
proc check_effective_target_powerpc_vsx_ok { } {
6608
@@ -2699,6 +2752,27 @@
6612
+# Return 1 if this is a PowerPC target supporting -mhtm
6614
+proc check_effective_target_powerpc_htm_ok { } {
6615
+ if { ([istarget powerpc*-*-*]
6616
+ && ![istarget powerpc-*-linux*paired*])
6617
+ || [istarget rs6000-*-*] } {
6618
+ # HTM is not supported on AIX yet.
6619
+ if { [istarget powerpc*-*-aix*] } {
6622
+ return [check_no_compiler_messages powerpc_htm_ok object {
6624
+ asm volatile ("tbegin. 0");
6633
# Return 1 if this is a PowerPC target supporting -mcpu=cell.
6635
proc check_effective_target_powerpc_ppu_ok { } {
6636
@@ -2794,6 +2868,22 @@
6640
+# Return 1 if this is a PowerPC target using the ELFv2 ABI.
6642
+proc check_effective_target_powerpc_elfv2 { } {
6643
+ if { [istarget powerpc*-*-*] } {
6644
+ return [check_no_compiler_messages powerpc_elfv2 object {
6645
+ #if _CALL_ELF != 2
6646
+ #error not ELF v2 ABI
6656
# Return 1 if this is a SPU target with a toolchain that
6657
# supports automatic overlay generation.
6659
@@ -4499,6 +4589,7 @@
6661
"vmx_hw" { set selected [check_vmx_hw_available] }
6662
"vsx_hw" { set selected [check_vsx_hw_available] }
6663
+ "p8vector_hw" { set selected [check_p8vector_hw_available] }
6664
"ppc_recip_hw" { set selected [check_ppc_recip_hw_available] }
6665
"named_sections" { set selected [check_named_sections_available] }
6666
"gc_sections" { set selected [check_gc_sections_available] }
6667
@@ -4520,6 +4611,7 @@
6669
"vmx_hw" { return 1 }
6670
"vsx_hw" { return 1 }
6671
+ "p8vector_hw" { return 1 }
6672
"ppc_recip_hw" { return 1 }
6673
"named_sections" { return 1 }
6674
"gc_sections" { return 1 }
6675
@@ -5077,7 +5169,9 @@
6678
lappend DEFAULT_VECTCFLAGS "-maltivec"
6679
- if [check_vsx_hw_available] {
6680
+ if [check_p8vector_hw_available] {
6681
+ lappend DEFAULT_VECTCFLAGS "-mpower8-vector" "-mno-allow-movmisalign"
6682
+ } elseif [check_vsx_hw_available] {
6683
lappend DEFAULT_VECTCFLAGS "-mvsx" "-mno-allow-movmisalign"
6686
--- a/src/gcc/testsuite/gfortran.dg/nan_7.f90
6687
+++ b/src/gcc/testsuite/gfortran.dg/nan_7.f90
6689
! { dg-options "-fno-range-check" }
6690
! { dg-require-effective-target fortran_real_16 }
6691
! { dg-require-effective-target fortran_integer_16 }
6692
+! { dg-skip-if "" { "powerpc*le-*-*" } { "*" } { "" } }
6693
! PR47293 NAN not correctly read
6694
character(len=200) :: str
6696
--- a/src/gcc/testsuite/gcc.dg/vmx/stl-be-order.c
6697
+++ b/src/gcc/testsuite/gcc.dg/vmx/stl-be-order.c
6699
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
6701
+#include "harness.h"
6703
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
6704
+static signed char svsc[16] __attribute__ ((aligned (16)));
6705
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
6706
+static unsigned short svus[8] __attribute__ ((aligned (16)));
6707
+static signed short svss[8] __attribute__ ((aligned (16)));
6708
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
6709
+static unsigned short svp[8] __attribute__ ((aligned (16)));
6710
+static unsigned int svui[4] __attribute__ ((aligned (16)));
6711
+static signed int svsi[4] __attribute__ ((aligned (16)));
6712
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
6713
+static float svf[4] __attribute__ ((aligned (16)));
6715
+static void check_arrays ()
6718
+ for (i = 0; i < 16; ++i)
6720
+ check (svuc[i] == i, "svuc");
6721
+ check (svsc[i] == i - 8, "svsc");
6722
+ check (svbc[i] == ((i % 2) ? 0xff : 0), "svbc");
6724
+ for (i = 0; i < 8; ++i)
6726
+ check (svus[i] == i, "svus");
6727
+ check (svss[i] == i - 4, "svss");
6728
+ check (svbs[i] == ((i % 2) ? 0xffff : 0), "svbs");
6729
+ check (svp[i] == i, "svp");
6731
+ for (i = 0; i < 4; ++i)
6733
+ check (svui[i] == i, "svui");
6734
+ check (svsi[i] == i - 2, "svsi");
6735
+ check (svbi[i] == ((i % 2) ? 0xffffffff : 0), "svbi");
6736
+ check (svf[i] == i * 1.0f, "svf");
6740
+static void test ()
6742
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
6743
+ vector unsigned char vuc = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
6744
+ vector signed char vsc = {7,6,5,4,3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8};
6745
+ vector bool char vbc = {255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0};
6746
+ vector unsigned short vus = {7,6,5,4,3,2,1,0};
6747
+ vector signed short vss = {3,2,1,0,-1,-2,-3,-4};
6748
+ vector bool short vbs = {65535,0,65535,0,65535,0,65535,0};
6749
+ vector pixel vp = {7,6,5,4,3,2,1,0};
6750
+ vector unsigned int vui = {3,2,1,0};
6751
+ vector signed int vsi = {1,0,-1,-2};
6752
+ vector bool int vbi = {0xffffffff,0,0xffffffff,0};
6753
+ vector float vf = {3.0,2.0,1.0,0.0};
6755
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6756
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
6757
+ vector bool char vbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
6758
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
6759
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
6760
+ vector bool short vbs = {0,65535,0,65535,0,65535,0,65535};
6761
+ vector pixel vp = {0,1,2,3,4,5,6,7};
6762
+ vector unsigned int vui = {0,1,2,3};
6763
+ vector signed int vsi = {-2,-1,0,1};
6764
+ vector bool int vbi = {0,0xffffffff,0,0xffffffff};
6765
+ vector float vf = {0.0,1.0,2.0,3.0};
6768
+ vec_stl (vuc, 0, (vector unsigned char *)svuc);
6769
+ vec_stl (vsc, 0, (vector signed char *)svsc);
6770
+ vec_stl (vbc, 0, (vector bool char *)svbc);
6771
+ vec_stl (vus, 0, (vector unsigned short *)svus);
6772
+ vec_stl (vss, 0, (vector signed short *)svss);
6773
+ vec_stl (vbs, 0, (vector bool short *)svbs);
6774
+ vec_stl (vp, 0, (vector pixel *)svp);
6775
+ vec_stl (vui, 0, (vector unsigned int *)svui);
6776
+ vec_stl (vsi, 0, (vector signed int *)svsi);
6777
+ vec_stl (vbi, 0, (vector bool int *)svbi);
6778
+ vec_stl (vf, 0, (vector float *)svf);
6782
--- a/src/gcc/testsuite/gcc.dg/vmx/perm-be-order.c
6783
+++ b/src/gcc/testsuite/gcc.dg/vmx/perm-be-order.c
6785
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
6787
+#include "harness.h"
6791
+ /* Input vectors. */
6792
+ vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6793
+ vector unsigned char vucb = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6794
+ vector signed char vsca = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
6795
+ vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6796
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
6797
+ vector unsigned short vusb = {8,9,10,11,12,13,14,15};
6798
+ vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
6799
+ vector signed short vssb = {0,1,2,3,4,5,6,7};
6800
+ vector unsigned int vuia = {0,1,2,3};
6801
+ vector unsigned int vuib = {4,5,6,7};
6802
+ vector signed int vsia = {-4,-3,-2,-1};
6803
+ vector signed int vsib = {0,1,2,3};
6804
+ vector float vfa = {-4.0,-3.0,-2.0,-1.0};
6805
+ vector float vfb = {0.0,1.0,2.0,3.0};
6807
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
6808
+ vector unsigned char vucp = {15,16,14,17,13,18,12,19,11,20,10,21,9,22,8,23};
6809
+ vector unsigned char vscp = {15,16,14,17,13,18,12,19,11,20,10,21,9,22,8,23};
6810
+ vector unsigned char vusp = {15,14,17,16,13,12,19,18,11,10,21,20,9,8,23,22};
6811
+ vector unsigned char vssp = {15,14,17,16,13,12,19,18,11,10,21,20,9,8,23,22};
6812
+ vector unsigned char vuip = {15,14,13,12,19,18,17,16,11,10,9,8,23,22,21,20};
6813
+ vector unsigned char vsip = {15,14,13,12,19,18,17,16,11,10,9,8,23,22,21,20};
6814
+ vector unsigned char vfp = {15,14,13,12,19,18,17,16,11,10,9,8,23,22,21,20};
6816
+ vector unsigned char vucp = {0,31,1,30,2,29,3,28,4,27,5,26,6,25,7,24};
6817
+ vector unsigned char vscp = {0,31,1,30,2,29,3,28,4,27,5,26,6,25,7,24};
6818
+ vector unsigned char vusp = {0,1,30,31,2,3,28,29,4,5,26,27,6,7,24,25};
6819
+ vector unsigned char vssp = {0,1,30,31,2,3,28,29,4,5,26,27,6,7,24,25};
6820
+ vector unsigned char vuip = {0,1,2,3,28,29,30,31,4,5,6,7,24,25,26,27};
6821
+ vector unsigned char vsip = {0,1,2,3,28,29,30,31,4,5,6,7,24,25,26,27};
6822
+ vector unsigned char vfp = {0,1,2,3,28,29,30,31,4,5,6,7,24,25,26,27};
6825
+ /* Result vectors. */
6826
+ vector unsigned char vuc;
6827
+ vector signed char vsc;
6828
+ vector unsigned short vus;
6829
+ vector signed short vss;
6830
+ vector unsigned int vui;
6831
+ vector signed int vsi;
6834
+ /* Expected result vectors. */
6835
+ vector unsigned char vucr = {0,31,1,30,2,29,3,28,4,27,5,26,6,25,7,24};
6836
+ vector signed char vscr = {-16,15,-15,14,-14,13,-13,12,-12,11,-11,10,-10,9,-9,8};
6837
+ vector unsigned short vusr = {0,15,1,14,2,13,3,12};
6838
+ vector signed short vssr = {-8,7,-7,6,-6,5,-5,4};
6839
+ vector unsigned int vuir = {0,7,1,6};
6840
+ vector signed int vsir = {-4,3,-3,2};
6841
+ vector float vfr = {-4.0,3.0,-3.0,2.0};
6843
+ vuc = vec_perm (vuca, vucb, vucp);
6844
+ vsc = vec_perm (vsca, vscb, vscp);
6845
+ vus = vec_perm (vusa, vusb, vusp);
6846
+ vss = vec_perm (vssa, vssb, vssp);
6847
+ vui = vec_perm (vuia, vuib, vuip);
6848
+ vsi = vec_perm (vsia, vsib, vsip);
6849
+ vf = vec_perm (vfa, vfb, vfp );
6851
+ check (vec_all_eq (vuc, vucr), "vuc");
6852
+ check (vec_all_eq (vsc, vscr), "vsc");
6853
+ check (vec_all_eq (vus, vusr), "vus");
6854
+ check (vec_all_eq (vss, vssr), "vss");
6855
+ check (vec_all_eq (vui, vuir), "vui");
6856
+ check (vec_all_eq (vsi, vsir), "vsi");
6857
+ check (vec_all_eq (vf, vfr), "vf" );
6859
--- a/src/gcc/testsuite/gcc.dg/vmx/insert-be-order.c
6860
+++ b/src/gcc/testsuite/gcc.dg/vmx/insert-be-order.c
6862
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
6864
+#include "harness.h"
6868
+ vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6869
+ vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
6870
+ vector unsigned short vc = {0,1,2,3,4,5,6,7};
6871
+ vector signed short vd = {-4,-3,-2,-1,0,1,2,3};
6872
+ vector unsigned int ve = {0,1,2,3};
6873
+ vector signed int vf = {-2,-1,0,1};
6874
+ vector float vg = {-2.0f,-1.0f,0.0f,1.0f};
6876
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
6877
+ check (vec_all_eq (vec_insert (16, va, 5),
6878
+ ((vector unsigned char)
6879
+ {0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15})),
6880
+ "vec_insert (va LE)");
6881
+ check (vec_all_eq (vec_insert (-16, vb, 0),
6882
+ ((vector signed char)
6883
+ {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,-16})),
6884
+ "vec_insert (vb LE)");
6885
+ check (vec_all_eq (vec_insert (16, vc, 7),
6886
+ ((vector unsigned short){16,1,2,3,4,5,6,7})),
6887
+ "vec_insert (vc LE)");
6888
+ check (vec_all_eq (vec_insert (-16, vd, 3),
6889
+ ((vector signed short){-4,-3,-2,-1,-16,1,2,3})),
6890
+ "vec_insert (vd LE)");
6891
+ check (vec_all_eq (vec_insert (16, ve, 2),
6892
+ ((vector unsigned int){0,16,2,3})),
6893
+ "vec_insert (ve LE)");
6894
+ check (vec_all_eq (vec_insert (-16, vf, 1),
6895
+ ((vector signed int){-2,-1,-16,1})),
6896
+ "vec_insert (vf LE)");
6897
+ check (vec_all_eq (vec_insert (-16.0f, vg, 0),
6898
+ ((vector float){-2.0f,-1.0f,0.0f,-16.0f})),
6899
+ "vec_insert (vg LE)");
6901
+ check (vec_all_eq (vec_insert (16, va, 5),
6902
+ ((vector unsigned char)
6903
+ {0,1,2,3,4,16,6,7,8,9,10,11,12,13,14,15})),
6904
+ "vec_insert (va BE)");
6905
+ check (vec_all_eq (vec_insert (-16, vb, 0),
6906
+ ((vector signed char)
6907
+ {-16,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7})),
6908
+ "vec_insert (vb BE)");
6909
+ check (vec_all_eq (vec_insert (16, vc, 7),
6910
+ ((vector unsigned short){0,1,2,3,4,5,6,16})),
6911
+ "vec_insert (vc BE)");
6912
+ check (vec_all_eq (vec_insert (-16, vd, 3),
6913
+ ((vector signed short){-4,-3,-2,-16,0,1,2,3})),
6914
+ "vec_insert (vd BE)");
6915
+ check (vec_all_eq (vec_insert (16, ve, 2),
6916
+ ((vector unsigned int){0,1,16,3})),
6917
+ "vec_insert (ve BE)");
6918
+ check (vec_all_eq (vec_insert (-16, vf, 1),
6919
+ ((vector signed int){-2,-16,0,1})),
6920
+ "vec_insert (vf BE)");
6921
+ check (vec_all_eq (vec_insert (-16.0f, vg, 0),
6922
+ ((vector float){-16.0f,-1.0f,0.0f,1.0f})),
6923
+ "vec_insert (vg BE)");
6927
--- a/src/gcc/testsuite/gcc.dg/vmx/ldl.c
6928
+++ b/src/gcc/testsuite/gcc.dg/vmx/ldl.c
6930
+#include "harness.h"
6932
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
6933
+static signed char svsc[16] __attribute__ ((aligned (16)));
6934
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
6935
+static unsigned short svus[8] __attribute__ ((aligned (16)));
6936
+static signed short svss[8] __attribute__ ((aligned (16)));
6937
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
6938
+static unsigned short svp[8] __attribute__ ((aligned (16)));
6939
+static unsigned int svui[4] __attribute__ ((aligned (16)));
6940
+static signed int svsi[4] __attribute__ ((aligned (16)));
6941
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
6942
+static float svf[4] __attribute__ ((aligned (16)));
6944
+static void init ()
6947
+ for (i = 0; i < 16; ++i)
6951
+ svbc[i] = (i % 2) ? 0xff : 0;
6953
+ for (i = 0; i < 8; ++i)
6957
+ svbs[i] = (i % 2) ? 0xffff : 0;
6960
+ for (i = 0; i < 4; ++i)
6964
+ svbi[i] = (i % 2) ? 0xffffffff : 0;
6965
+ svf[i] = i * 1.0f;
6969
+static void test ()
6971
+ vector unsigned char evuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6972
+ vector signed char evsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
6973
+ vector bool char evbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
6974
+ vector unsigned short evus = {0,1,2,3,4,5,6,7};
6975
+ vector signed short evss = {-4,-3,-2,-1,0,1,2,3};
6976
+ vector bool short evbs = {0,65535,0,65535,0,65535,0,65535};
6977
+ vector pixel evp = {0,1,2,3,4,5,6,7};
6978
+ vector unsigned int evui = {0,1,2,3};
6979
+ vector signed int evsi = {-2,-1,0,1};
6980
+ vector bool int evbi = {0,0xffffffff,0,0xffffffff};
6981
+ vector float evf = {0.0,1.0,2.0,3.0};
6983
+ vector unsigned char vuc;
6984
+ vector signed char vsc;
6985
+ vector bool char vbc;
6986
+ vector unsigned short vus;
6987
+ vector signed short vss;
6988
+ vector bool short vbs;
6990
+ vector unsigned int vui;
6991
+ vector signed int vsi;
6992
+ vector bool int vbi;
6997
+ vuc = vec_ldl (0, (vector unsigned char *)svuc);
6998
+ vsc = vec_ldl (0, (vector signed char *)svsc);
6999
+ vbc = vec_ldl (0, (vector bool char *)svbc);
7000
+ vus = vec_ldl (0, (vector unsigned short *)svus);
7001
+ vss = vec_ldl (0, (vector signed short *)svss);
7002
+ vbs = vec_ldl (0, (vector bool short *)svbs);
7003
+ vp = vec_ldl (0, (vector pixel *)svp);
7004
+ vui = vec_ldl (0, (vector unsigned int *)svui);
7005
+ vsi = vec_ldl (0, (vector signed int *)svsi);
7006
+ vbi = vec_ldl (0, (vector bool int *)svbi);
7007
+ vf = vec_ldl (0, (vector float *)svf);
7009
+ check (vec_all_eq (vuc, evuc), "vuc");
7010
+ check (vec_all_eq (vsc, evsc), "vsc");
7011
+ check (vec_all_eq (vbc, evbc), "vbc");
7012
+ check (vec_all_eq (vus, evus), "vus");
7013
+ check (vec_all_eq (vss, evss), "vss");
7014
+ check (vec_all_eq (vbs, evbs), "vbs");
7015
+ check (vec_all_eq (vp, evp ), "vp" );
7016
+ check (vec_all_eq (vui, evui), "vui");
7017
+ check (vec_all_eq (vsi, evsi), "vsi");
7018
+ check (vec_all_eq (vbi, evbi), "vbi");
7019
+ check (vec_all_eq (vf, evf ), "vf" );
7021
--- a/src/gcc/testsuite/gcc.dg/vmx/stl-vsx-be-order.c
7022
+++ b/src/gcc/testsuite/gcc.dg/vmx/stl-vsx-be-order.c
7024
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
7025
+/* { dg-require-effective-target powerpc_vsx_ok } */
7026
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
7028
+#include "harness.h"
7030
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
7031
+static double svd[2] __attribute__ ((aligned (16)));
7033
+static void check_arrays ()
7036
+ for (i = 0; i < 2; ++i)
7038
+ check (svul[i] == i, "svul");
7039
+ check (svd[i] == i * 1.0, "svd");
7043
+static void test ()
7045
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7046
+ vector unsigned long long vul = {1,0};
7047
+ vector double vd = {1.0,0.0};
7049
+ vector unsigned long long vul = {0,1};
7050
+ vector double vd = {0.0,1.0};
7053
+ vec_stl (vul, 0, (vector unsigned long long *)svul);
7054
+ vec_stl (vd, 0, (vector double *)svd);
7058
--- a/src/gcc/testsuite/gcc.dg/vmx/vsums.c
7059
+++ b/src/gcc/testsuite/gcc.dg/vmx/vsums.c
7061
+#include "harness.h"
7065
+ vector signed int va = {-7,11,-13,17};
7066
+ vector signed int vb = {0,0,0,128};
7067
+ vector signed int evd = {0,0,0,136};
7069
+ vector signed int vd = vec_sums (va, vb);
7071
+ check (vec_all_eq (vd, evd), "sums");
7073
--- a/src/gcc/testsuite/gcc.dg/vmx/insert-vsx-be-order.c
7074
+++ b/src/gcc/testsuite/gcc.dg/vmx/insert-vsx-be-order.c
7076
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
7077
+/* { dg-require-effective-target powerpc_vsx_ok } */
7078
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
7080
+#include "harness.h"
7082
+static int vec_long_long_eq (vector long long x, vector long long y)
7084
+ return (x[0] == y[0] && x[1] == y[1]);
7087
+static int vec_dbl_eq (vector double x, vector double y)
7089
+ return (x[0] == y[0] && x[1] == y[1]);
7094
+ vector long long vl = {0, 1};
7095
+ vector double vd = {0.0, 1.0};
7096
+ vector long long vlr = vec_insert (2, vl, 0);
7097
+ vector double vdr = vec_insert (2.0, vd, 1);
7099
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7100
+ vector long long vler = {0, 2};
7101
+ vector double vder = {2.0, 1.0};
7103
+ vector long long vler = {2, 1};
7104
+ vector double vder = {0.0, 2.0};
7107
+ check (vec_long_long_eq (vlr, vler), "vl");
7108
+ check (vec_dbl_eq (vdr, vder), "vd");
7110
--- a/src/gcc/testsuite/gcc.dg/vmx/unpack.c
7111
+++ b/src/gcc/testsuite/gcc.dg/vmx/unpack.c
7113
+#include "harness.h"
7115
+#define BIG 4294967295
7119
+ /* Input vectors. */
7120
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
7121
+ vector bool char vbc = {0,255,255,0,0,0,255,0,255,0,0,255,255,255,0,255};
7122
+ vector pixel vp = {(0<<15) + (1<<10) + (2<<5) + 3,
7123
+ (1<<15) + (4<<10) + (5<<5) + 6,
7124
+ (0<<15) + (7<<10) + (8<<5) + 9,
7125
+ (1<<15) + (10<<10) + (11<<5) + 12,
7126
+ (1<<15) + (13<<10) + (14<<5) + 15,
7127
+ (0<<15) + (16<<10) + (17<<5) + 18,
7128
+ (1<<15) + (19<<10) + (20<<5) + 21,
7129
+ (0<<15) + (22<<10) + (23<<5) + 24};
7130
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
7131
+ vector bool short vbs = {0,65535,65535,0,0,0,65535,0};
7133
+ /* Result vectors. */
7134
+ vector signed short vsch, vscl;
7135
+ vector bool short vbsh, vbsl;
7136
+ vector unsigned int vuih, vuil;
7137
+ vector signed int vsih, vsil;
7138
+ vector bool int vbih, vbil;
7140
+ /* Expected result vectors. */
7141
+ vector signed short vschr = {-8,-7,-6,-5,-4,-3,-2,-1};
7142
+ vector signed short vsclr = {0,1,2,3,4,5,6,7};
7143
+ vector bool short vbshr = {0,65535,65535,0,0,0,65535,0};
7144
+ vector bool short vbslr = {65535,0,0,65535,65535,65535,0,65535};
7145
+ vector unsigned int vuihr = {(0<<24) + (1<<16) + (2<<8) + 3,
7146
+ (65535<<24) + (4<<16) + (5<<8) + 6,
7147
+ (0<<24) + (7<<16) + (8<<8) + 9,
7148
+ (65535<<24) + (10<<16) + (11<<8) + 12};
7149
+ vector unsigned int vuilr = {(65535<<24) + (13<<16) + (14<<8) + 15,
7150
+ (0<<24) + (16<<16) + (17<<8) + 18,
7151
+ (65535<<24) + (19<<16) + (20<<8) + 21,
7152
+ (0<<24) + (22<<16) + (23<<8) + 24};
7153
+ vector signed int vsihr = {-4,-3,-2,-1};
7154
+ vector signed int vsilr = {0,1,2,3};
7155
+ vector bool int vbihr = {0,BIG,BIG,0};
7156
+ vector bool int vbilr = {0,0,BIG,0};
7158
+ vsch = vec_unpackh (vsc);
7159
+ vscl = vec_unpackl (vsc);
7160
+ vbsh = vec_unpackh (vbc);
7161
+ vbsl = vec_unpackl (vbc);
7162
+ vuih = vec_unpackh (vp);
7163
+ vuil = vec_unpackl (vp);
7164
+ vsih = vec_unpackh (vss);
7165
+ vsil = vec_unpackl (vss);
7166
+ vbih = vec_unpackh (vbs);
7167
+ vbil = vec_unpackl (vbs);
7169
+ check (vec_all_eq (vsch, vschr), "vsch");
7170
+ check (vec_all_eq (vscl, vsclr), "vscl");
7171
+ check (vec_all_eq (vbsh, vbshr), "vbsh");
7172
+ check (vec_all_eq (vbsl, vbslr), "vbsl");
7173
+ check (vec_all_eq (vuih, vuihr), "vuih");
7174
+ check (vec_all_eq (vuil, vuilr), "vuil");
7175
+ check (vec_all_eq (vsih, vsihr), "vsih");
7176
+ check (vec_all_eq (vsil, vsilr), "vsil");
7177
+ check (vec_all_eq (vbih, vbihr), "vbih");
7178
+ check (vec_all_eq (vbil, vbilr), "vbil");
7180
--- a/src/gcc/testsuite/gcc.dg/vmx/splat.c
7181
+++ b/src/gcc/testsuite/gcc.dg/vmx/splat.c
7183
+#include "harness.h"
7187
+ /* Input vectors. */
7188
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7189
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
7190
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
7191
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
7192
+ vector unsigned int vui = {0,1,2,3};
7193
+ vector signed int vsi = {-2,-1,0,1};
7194
+ vector float vf = {-2.0,-1.0,0.0,1.0};
7196
+ /* Result vectors. */
7197
+ vector unsigned char vucr;
7198
+ vector signed char vscr;
7199
+ vector unsigned short vusr;
7200
+ vector signed short vssr;
7201
+ vector unsigned int vuir;
7202
+ vector signed int vsir;
7205
+ /* Expected result vectors. */
7206
+ vector unsigned char vucer = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
7207
+ vector signed char vscer = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
7208
+ vector unsigned short vuser = {7,7,7,7,7,7,7,7};
7209
+ vector signed short vsser = {-4,-4,-4,-4,-4,-4,-4,-4};
7210
+ vector unsigned int vuier = {2,2,2,2};
7211
+ vector signed int vsier = {1,1,1,1};
7212
+ vector float vfer = {-1.0,-1.0,-1.0,-1.0};
7214
+ vucr = vec_splat (vuc, 1);
7215
+ vscr = vec_splat (vsc, 8);
7216
+ vusr = vec_splat (vus, 7);
7217
+ vssr = vec_splat (vss, 0);
7218
+ vuir = vec_splat (vui, 2);
7219
+ vsir = vec_splat (vsi, 3);
7220
+ vfr = vec_splat (vf, 1);
7222
+ check (vec_all_eq (vucr, vucer), "vuc");
7223
+ check (vec_all_eq (vscr, vscer), "vsc");
7224
+ check (vec_all_eq (vusr, vuser), "vus");
7225
+ check (vec_all_eq (vssr, vsser), "vss");
7226
+ check (vec_all_eq (vuir, vuier), "vui");
7227
+ check (vec_all_eq (vsir, vsier), "vsi");
7228
+ check (vec_all_eq (vfr, vfer ), "vf");
7230
--- a/src/gcc/testsuite/gcc.dg/vmx/ldl-vsx-be-order.c
7231
+++ b/src/gcc/testsuite/gcc.dg/vmx/ldl-vsx-be-order.c
7233
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
7234
+/* { dg-require-effective-target powerpc_vsx_ok } */
7235
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
7237
+#include "harness.h"
7239
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
7240
+static double svd[2] __attribute__ ((aligned (16)));
7242
+static void init ()
7245
+ for (i = 0; i < 2; ++i)
7252
+static void test ()
7254
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7255
+ vector unsigned long long evul = {1,0};
7256
+ vector double evd = {1.0,0.0};
7258
+ vector unsigned long long evul = {0,1};
7259
+ vector double evd = {0.0,1.0};
7262
+ vector unsigned long long vul;
7268
+ vul = vec_ldl (0, (vector unsigned long long *)svul);
7269
+ vd = vec_ldl (0, (vector double *)svd);
7271
+ for (i = 0; i < 2; ++i)
7273
+ check (vul[i] == evul[i], "vul");
7274
+ check (vd[i] == evd[i], "vd" );
7277
--- a/src/gcc/testsuite/gcc.dg/vmx/merge-be-order.c
7278
+++ b/src/gcc/testsuite/gcc.dg/vmx/merge-be-order.c
7280
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
7282
+#include "harness.h"
7286
+ /* Input vectors. */
7287
+ vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7288
+ vector unsigned char vucb
7289
+ = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7290
+ vector signed char vsca
7291
+ = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
7292
+ vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7293
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
7294
+ vector unsigned short vusb = {8,9,10,11,12,13,14,15};
7295
+ vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
7296
+ vector signed short vssb = {0,1,2,3,4,5,6,7};
7297
+ vector unsigned int vuia = {0,1,2,3};
7298
+ vector unsigned int vuib = {4,5,6,7};
7299
+ vector signed int vsia = {-4,-3,-2,-1};
7300
+ vector signed int vsib = {0,1,2,3};
7301
+ vector float vfa = {-4.0,-3.0,-2.0,-1.0};
7302
+ vector float vfb = {0.0,1.0,2.0,3.0};
7304
+ /* Result vectors. */
7305
+ vector unsigned char vuch, vucl;
7306
+ vector signed char vsch, vscl;
7307
+ vector unsigned short vush, vusl;
7308
+ vector signed short vssh, vssl;
7309
+ vector unsigned int vuih, vuil;
7310
+ vector signed int vsih, vsil;
7311
+ vector float vfh, vfl;
7313
+ /* Expected result vectors. */
7314
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7315
+ vector unsigned char vucrh = {24,8,25,9,26,10,27,11,28,12,29,13,30,14,31,15};
7316
+ vector unsigned char vucrl = {16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7};
7317
+ vector signed char vscrh = {8,-8,9,-7,10,-6,11,-5,12,-4,13,-3,14,-2,15,-1};
7318
+ vector signed char vscrl = {0,-16,1,-15,2,-14,3,-13,4,-12,5,-11,6,-10,7,-9};
7319
+ vector unsigned short vusrh = {12,4,13,5,14,6,15,7};
7320
+ vector unsigned short vusrl = {8,0,9,1,10,2,11,3};
7321
+ vector signed short vssrh = {4,-4,5,-3,6,-2,7,-1};
7322
+ vector signed short vssrl = {0,-8,1,-7,2,-6,3,-5};
7323
+ vector unsigned int vuirh = {6,2,7,3};
7324
+ vector unsigned int vuirl = {4,0,5,1};
7325
+ vector signed int vsirh = {2,-2,3,-1};
7326
+ vector signed int vsirl = {0,-4,1,-3};
7327
+ vector float vfrh = {2.0,-2.0,3.0,-1.0};
7328
+ vector float vfrl = {0.0,-4.0,1.0,-3.0};
7330
+ vector unsigned char vucrh = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23};
7331
+ vector unsigned char vucrl = {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31};
7332
+ vector signed char vscrh = {-16,0,-15,1,-14,2,-13,3,-12,4,-11,5,-10,6,-9,7};
7333
+ vector signed char vscrl = {-8,8,-7,9,-6,10,-5,11,-4,12,-3,13,-2,14,-1,15};
7334
+ vector unsigned short vusrh = {0,8,1,9,2,10,3,11};
7335
+ vector unsigned short vusrl = {4,12,5,13,6,14,7,15};
7336
+ vector signed short vssrh = {-8,0,-7,1,-6,2,-5,3};
7337
+ vector signed short vssrl = {-4,4,-3,5,-2,6,-1,7};
7338
+ vector unsigned int vuirh = {0,4,1,5};
7339
+ vector unsigned int vuirl = {2,6,3,7};
7340
+ vector signed int vsirh = {-4,0,-3,1};
7341
+ vector signed int vsirl = {-2,2,-1,3};
7342
+ vector float vfrh = {-4.0,0.0,-3.0,1.0};
7343
+ vector float vfrl = {-2.0,2.0,-1.0,3.0};
7346
+ vuch = vec_mergeh (vuca, vucb);
7347
+ vucl = vec_mergel (vuca, vucb);
7348
+ vsch = vec_mergeh (vsca, vscb);
7349
+ vscl = vec_mergel (vsca, vscb);
7350
+ vush = vec_mergeh (vusa, vusb);
7351
+ vusl = vec_mergel (vusa, vusb);
7352
+ vssh = vec_mergeh (vssa, vssb);
7353
+ vssl = vec_mergel (vssa, vssb);
7354
+ vuih = vec_mergeh (vuia, vuib);
7355
+ vuil = vec_mergel (vuia, vuib);
7356
+ vsih = vec_mergeh (vsia, vsib);
7357
+ vsil = vec_mergel (vsia, vsib);
7358
+ vfh = vec_mergeh (vfa, vfb );
7359
+ vfl = vec_mergel (vfa, vfb );
7361
+ check (vec_all_eq (vuch, vucrh), "vuch");
7362
+ check (vec_all_eq (vucl, vucrl), "vucl");
7363
+ check (vec_all_eq (vsch, vscrh), "vsch");
7364
+ check (vec_all_eq (vscl, vscrl), "vscl");
7365
+ check (vec_all_eq (vush, vusrh), "vush");
7366
+ check (vec_all_eq (vusl, vusrl), "vusl");
7367
+ check (vec_all_eq (vssh, vssrh), "vssh");
7368
+ check (vec_all_eq (vssl, vssrl), "vssl");
7369
+ check (vec_all_eq (vuih, vuirh), "vuih");
7370
+ check (vec_all_eq (vuil, vuirl), "vuil");
7371
+ check (vec_all_eq (vsih, vsirh), "vsih");
7372
+ check (vec_all_eq (vsil, vsirl), "vsil");
7373
+ check (vec_all_eq (vfh, vfrh), "vfh");
7374
+ check (vec_all_eq (vfl, vfrl), "vfl");
7376
--- a/src/gcc/testsuite/gcc.dg/vmx/splat-vsx-be-order.c
7377
+++ b/src/gcc/testsuite/gcc.dg/vmx/splat-vsx-be-order.c
7379
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
7380
+/* { dg-require-effective-target powerpc_vsx_ok } */
7381
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
7383
+#include "harness.h"
7387
+ /* Input vectors. */
7388
+ vector unsigned int vui = {0,1,2,3};
7389
+ vector signed int vsi = {-2,-1,0,1};
7390
+ vector float vf = {-2.0,-1.0,0.0,1.0};
7392
+ /* Result vectors. */
7393
+ vector unsigned int vuir;
7394
+ vector signed int vsir;
7397
+ /* Expected result vectors. */
7398
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7399
+ vector unsigned int vuier = {1,1,1,1};
7400
+ vector signed int vsier = {-2,-2,-2,-2};
7401
+ vector float vfer = {0.0,0.0,0.0,0.0};
7403
+ vector unsigned int vuier = {2,2,2,2};
7404
+ vector signed int vsier = {1,1,1,1};
7405
+ vector float vfer = {-1.0,-1.0,-1.0,-1.0};
7408
+ vuir = vec_splat (vui, 2);
7409
+ vsir = vec_splat (vsi, 3);
7410
+ vfr = vec_splat (vf, 1);
7412
+ check (vec_all_eq (vuir, vuier), "vui");
7413
+ check (vec_all_eq (vsir, vsier), "vsi");
7414
+ check (vec_all_eq (vfr, vfer ), "vf");
7416
--- a/src/gcc/testsuite/gcc.dg/vmx/merge.c
7417
+++ b/src/gcc/testsuite/gcc.dg/vmx/merge.c
7419
+#include "harness.h"
7423
+ /* Input vectors. */
7424
+ vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7425
+ vector unsigned char vucb
7426
+ = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7427
+ vector signed char vsca
7428
+ = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
7429
+ vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7430
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
7431
+ vector unsigned short vusb = {8,9,10,11,12,13,14,15};
7432
+ vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
7433
+ vector signed short vssb = {0,1,2,3,4,5,6,7};
7434
+ vector unsigned int vuia = {0,1,2,3};
7435
+ vector unsigned int vuib = {4,5,6,7};
7436
+ vector signed int vsia = {-4,-3,-2,-1};
7437
+ vector signed int vsib = {0,1,2,3};
7438
+ vector float vfa = {-4.0,-3.0,-2.0,-1.0};
7439
+ vector float vfb = {0.0,1.0,2.0,3.0};
7441
+ /* Result vectors. */
7442
+ vector unsigned char vuch, vucl;
7443
+ vector signed char vsch, vscl;
7444
+ vector unsigned short vush, vusl;
7445
+ vector signed short vssh, vssl;
7446
+ vector unsigned int vuih, vuil;
7447
+ vector signed int vsih, vsil;
7448
+ vector float vfh, vfl;
7450
+ /* Expected result vectors. */
7451
+ vector unsigned char vucrh = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23};
7452
+ vector unsigned char vucrl = {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31};
7453
+ vector signed char vscrh = {-16,0,-15,1,-14,2,-13,3,-12,4,-11,5,-10,6,-9,7};
7454
+ vector signed char vscrl = {-8,8,-7,9,-6,10,-5,11,-4,12,-3,13,-2,14,-1,15};
7455
+ vector unsigned short vusrh = {0,8,1,9,2,10,3,11};
7456
+ vector unsigned short vusrl = {4,12,5,13,6,14,7,15};
7457
+ vector signed short vssrh = {-8,0,-7,1,-6,2,-5,3};
7458
+ vector signed short vssrl = {-4,4,-3,5,-2,6,-1,7};
7459
+ vector unsigned int vuirh = {0,4,1,5};
7460
+ vector unsigned int vuirl = {2,6,3,7};
7461
+ vector signed int vsirh = {-4,0,-3,1};
7462
+ vector signed int vsirl = {-2,2,-1,3};
7463
+ vector float vfrh = {-4.0,0.0,-3.0,1.0};
7464
+ vector float vfrl = {-2.0,2.0,-1.0,3.0};
7466
+ vuch = vec_mergeh (vuca, vucb);
7467
+ vucl = vec_mergel (vuca, vucb);
7468
+ vsch = vec_mergeh (vsca, vscb);
7469
+ vscl = vec_mergel (vsca, vscb);
7470
+ vush = vec_mergeh (vusa, vusb);
7471
+ vusl = vec_mergel (vusa, vusb);
7472
+ vssh = vec_mergeh (vssa, vssb);
7473
+ vssl = vec_mergel (vssa, vssb);
7474
+ vuih = vec_mergeh (vuia, vuib);
7475
+ vuil = vec_mergel (vuia, vuib);
7476
+ vsih = vec_mergeh (vsia, vsib);
7477
+ vsil = vec_mergel (vsia, vsib);
7478
+ vfh = vec_mergeh (vfa, vfb );
7479
+ vfl = vec_mergel (vfa, vfb );
7481
+ check (vec_all_eq (vuch, vucrh), "vuch");
7482
+ check (vec_all_eq (vucl, vucrl), "vucl");
7483
+ check (vec_all_eq (vsch, vscrh), "vsch");
7484
+ check (vec_all_eq (vscl, vscrl), "vscl");
7485
+ check (vec_all_eq (vush, vusrh), "vush");
7486
+ check (vec_all_eq (vusl, vusrl), "vusl");
7487
+ check (vec_all_eq (vssh, vssrh), "vssh");
7488
+ check (vec_all_eq (vssl, vssrl), "vssl");
7489
+ check (vec_all_eq (vuih, vuirh), "vuih");
7490
+ check (vec_all_eq (vuil, vuirl), "vuil");
7491
+ check (vec_all_eq (vsih, vsirh), "vsih");
7492
+ check (vec_all_eq (vsil, vsirl), "vsil");
7493
+ check (vec_all_eq (vfh, vfrh), "vfh");
7494
+ check (vec_all_eq (vfl, vfrl), "vfl");
7496
--- a/src/gcc/testsuite/gcc.dg/vmx/vec-set.c
7497
+++ b/src/gcc/testsuite/gcc.dg/vmx/vec-set.c
7499
+#include "harness.h"
7504
+ return (vector short){m, 0, 0, 0, 0, 0, 0, 0};
7509
+ check (vec_all_eq (vec_set (7),
7510
+ ((vector short){7, 0, 0, 0, 0, 0, 0, 0})),
7513
--- a/src/gcc/testsuite/gcc.dg/vmx/ld-vsx-be-order.c
7514
+++ b/src/gcc/testsuite/gcc.dg/vmx/ld-vsx-be-order.c
7516
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
7517
+/* { dg-require-effective-target powerpc_vsx_ok } */
7518
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
7520
+#include "harness.h"
7522
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
7523
+static double svd[2] __attribute__ ((aligned (16)));
7525
+static void init ()
7528
+ for (i = 0; i < 2; ++i)
7535
+static void test ()
7537
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7538
+ vector unsigned long long evul = {1,0};
7539
+ vector double evd = {1.0,0.0};
7541
+ vector unsigned long long evul = {0,1};
7542
+ vector double evd = {0.0,1.0};
7545
+ vector unsigned long long vul;
7551
+ vul = vec_ld (0, (vector unsigned long long *)svul);
7552
+ vd = vec_ld (0, (vector double *)svd);
7554
+ for (i = 0; i < 2; ++i)
7556
+ check (vul[i] == evul[i], "vul");
7557
+ check (vd[i] == evd[i], "vd" );
7560
--- a/src/gcc/testsuite/gcc.dg/vmx/extract.c
7561
+++ b/src/gcc/testsuite/gcc.dg/vmx/extract.c
7563
+#include "harness.h"
7567
+ vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7568
+ vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
7569
+ vector unsigned short vc = {0,1,2,3,4,5,6,7};
7570
+ vector signed short vd = {-4,-3,-2,-1,0,1,2,3};
7571
+ vector unsigned int ve = {0,1,2,3};
7572
+ vector signed int vf = {-2,-1,0,1};
7573
+ vector float vg = {-2.0f,-1.0f,0.0f,1.0f};
7575
+ check (vec_extract (va, 5) == 5, "vec_extract (va, 5)");
7576
+ check (vec_extract (vb, 0) == -8, "vec_extract (vb, 0)");
7577
+ check (vec_extract (vc, 7) == 7, "vec_extract (vc, 7)");
7578
+ check (vec_extract (vd, 3) == -1, "vec_extract (vd, 3)");
7579
+ check (vec_extract (ve, 2) == 2, "vec_extract (ve, 2)");
7580
+ check (vec_extract (vf, 1) == -1, "vec_extract (vf, 1)");
7581
+ check (vec_extract (vg, 0) == -2.0f, "vec_extract (vg, 0)");
7584
--- a/src/gcc/testsuite/gcc.dg/vmx/pack-be-order.c
7585
+++ b/src/gcc/testsuite/gcc.dg/vmx/pack-be-order.c
7587
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
7589
+#include "harness.h"
7591
+#define BIG 4294967295
7595
+ /* Input vectors. */
7596
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
7597
+ vector unsigned short vusb = {8,9,10,11,12,13,14,15};
7598
+ vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
7599
+ vector signed short vssb = {0,1,2,3,4,5,6,7};
7600
+ vector bool short vbsa = {0,65535,65535,0,0,0,65535,0};
7601
+ vector bool short vbsb = {65535,0,0,65535,65535,65535,0,65535};
7602
+ vector unsigned int vuia = {0,1,2,3};
7603
+ vector unsigned int vuib = {4,5,6,7};
7604
+ vector signed int vsia = {-4,-3,-2,-1};
7605
+ vector signed int vsib = {0,1,2,3};
7606
+ vector bool int vbia = {0,BIG,BIG,BIG};
7607
+ vector bool int vbib = {BIG,0,0,0};
7608
+ vector unsigned int vipa = {(0<<24) + (2<<19) + (3<<11) + (4<<3),
7609
+ (1<<24) + (5<<19) + (6<<11) + (7<<3),
7610
+ (0<<24) + (8<<19) + (9<<11) + (10<<3),
7611
+ (1<<24) + (11<<19) + (12<<11) + (13<<3)};
7612
+ vector unsigned int vipb = {(1<<24) + (14<<19) + (15<<11) + (16<<3),
7613
+ (0<<24) + (17<<19) + (18<<11) + (19<<3),
7614
+ (1<<24) + (20<<19) + (21<<11) + (22<<3),
7615
+ (0<<24) + (23<<19) + (24<<11) + (25<<3)};
7616
+ vector unsigned short vusc = {0,256,1,257,2,258,3,259};
7617
+ vector unsigned short vusd = {4,260,5,261,6,262,7,263};
7618
+ vector signed short vssc = {-1,-128,0,127,-2,-129,1,128};
7619
+ vector signed short vssd = {-3,-130,2,129,-4,-131,3,130};
7620
+ vector unsigned int vuic = {0,65536,1,65537};
7621
+ vector unsigned int vuid = {2,65538,3,65539};
7622
+ vector signed int vsic = {-1,-32768,0,32767};
7623
+ vector signed int vsid = {-2,-32769,1,32768};
7625
+ /* Result vectors. */
7626
+ vector unsigned char vucr;
7627
+ vector signed char vscr;
7628
+ vector bool char vbcr;
7629
+ vector unsigned short vusr;
7630
+ vector signed short vssr;
7631
+ vector bool short vbsr;
7633
+ vector unsigned char vucsr;
7634
+ vector signed char vscsr;
7635
+ vector unsigned short vussr;
7636
+ vector signed short vsssr;
7637
+ vector unsigned char vucsur1, vucsur2;
7638
+ vector unsigned short vussur1, vussur2;
7640
+ /* Expected result vectors. */
7641
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7642
+ vector unsigned char vucer = {8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7};
7643
+ vector signed char vscer = {0,1,2,3,4,5,6,7,-8,-7,-6,-5,-4,-3,-2,-1};
7644
+ vector bool char vbcer = {255,0,0,255,255,255,0,255,0,255,255,0,0,0,255,0};
7645
+ vector unsigned short vuser = {4,5,6,7,0,1,2,3};
7646
+ vector signed short vsser = {0,1,2,3,-4,-3,-2,-1};
7647
+ vector bool short vbser = {65535,0,0,0,0,65535,65535,65535};
7648
+ vector pixel vper = {(1<<15) + (14<<10) + (15<<5) + 16,
7649
+ (0<<15) + (17<<10) + (18<<5) + 19,
7650
+ (1<<15) + (20<<10) + (21<<5) + 22,
7651
+ (0<<15) + (23<<10) + (24<<5) + 25,
7652
+ (0<<15) + (2<<10) + (3<<5) + 4,
7653
+ (1<<15) + (5<<10) + (6<<5) + 7,
7654
+ (0<<15) + (8<<10) + (9<<5) + 10,
7655
+ (1<<15) + (11<<10) + (12<<5) + 13};
7656
+ vector unsigned char vucser = {4,255,5,255,6,255,7,255,0,255,1,255,2,255,3,255};
7657
+ vector signed char vscser = {-3,-128,2,127,-4,-128,3,127,
7658
+ -1,-128,0,127,-2,-128,1,127};
7659
+ vector unsigned short vusser = {2,65535,3,65535,0,65535,1,65535};
7660
+ vector signed short vssser = {-2,-32768,1,32767,-1,-32768,0,32767};
7661
+ vector unsigned char vucsuer1 = {4,255,5,255,6,255,7,255,0,255,1,255,2,255,3,255};
7662
+ vector unsigned char vucsuer2 = {0,0,2,129,0,0,3,130,0,0,0,127,0,0,1,128};
7663
+ vector unsigned short vussuer1 = {2,65535,3,65535,0,65535,1,65535};
7664
+ vector unsigned short vussuer2 = {0,0,1,32768,0,0,0,32767};
7666
+ vector unsigned char vucer = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7667
+ vector signed char vscer = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
7668
+ vector bool char vbcer = {0,255,255,0,0,0,255,0,255,0,0,255,255,255,0,255};
7669
+ vector unsigned short vuser = {0,1,2,3,4,5,6,7};
7670
+ vector signed short vsser = {-4,-3,-2,-1,0,1,2,3};
7671
+ vector bool short vbser = {0,65535,65535,65535,65535,0,0,0};
7672
+ vector pixel vper = {(0<<15) + (2<<10) + (3<<5) + 4,
7673
+ (1<<15) + (5<<10) + (6<<5) + 7,
7674
+ (0<<15) + (8<<10) + (9<<5) + 10,
7675
+ (1<<15) + (11<<10) + (12<<5) + 13,
7676
+ (1<<15) + (14<<10) + (15<<5) + 16,
7677
+ (0<<15) + (17<<10) + (18<<5) + 19,
7678
+ (1<<15) + (20<<10) + (21<<5) + 22,
7679
+ (0<<15) + (23<<10) + (24<<5) + 25};
7680
+ vector unsigned char vucser = {0,255,1,255,2,255,3,255,4,255,5,255,6,255,7,255};
7681
+ vector signed char vscser = {-1,-128,0,127,-2,-128,1,127,
7682
+ -3,-128,2,127,-4,-128,3,127};
7683
+ vector unsigned short vusser = {0,65535,1,65535,2,65535,3,65535};
7684
+ vector signed short vssser = {-1,-32768,0,32767,-2,-32768,1,32767};
7685
+ vector unsigned char vucsuer1 = {0,255,1,255,2,255,3,255,4,255,5,255,6,255,7,255};
7686
+ vector unsigned char vucsuer2 = {0,0,0,127,0,0,1,128,0,0,2,129,0,0,3,130};
7687
+ vector unsigned short vussuer1 = {0,65535,1,65535,2,65535,3,65535};
7688
+ vector unsigned short vussuer2 = {0,0,0,32767,0,0,1,32768};
7691
+ vucr = vec_pack (vusa, vusb);
7692
+ vscr = vec_pack (vssa, vssb);
7693
+ vbcr = vec_pack (vbsa, vbsb);
7694
+ vusr = vec_pack (vuia, vuib);
7695
+ vssr = vec_pack (vsia, vsib);
7696
+ vbsr = vec_pack (vbia, vbib);
7697
+ vpr = vec_packpx (vipa, vipb);
7698
+ vucsr = vec_packs (vusc, vusd);
7699
+ vscsr = vec_packs (vssc, vssd);
7700
+ vussr = vec_packs (vuic, vuid);
7701
+ vsssr = vec_packs (vsic, vsid);
7702
+ vucsur1 = vec_packsu (vusc, vusd);
7703
+ vucsur2 = vec_packsu (vssc, vssd);
7704
+ vussur1 = vec_packsu (vuic, vuid);
7705
+ vussur2 = vec_packsu (vsic, vsid);
7707
+ check (vec_all_eq (vucr, vucer), "vucr");
7708
+ check (vec_all_eq (vscr, vscer), "vscr");
7709
+ check (vec_all_eq (vbcr, vbcer), "vbcr");
7710
+ check (vec_all_eq (vusr, vuser), "vusr");
7711
+ check (vec_all_eq (vssr, vsser), "vssr");
7712
+ check (vec_all_eq (vbsr, vbser), "vbsr");
7713
+ check (vec_all_eq (vpr, vper ), "vpr" );
7714
+ check (vec_all_eq (vucsr, vucser), "vucsr");
7715
+ check (vec_all_eq (vscsr, vscser), "vscsr");
7716
+ check (vec_all_eq (vussr, vusser), "vussr");
7717
+ check (vec_all_eq (vsssr, vssser), "vsssr");
7718
+ check (vec_all_eq (vucsur1, vucsuer1), "vucsur1");
7719
+ check (vec_all_eq (vucsur2, vucsuer2), "vucsur2");
7720
+ check (vec_all_eq (vussur1, vussuer1), "vussur1");
7721
+ check (vec_all_eq (vussur2, vussuer2), "vussur2");
7723
--- a/src/gcc/testsuite/gcc.dg/vmx/st-be-order.c
7724
+++ b/src/gcc/testsuite/gcc.dg/vmx/st-be-order.c
7726
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
7728
+#include "harness.h"
7730
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
7731
+static signed char svsc[16] __attribute__ ((aligned (16)));
7732
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
7733
+static unsigned short svus[8] __attribute__ ((aligned (16)));
7734
+static signed short svss[8] __attribute__ ((aligned (16)));
7735
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
7736
+static unsigned short svp[8] __attribute__ ((aligned (16)));
7737
+static unsigned int svui[4] __attribute__ ((aligned (16)));
7738
+static signed int svsi[4] __attribute__ ((aligned (16)));
7739
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
7740
+static float svf[4] __attribute__ ((aligned (16)));
7742
+static void check_arrays ()
7745
+ for (i = 0; i < 16; ++i)
7747
+ check (svuc[i] == i, "svuc");
7748
+ check (svsc[i] == i - 8, "svsc");
7749
+ check (svbc[i] == ((i % 2) ? 0xff : 0), "svbc");
7751
+ for (i = 0; i < 8; ++i)
7753
+ check (svus[i] == i, "svus");
7754
+ check (svss[i] == i - 4, "svss");
7755
+ check (svbs[i] == ((i % 2) ? 0xffff : 0), "svbs");
7756
+ check (svp[i] == i, "svp");
7758
+ for (i = 0; i < 4; ++i)
7760
+ check (svui[i] == i, "svui");
7761
+ check (svsi[i] == i - 2, "svsi");
7762
+ check (svbi[i] == ((i % 2) ? 0xffffffff : 0), "svbi");
7763
+ check (svf[i] == i * 1.0f, "svf");
7767
+static void test ()
7769
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7770
+ vector unsigned char vuc = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
7771
+ vector signed char vsc = {7,6,5,4,3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8};
7772
+ vector bool char vbc = {255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0};
7773
+ vector unsigned short vus = {7,6,5,4,3,2,1,0};
7774
+ vector signed short vss = {3,2,1,0,-1,-2,-3,-4};
7775
+ vector bool short vbs = {65535,0,65535,0,65535,0,65535,0};
7776
+ vector pixel vp = {7,6,5,4,3,2,1,0};
7777
+ vector unsigned int vui = {3,2,1,0};
7778
+ vector signed int vsi = {1,0,-1,-2};
7779
+ vector bool int vbi = {0xffffffff,0,0xffffffff,0};
7780
+ vector float vf = {3.0,2.0,1.0,0.0};
7782
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
7783
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
7784
+ vector bool char vbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
7785
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
7786
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
7787
+ vector bool short vbs = {0,65535,0,65535,0,65535,0,65535};
7788
+ vector pixel vp = {0,1,2,3,4,5,6,7};
7789
+ vector unsigned int vui = {0,1,2,3};
7790
+ vector signed int vsi = {-2,-1,0,1};
7791
+ vector bool int vbi = {0,0xffffffff,0,0xffffffff};
7792
+ vector float vf = {0.0,1.0,2.0,3.0};
7795
+ vec_st (vuc, 0, (vector unsigned char *)svuc);
7796
+ vec_st (vsc, 0, (vector signed char *)svsc);
7797
+ vec_st (vbc, 0, (vector bool char *)svbc);
7798
+ vec_st (vus, 0, (vector unsigned short *)svus);
7799
+ vec_st (vss, 0, (vector signed short *)svss);
7800
+ vec_st (vbs, 0, (vector bool short *)svbs);
7801
+ vec_st (vp, 0, (vector pixel *)svp);
7802
+ vec_st (vui, 0, (vector unsigned int *)svui);
7803
+ vec_st (vsi, 0, (vector signed int *)svsi);
7804
+ vec_st (vbi, 0, (vector bool int *)svbi);
7805
+ vec_st (vf, 0, (vector float *)svf);
7809
--- a/src/gcc/testsuite/gcc.dg/vmx/gcc-bug-i.c
7810
+++ b/src/gcc/testsuite/gcc.dg/vmx/gcc-bug-i.c
7812
#define DO_INLINE __attribute__ ((always_inline))
7813
#define DONT_INLINE __attribute__ ((noinline))
7815
+#ifdef __LITTLE_ENDIAN__
7816
+static inline DO_INLINE int inline_me(vector signed short data)
7818
+ union {vector signed short v; signed short s[8];} u;
7820
+ unsigned char x1, x2;
7824
+ x1 = (x >> 8) & 0xff;
7826
+ return ((x2 << 8) | x1);
7829
static inline DO_INLINE int inline_me(vector signed short data)
7831
union {vector signed short v; signed short s[8];} u;
7838
static DONT_INLINE int foo(vector signed short data)
7840
--- a/src/gcc/testsuite/gcc.dg/vmx/eg-5.c
7841
+++ b/src/gcc/testsuite/gcc.dg/vmx/eg-5.c
7844
/* Set result to a vector of f32 0's */
7845
vector float result = ((vector float){0.,0.,0.,0.});
7847
result = vec_madd (c0, vec_splat (v, 0), result);
7848
result = vec_madd (c1, vec_splat (v, 1), result);
7849
result = vec_madd (c2, vec_splat (v, 2), result);
7850
result = vec_madd (c3, vec_splat (v, 3), result);
7855
--- a/src/gcc/testsuite/gcc.dg/vmx/st-vsx-be-order.c
7856
+++ b/src/gcc/testsuite/gcc.dg/vmx/st-vsx-be-order.c
7858
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
7859
+/* { dg-require-effective-target powerpc_vsx_ok } */
7860
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
7862
+#include "harness.h"
7864
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
7865
+static double svd[2] __attribute__ ((aligned (16)));
7867
+static void check_arrays ()
7870
+ for (i = 0; i < 2; ++i)
7872
+ check (svul[i] == i, "svul");
7873
+ check (svd[i] == i * 1.0, "svd");
7877
+static void test ()
7879
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7880
+ vector unsigned long long vul = {1,0};
7881
+ vector double vd = {1.0,0.0};
7883
+ vector unsigned long long vul = {0,1};
7884
+ vector double vd = {0.0,1.0};
7887
+ vec_st (vul, 0, (vector unsigned long long *)svul);
7888
+ vec_st (vd, 0, (vector double *)svd);
7892
--- a/src/gcc/testsuite/gcc.dg/vmx/lde.c
7893
+++ b/src/gcc/testsuite/gcc.dg/vmx/lde.c
7895
+#include "harness.h"
7897
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
7898
+static signed char svsc[16] __attribute__ ((aligned (16)));
7899
+static unsigned short svus[8] __attribute__ ((aligned (16)));
7900
+static signed short svss[8] __attribute__ ((aligned (16)));
7901
+static unsigned int svui[4] __attribute__ ((aligned (16)));
7902
+static signed int svsi[4] __attribute__ ((aligned (16)));
7903
+static float svf[4] __attribute__ ((aligned (16)));
7905
+static void init ()
7908
+ for (i = 0; i < 16; ++i)
7913
+ for (i = 0; i < 8; ++i)
7918
+ for (i = 0; i < 4; ++i)
7922
+ svf[i] = i * 1.0f;
7926
+static void test ()
7928
+ vector unsigned char vuc;
7929
+ vector signed char vsc;
7930
+ vector unsigned short vus;
7931
+ vector signed short vss;
7932
+ vector unsigned int vui;
7933
+ vector signed int vsi;
7938
+ vuc = vec_lde (9*1, (unsigned char *)svuc);
7939
+ vsc = vec_lde (14*1, (signed char *)svsc);
7940
+ vus = vec_lde (7*2, (unsigned short *)svus);
7941
+ vss = vec_lde (1*2, (signed short *)svss);
7942
+ vui = vec_lde (3*4, (unsigned int *)svui);
7943
+ vsi = vec_lde (2*4, (signed int *)svsi);
7944
+ vf = vec_lde (0*4, (float *)svf);
7946
+ check (vec_extract (vuc, 9) == 9, "vuc");
7947
+ check (vec_extract (vsc, 14) == 6, "vsc");
7948
+ check (vec_extract (vus, 7) == 7, "vus");
7949
+ check (vec_extract (vss, 1) == -3, "vss");
7950
+ check (vec_extract (vui, 3) == 3, "vui");
7951
+ check (vec_extract (vsi, 2) == 0, "vsi");
7952
+ check (vec_extract (vf, 0) == 0.0, "vf");
7954
--- a/src/gcc/testsuite/gcc.dg/vmx/pack.c
7955
+++ b/src/gcc/testsuite/gcc.dg/vmx/pack.c
7957
+#include "harness.h"
7959
+#define BIG 4294967295
7963
+ /* Input vectors. */
7964
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
7965
+ vector unsigned short vusb = {8,9,10,11,12,13,14,15};
7966
+ vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
7967
+ vector signed short vssb = {0,1,2,3,4,5,6,7};
7968
+ vector bool short vbsa = {0,65535,65535,0,0,0,65535,0};
7969
+ vector bool short vbsb = {65535,0,0,65535,65535,65535,0,65535};
7970
+ vector unsigned int vuia = {0,1,2,3};
7971
+ vector unsigned int vuib = {4,5,6,7};
7972
+ vector signed int vsia = {-4,-3,-2,-1};
7973
+ vector signed int vsib = {0,1,2,3};
7974
+ vector bool int vbia = {0,BIG,BIG,BIG};
7975
+ vector bool int vbib = {BIG,0,0,0};
7976
+ vector unsigned int vipa = {(0<<24) + (2<<19) + (3<<11) + (4<<3),
7977
+ (1<<24) + (5<<19) + (6<<11) + (7<<3),
7978
+ (0<<24) + (8<<19) + (9<<11) + (10<<3),
7979
+ (1<<24) + (11<<19) + (12<<11) + (13<<3)};
7980
+ vector unsigned int vipb = {(1<<24) + (14<<19) + (15<<11) + (16<<3),
7981
+ (0<<24) + (17<<19) + (18<<11) + (19<<3),
7982
+ (1<<24) + (20<<19) + (21<<11) + (22<<3),
7983
+ (0<<24) + (23<<19) + (24<<11) + (25<<3)};
7984
+ vector unsigned short vusc = {0,256,1,257,2,258,3,259};
7985
+ vector unsigned short vusd = {4,260,5,261,6,262,7,263};
7986
+ vector signed short vssc = {-1,-128,0,127,-2,-129,1,128};
7987
+ vector signed short vssd = {-3,-130,2,129,-4,-131,3,130};
7988
+ vector unsigned int vuic = {0,65536,1,65537};
7989
+ vector unsigned int vuid = {2,65538,3,65539};
7990
+ vector signed int vsic = {-1,-32768,0,32767};
7991
+ vector signed int vsid = {-2,-32769,1,32768};
7993
+ /* Result vectors. */
7994
+ vector unsigned char vucr;
7995
+ vector signed char vscr;
7996
+ vector bool char vbcr;
7997
+ vector unsigned short vusr;
7998
+ vector signed short vssr;
7999
+ vector bool short vbsr;
8001
+ vector unsigned char vucsr;
8002
+ vector signed char vscsr;
8003
+ vector unsigned short vussr;
8004
+ vector signed short vsssr;
8005
+ vector unsigned char vucsur1, vucsur2;
8006
+ vector unsigned short vussur1, vussur2;
8008
+ /* Expected result vectors. */
8009
+ vector unsigned char vucer = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8010
+ vector signed char vscer = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8011
+ vector bool char vbcer = {0,255,255,0,0,0,255,0,255,0,0,255,255,255,0,255};
8012
+ vector unsigned short vuser = {0,1,2,3,4,5,6,7};
8013
+ vector signed short vsser = {-4,-3,-2,-1,0,1,2,3};
8014
+ vector bool short vbser = {0,65535,65535,65535,65535,0,0,0};
8015
+ vector pixel vper = {(0<<15) + (2<<10) + (3<<5) + 4,
8016
+ (1<<15) + (5<<10) + (6<<5) + 7,
8017
+ (0<<15) + (8<<10) + (9<<5) + 10,
8018
+ (1<<15) + (11<<10) + (12<<5) + 13,
8019
+ (1<<15) + (14<<10) + (15<<5) + 16,
8020
+ (0<<15) + (17<<10) + (18<<5) + 19,
8021
+ (1<<15) + (20<<10) + (21<<5) + 22,
8022
+ (0<<15) + (23<<10) + (24<<5) + 25};
8023
+ vector unsigned char vucser = {0,255,1,255,2,255,3,255,4,255,5,255,6,255,7,255};
8024
+ vector signed char vscser = {-1,-128,0,127,-2,-128,1,127,
8025
+ -3,-128,2,127,-4,-128,3,127};
8026
+ vector unsigned short vusser = {0,65535,1,65535,2,65535,3,65535};
8027
+ vector signed short vssser = {-1,-32768,0,32767,-2,-32768,1,32767};
8028
+ vector unsigned char vucsuer1 = {0,255,1,255,2,255,3,255,4,255,5,255,6,255,7,255};
8029
+ vector unsigned char vucsuer2 = {0,0,0,127,0,0,1,128,0,0,2,129,0,0,3,130};
8030
+ vector unsigned short vussuer1 = {0,65535,1,65535,2,65535,3,65535};
8031
+ vector unsigned short vussuer2 = {0,0,0,32767,0,0,1,32768};
8033
+ vucr = vec_pack (vusa, vusb);
8034
+ vscr = vec_pack (vssa, vssb);
8035
+ vbcr = vec_pack (vbsa, vbsb);
8036
+ vusr = vec_pack (vuia, vuib);
8037
+ vssr = vec_pack (vsia, vsib);
8038
+ vbsr = vec_pack (vbia, vbib);
8039
+ vpr = vec_packpx (vipa, vipb);
8040
+ vucsr = vec_packs (vusc, vusd);
8041
+ vscsr = vec_packs (vssc, vssd);
8042
+ vussr = vec_packs (vuic, vuid);
8043
+ vsssr = vec_packs (vsic, vsid);
8044
+ vucsur1 = vec_packsu (vusc, vusd);
8045
+ vucsur2 = vec_packsu (vssc, vssd);
8046
+ vussur1 = vec_packsu (vuic, vuid);
8047
+ vussur2 = vec_packsu (vsic, vsid);
8049
+ check (vec_all_eq (vucr, vucer), "vucr");
8050
+ check (vec_all_eq (vscr, vscer), "vscr");
8051
+ check (vec_all_eq (vbcr, vbcer), "vbcr");
8052
+ check (vec_all_eq (vusr, vuser), "vusr");
8053
+ check (vec_all_eq (vssr, vsser), "vssr");
8054
+ check (vec_all_eq (vbsr, vbser), "vbsr");
8055
+ check (vec_all_eq (vpr, vper ), "vpr" );
8056
+ check (vec_all_eq (vucsr, vucser), "vucsr");
8057
+ check (vec_all_eq (vscsr, vscser), "vscsr");
8058
+ check (vec_all_eq (vussr, vusser), "vussr");
8059
+ check (vec_all_eq (vsssr, vssser), "vsssr");
8060
+ check (vec_all_eq (vucsur1, vucsuer1), "vucsur1");
8061
+ check (vec_all_eq (vucsur2, vucsuer2), "vucsur2");
8062
+ check (vec_all_eq (vussur1, vussuer1), "vussur1");
8063
+ check (vec_all_eq (vussur2, vussuer2), "vussur2");
8065
--- a/src/gcc/testsuite/gcc.dg/vmx/unpack-be-order.c
8066
+++ b/src/gcc/testsuite/gcc.dg/vmx/unpack-be-order.c
8068
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
8070
+#include "harness.h"
8072
+#define BIG 4294967295
8076
+ /* Input vectors. */
8077
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8078
+ vector bool char vbc = {0,255,255,0,0,0,255,0,255,0,0,255,255,255,0,255};
8079
+ vector pixel vp = {(0<<15) + (1<<10) + (2<<5) + 3,
8080
+ (1<<15) + (4<<10) + (5<<5) + 6,
8081
+ (0<<15) + (7<<10) + (8<<5) + 9,
8082
+ (1<<15) + (10<<10) + (11<<5) + 12,
8083
+ (1<<15) + (13<<10) + (14<<5) + 15,
8084
+ (0<<15) + (16<<10) + (17<<5) + 18,
8085
+ (1<<15) + (19<<10) + (20<<5) + 21,
8086
+ (0<<15) + (22<<10) + (23<<5) + 24};
8087
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
8088
+ vector bool short vbs = {0,65535,65535,0,0,0,65535,0};
8090
+ /* Result vectors. */
8091
+ vector signed short vsch, vscl;
8092
+ vector bool short vbsh, vbsl;
8093
+ vector unsigned int vuih, vuil;
8094
+ vector signed int vsih, vsil;
8095
+ vector bool int vbih, vbil;
8097
+ /* Expected result vectors. */
8098
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8099
+ vector signed short vschr = {0,1,2,3,4,5,6,7};
8100
+ vector signed short vsclr = {-8,-7,-6,-5,-4,-3,-2,-1};
8101
+ vector bool short vbshr = {65535,0,0,65535,65535,65535,0,65535};
8102
+ vector bool short vbslr = {0,65535,65535,0,0,0,65535,0};
8103
+ vector unsigned int vuihr = {(65535<<24) + (13<<16) + (14<<8) + 15,
8104
+ (0<<24) + (16<<16) + (17<<8) + 18,
8105
+ (65535<<24) + (19<<16) + (20<<8) + 21,
8106
+ (0<<24) + (22<<16) + (23<<8) + 24};
8107
+ vector unsigned int vuilr = {(0<<24) + (1<<16) + (2<<8) + 3,
8108
+ (65535<<24) + (4<<16) + (5<<8) + 6,
8109
+ (0<<24) + (7<<16) + (8<<8) + 9,
8110
+ (65535<<24) + (10<<16) + (11<<8) + 12};
8111
+ vector signed int vsihr = {0,1,2,3};
8112
+ vector signed int vsilr = {-4,-3,-2,-1};
8113
+ vector bool int vbihr = {0,0,BIG,0};
8114
+ vector bool int vbilr = {0,BIG,BIG,0};
8116
+ vector signed short vschr = {-8,-7,-6,-5,-4,-3,-2,-1};
8117
+ vector signed short vsclr = {0,1,2,3,4,5,6,7};
8118
+ vector bool short vbshr = {0,65535,65535,0,0,0,65535,0};
8119
+ vector bool short vbslr = {65535,0,0,65535,65535,65535,0,65535};
8120
+ vector unsigned int vuihr = {(0<<24) + (1<<16) + (2<<8) + 3,
8121
+ (65535<<24) + (4<<16) + (5<<8) + 6,
8122
+ (0<<24) + (7<<16) + (8<<8) + 9,
8123
+ (65535<<24) + (10<<16) + (11<<8) + 12};
8124
+ vector unsigned int vuilr = {(65535<<24) + (13<<16) + (14<<8) + 15,
8125
+ (0<<24) + (16<<16) + (17<<8) + 18,
8126
+ (65535<<24) + (19<<16) + (20<<8) + 21,
8127
+ (0<<24) + (22<<16) + (23<<8) + 24};
8128
+ vector signed int vsihr = {-4,-3,-2,-1};
8129
+ vector signed int vsilr = {0,1,2,3};
8130
+ vector bool int vbihr = {0,BIG,BIG,0};
8131
+ vector bool int vbilr = {0,0,BIG,0};
8134
+ vsch = vec_unpackh (vsc);
8135
+ vscl = vec_unpackl (vsc);
8136
+ vbsh = vec_unpackh (vbc);
8137
+ vbsl = vec_unpackl (vbc);
8138
+ vuih = vec_unpackh (vp);
8139
+ vuil = vec_unpackl (vp);
8140
+ vsih = vec_unpackh (vss);
8141
+ vsil = vec_unpackl (vss);
8142
+ vbih = vec_unpackh (vbs);
8143
+ vbil = vec_unpackl (vbs);
8145
+ check (vec_all_eq (vsch, vschr), "vsch");
8146
+ check (vec_all_eq (vscl, vsclr), "vscl");
8147
+ check (vec_all_eq (vbsh, vbshr), "vbsh");
8148
+ check (vec_all_eq (vbsl, vbslr), "vbsl");
8149
+ check (vec_all_eq (vuih, vuihr), "vuih");
8150
+ check (vec_all_eq (vuil, vuilr), "vuil");
8151
+ check (vec_all_eq (vsih, vsihr), "vsih");
8152
+ check (vec_all_eq (vsil, vsilr), "vsil");
8153
+ check (vec_all_eq (vbih, vbihr), "vbih");
8154
+ check (vec_all_eq (vbil, vbilr), "vbil");
8156
--- a/src/gcc/testsuite/gcc.dg/vmx/st.c
8157
+++ b/src/gcc/testsuite/gcc.dg/vmx/st.c
8159
+#include "harness.h"
8161
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
8162
+static signed char svsc[16] __attribute__ ((aligned (16)));
8163
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
8164
+static unsigned short svus[8] __attribute__ ((aligned (16)));
8165
+static signed short svss[8] __attribute__ ((aligned (16)));
8166
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
8167
+static unsigned short svp[8] __attribute__ ((aligned (16)));
8168
+static unsigned int svui[4] __attribute__ ((aligned (16)));
8169
+static signed int svsi[4] __attribute__ ((aligned (16)));
8170
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
8171
+static float svf[4] __attribute__ ((aligned (16)));
8173
+static void check_arrays ()
8176
+ for (i = 0; i < 16; ++i)
8178
+ check (svuc[i] == i, "svuc");
8179
+ check (svsc[i] == i - 8, "svsc");
8180
+ check (svbc[i] == ((i % 2) ? 0xff : 0), "svbc");
8182
+ for (i = 0; i < 8; ++i)
8184
+ check (svus[i] == i, "svus");
8185
+ check (svss[i] == i - 4, "svss");
8186
+ check (svbs[i] == ((i % 2) ? 0xffff : 0), "svbs");
8187
+ check (svp[i] == i, "svp");
8189
+ for (i = 0; i < 4; ++i)
8191
+ check (svui[i] == i, "svui");
8192
+ check (svsi[i] == i - 2, "svsi");
8193
+ check (svbi[i] == ((i % 2) ? 0xffffffff : 0), "svbi");
8194
+ check (svf[i] == i * 1.0f, "svf");
8198
+static void test ()
8200
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8201
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8202
+ vector bool char vbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
8203
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
8204
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
8205
+ vector bool short vbs = {0,65535,0,65535,0,65535,0,65535};
8206
+ vector pixel vp = {0,1,2,3,4,5,6,7};
8207
+ vector unsigned int vui = {0,1,2,3};
8208
+ vector signed int vsi = {-2,-1,0,1};
8209
+ vector bool int vbi = {0,0xffffffff,0,0xffffffff};
8210
+ vector float vf = {0.0,1.0,2.0,3.0};
8212
+ vec_st (vuc, 0, (vector unsigned char *)svuc);
8213
+ vec_st (vsc, 0, (vector signed char *)svsc);
8214
+ vec_st (vbc, 0, (vector bool char *)svbc);
8215
+ vec_st (vus, 0, (vector unsigned short *)svus);
8216
+ vec_st (vss, 0, (vector signed short *)svss);
8217
+ vec_st (vbs, 0, (vector bool short *)svbs);
8218
+ vec_st (vp, 0, (vector pixel *)svp);
8219
+ vec_st (vui, 0, (vector unsigned int *)svui);
8220
+ vec_st (vsi, 0, (vector signed int *)svsi);
8221
+ vec_st (vbi, 0, (vector bool int *)svbi);
8222
+ vec_st (vf, 0, (vector float *)svf);
8226
--- a/src/gcc/testsuite/gcc.dg/vmx/ste-be-order.c
8227
+++ b/src/gcc/testsuite/gcc.dg/vmx/ste-be-order.c
8229
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
8231
+#include "harness.h"
8233
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
8234
+static signed char svsc[16] __attribute__ ((aligned (16)));
8235
+static unsigned short svus[8] __attribute__ ((aligned (16)));
8236
+static signed short svss[8] __attribute__ ((aligned (16)));
8237
+static unsigned int svui[4] __attribute__ ((aligned (16)));
8238
+static signed int svsi[4] __attribute__ ((aligned (16)));
8239
+static float svf[4] __attribute__ ((aligned (16)));
8241
+static void check_arrays ()
8243
+ check (svuc[9] == 9, "svuc");
8244
+ check (svsc[14] == 6, "svsc");
8245
+ check (svus[7] == 7, "svus");
8246
+ check (svss[1] == -3, "svss");
8247
+ check (svui[3] == 3, "svui");
8248
+ check (svsi[2] == 0, "svsi");
8249
+ check (svf[0] == 0.0, "svf");
8252
+static void test ()
8254
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8255
+ vector unsigned char vuc = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
8256
+ vector signed char vsc = {7,6,5,4,3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8};
8257
+ vector unsigned short vus = {7,6,5,4,3,2,1,0};
8258
+ vector signed short vss = {3,2,1,0,-1,-2,-3,-4};
8259
+ vector unsigned int vui = {3,2,1,0};
8260
+ vector signed int vsi = {1,0,-1,-2};
8261
+ vector float vf = {3.0,2.0,1.0,0.0};
8263
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8264
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8265
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
8266
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
8267
+ vector unsigned int vui = {0,1,2,3};
8268
+ vector signed int vsi = {-2,-1,0,1};
8269
+ vector float vf = {0.0,1.0,2.0,3.0};
8272
+ vec_ste (vuc, 9*1, (unsigned char *)svuc);
8273
+ vec_ste (vsc, 14*1, (signed char *)svsc);
8274
+ vec_ste (vus, 7*2, (unsigned short *)svus);
8275
+ vec_ste (vss, 1*2, (signed short *)svss);
8276
+ vec_ste (vui, 3*4, (unsigned int *)svui);
8277
+ vec_ste (vsi, 2*4, (signed int *)svsi);
8278
+ vec_ste (vf, 0*4, (float *)svf);
8282
--- a/src/gcc/testsuite/gcc.dg/vmx/insert.c
8283
+++ b/src/gcc/testsuite/gcc.dg/vmx/insert.c
8285
+#include "harness.h"
8289
+ vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8290
+ vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8291
+ vector unsigned short vc = {0,1,2,3,4,5,6,7};
8292
+ vector signed short vd = {-4,-3,-2,-1,0,1,2,3};
8293
+ vector unsigned int ve = {0,1,2,3};
8294
+ vector signed int vf = {-2,-1,0,1};
8295
+ vector float vg = {-2.0f,-1.0f,0.0f,1.0f};
8297
+ check (vec_all_eq (vec_insert (16, va, 5),
8298
+ ((vector unsigned char)
8299
+ {0,1,2,3,4,16,6,7,8,9,10,11,12,13,14,15})),
8300
+ "vec_insert (va)");
8301
+ check (vec_all_eq (vec_insert (-16, vb, 0),
8302
+ ((vector signed char)
8303
+ {-16,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7})),
8304
+ "vec_insert (vb)");
8305
+ check (vec_all_eq (vec_insert (16, vc, 7),
8306
+ ((vector unsigned short){0,1,2,3,4,5,6,16})),
8307
+ "vec_insert (vc)");
8308
+ check (vec_all_eq (vec_insert (-16, vd, 3),
8309
+ ((vector signed short){-4,-3,-2,-16,0,1,2,3})),
8310
+ "vec_insert (vd)");
8311
+ check (vec_all_eq (vec_insert (16, ve, 2),
8312
+ ((vector unsigned int){0,1,16,3})),
8313
+ "vec_insert (ve)");
8314
+ check (vec_all_eq (vec_insert (-16, vf, 1),
8315
+ ((vector signed int){-2,-16,0,1})),
8316
+ "vec_insert (vf)");
8317
+ check (vec_all_eq (vec_insert (-16.0f, vg, 0),
8318
+ ((vector float){-16.0f,-1.0f,0.0f,1.0f})),
8319
+ "vec_insert (vg)");
8322
--- a/src/gcc/testsuite/gcc.dg/vmx/ld-vsx.c
8323
+++ b/src/gcc/testsuite/gcc.dg/vmx/ld-vsx.c
8325
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
8326
+/* { dg-require-effective-target powerpc_vsx_ok } */
8327
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
8329
+#include "harness.h"
8331
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
8332
+static double svd[2] __attribute__ ((aligned (16)));
8334
+static void init ()
8337
+ for (i = 0; i < 2; ++i)
8344
+static void test ()
8346
+ vector unsigned long long evul = {0,1};
8347
+ vector double evd = {0.0,1.0};
8349
+ vector unsigned long long vul;
8355
+ vul = vec_ld (0, (vector unsigned long long *)svul);
8356
+ vd = vec_ld (0, (vector double *)svd);
8358
+ for (i = 0; i < 2; ++i)
8360
+ check (vul[i] == evul[i], "vul");
8361
+ check (vd[i] == evd[i], "vd" );
8364
--- a/src/gcc/testsuite/gcc.dg/vmx/extract-vsx.c
8365
+++ b/src/gcc/testsuite/gcc.dg/vmx/extract-vsx.c
8367
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
8368
+/* { dg-require-effective-target powerpc_vsx_ok } */
8369
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
8371
+#include "harness.h"
8375
+ vector long long vl = {0, 1};
8376
+ vector double vd = {0.0, 1.0};
8378
+ check (vec_extract (vl, 0) == 0, "vec_extract, vl, 0");
8379
+ check (vec_extract (vd, 1) == 1.0, "vec_extract, vd, 1");
8380
+ check (vl[0] == 0, "[], vl, 0");
8381
+ check (vd[1] == 1.0, "[], vd, 0");
8383
--- a/src/gcc/testsuite/gcc.dg/vmx/perm.c
8384
+++ b/src/gcc/testsuite/gcc.dg/vmx/perm.c
8386
+#include "harness.h"
8390
+ /* Input vectors. */
8391
+ vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8392
+ vector unsigned char vucb
8393
+ = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
8394
+ vector unsigned char vucp = {0,31,1,30,2,29,3,28,4,27,5,26,6,25,7,24};
8396
+ vector signed char vsca
8397
+ = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
8398
+ vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8399
+ vector unsigned char vscp = {0,31,1,30,2,29,3,28,4,27,5,26,6,25,7,24};
8401
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
8402
+ vector unsigned short vusb = {8,9,10,11,12,13,14,15};
8403
+ vector unsigned char vusp = {0,1,30,31,2,3,28,29,4,5,26,27,6,7,24,25};
8405
+ vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
8406
+ vector signed short vssb = {0,1,2,3,4,5,6,7};
8407
+ vector unsigned char vssp = {0,1,30,31,2,3,28,29,4,5,26,27,6,7,24,25};
8409
+ vector unsigned int vuia = {0,1,2,3};
8410
+ vector unsigned int vuib = {4,5,6,7};
8411
+ vector unsigned char vuip = {0,1,2,3,28,29,30,31,4,5,6,7,24,25,26,27};
8413
+ vector signed int vsia = {-4,-3,-2,-1};
8414
+ vector signed int vsib = {0,1,2,3};
8415
+ vector unsigned char vsip = {0,1,2,3,28,29,30,31,4,5,6,7,24,25,26,27};
8417
+ vector float vfa = {-4.0,-3.0,-2.0,-1.0};
8418
+ vector float vfb = {0.0,1.0,2.0,3.0};
8419
+ vector unsigned char vfp = {0,1,2,3,28,29,30,31,4,5,6,7,24,25,26,27};
8421
+ /* Result vectors. */
8422
+ vector unsigned char vuc;
8423
+ vector signed char vsc;
8424
+ vector unsigned short vus;
8425
+ vector signed short vss;
8426
+ vector unsigned int vui;
8427
+ vector signed int vsi;
8430
+ /* Expected result vectors. */
8431
+ vector unsigned char vucr = {0,31,1,30,2,29,3,28,4,27,5,26,6,25,7,24};
8432
+ vector signed char vscr = {-16,15,-15,14,-14,13,-13,12,-12,11,-11,10,-10,9,-9,8};
8433
+ vector unsigned short vusr = {0,15,1,14,2,13,3,12};
8434
+ vector signed short vssr = {-8,7,-7,6,-6,5,-5,4};
8435
+ vector unsigned int vuir = {0,7,1,6};
8436
+ vector signed int vsir = {-4,3,-3,2};
8437
+ vector float vfr = {-4.0,3.0,-3.0,2.0};
8439
+ vuc = vec_perm (vuca, vucb, vucp);
8440
+ vsc = vec_perm (vsca, vscb, vscp);
8441
+ vus = vec_perm (vusa, vusb, vusp);
8442
+ vss = vec_perm (vssa, vssb, vssp);
8443
+ vui = vec_perm (vuia, vuib, vuip);
8444
+ vsi = vec_perm (vsia, vsib, vsip);
8445
+ vf = vec_perm (vfa, vfb, vfp );
8447
+ check (vec_all_eq (vuc, vucr), "vuc");
8448
+ check (vec_all_eq (vsc, vscr), "vsc");
8449
+ check (vec_all_eq (vus, vusr), "vus");
8450
+ check (vec_all_eq (vss, vssr), "vss");
8451
+ check (vec_all_eq (vui, vuir), "vui");
8452
+ check (vec_all_eq (vsi, vsir), "vsi");
8453
+ check (vec_all_eq (vf, vfr), "vf" );
8455
--- a/src/gcc/testsuite/gcc.dg/vmx/extract-be-order.c
8456
+++ b/src/gcc/testsuite/gcc.dg/vmx/extract-be-order.c
8458
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
8460
+#include "harness.h"
8464
+ vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8465
+ vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8466
+ vector unsigned short vc = {0,1,2,3,4,5,6,7};
8467
+ vector signed short vd = {-4,-3,-2,-1,0,1,2,3};
8468
+ vector unsigned int ve = {0,1,2,3};
8469
+ vector signed int vf = {-2,-1,0,1};
8470
+ vector float vg = {-2.0f,-1.0f,0.0f,1.0f};
8472
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8473
+ check (vec_extract (va, 5) == 10, "vec_extract (va, 5)");
8474
+ check (vec_extract (vb, 0) == 7, "vec_extract (vb, 0)");
8475
+ check (vec_extract (vc, 7) == 0, "vec_extract (vc, 7)");
8476
+ check (vec_extract (vd, 3) == 0, "vec_extract (vd, 3)");
8477
+ check (vec_extract (ve, 2) == 1, "vec_extract (ve, 2)");
8478
+ check (vec_extract (vf, 1) == 0, "vec_extract (vf, 1)");
8479
+ check (vec_extract (vg, 0) == 1.0f, "vec_extract (vg, 0)");
8481
+ check (vec_extract (va, 5) == 5, "vec_extract (va, 5)");
8482
+ check (vec_extract (vb, 0) == -8, "vec_extract (vb, 0)");
8483
+ check (vec_extract (vc, 7) == 7, "vec_extract (vc, 7)");
8484
+ check (vec_extract (vd, 3) == -1, "vec_extract (vd, 3)");
8485
+ check (vec_extract (ve, 2) == 2, "vec_extract (ve, 2)");
8486
+ check (vec_extract (vf, 1) == -1, "vec_extract (vf, 1)");
8487
+ check (vec_extract (vg, 0) == -2.0f, "vec_extract (vg, 0)");
8491
--- a/src/gcc/testsuite/gcc.dg/vmx/ldl-be-order.c
8492
+++ b/src/gcc/testsuite/gcc.dg/vmx/ldl-be-order.c
8494
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
8496
+#include "harness.h"
8498
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
8499
+static signed char svsc[16] __attribute__ ((aligned (16)));
8500
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
8501
+static unsigned short svus[8] __attribute__ ((aligned (16)));
8502
+static signed short svss[8] __attribute__ ((aligned (16)));
8503
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
8504
+static unsigned short svp[8] __attribute__ ((aligned (16)));
8505
+static unsigned int svui[4] __attribute__ ((aligned (16)));
8506
+static signed int svsi[4] __attribute__ ((aligned (16)));
8507
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
8508
+static float svf[4] __attribute__ ((aligned (16)));
8510
+static void init ()
8513
+ for (i = 0; i < 16; ++i)
8517
+ svbc[i] = (i % 2) ? 0xff : 0;
8519
+ for (i = 0; i < 8; ++i)
8523
+ svbs[i] = (i % 2) ? 0xffff : 0;
8526
+ for (i = 0; i < 4; ++i)
8530
+ svbi[i] = (i % 2) ? 0xffffffff : 0;
8531
+ svf[i] = i * 1.0f;
8535
+static void test ()
8537
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8538
+ vector unsigned char evuc = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
8539
+ vector signed char evsc = {7,6,5,4,3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8};
8540
+ vector bool char evbc = {255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0};
8541
+ vector unsigned short evus = {7,6,5,4,3,2,1,0};
8542
+ vector signed short evss = {3,2,1,0,-1,-2,-3,-4};
8543
+ vector bool short evbs = {65535,0,65535,0,65535,0,65535,0};
8544
+ vector pixel evp = {7,6,5,4,3,2,1,0};
8545
+ vector unsigned int evui = {3,2,1,0};
8546
+ vector signed int evsi = {1,0,-1,-2};
8547
+ vector bool int evbi = {0xffffffff,0,0xffffffff,0};
8548
+ vector float evf = {3.0,2.0,1.0,0.0};
8550
+ vector unsigned char evuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8551
+ vector signed char evsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8552
+ vector bool char evbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
8553
+ vector unsigned short evus = {0,1,2,3,4,5,6,7};
8554
+ vector signed short evss = {-4,-3,-2,-1,0,1,2,3};
8555
+ vector bool short evbs = {0,65535,0,65535,0,65535,0,65535};
8556
+ vector pixel evp = {0,1,2,3,4,5,6,7};
8557
+ vector unsigned int evui = {0,1,2,3};
8558
+ vector signed int evsi = {-2,-1,0,1};
8559
+ vector bool int evbi = {0,0xffffffff,0,0xffffffff};
8560
+ vector float evf = {0.0,1.0,2.0,3.0};
8563
+ vector unsigned char vuc;
8564
+ vector signed char vsc;
8565
+ vector bool char vbc;
8566
+ vector unsigned short vus;
8567
+ vector signed short vss;
8568
+ vector bool short vbs;
8570
+ vector unsigned int vui;
8571
+ vector signed int vsi;
8572
+ vector bool int vbi;
8577
+ vuc = vec_ldl (0, (vector unsigned char *)svuc);
8578
+ vsc = vec_ldl (0, (vector signed char *)svsc);
8579
+ vbc = vec_ldl (0, (vector bool char *)svbc);
8580
+ vus = vec_ldl (0, (vector unsigned short *)svus);
8581
+ vss = vec_ldl (0, (vector signed short *)svss);
8582
+ vbs = vec_ldl (0, (vector bool short *)svbs);
8583
+ vp = vec_ldl (0, (vector pixel *)svp);
8584
+ vui = vec_ldl (0, (vector unsigned int *)svui);
8585
+ vsi = vec_ldl (0, (vector signed int *)svsi);
8586
+ vbi = vec_ldl (0, (vector bool int *)svbi);
8587
+ vf = vec_ldl (0, (vector float *)svf);
8589
+ check (vec_all_eq (vuc, evuc), "vuc");
8590
+ check (vec_all_eq (vsc, evsc), "vsc");
8591
+ check (vec_all_eq (vbc, evbc), "vbc");
8592
+ check (vec_all_eq (vus, evus), "vus");
8593
+ check (vec_all_eq (vss, evss), "vss");
8594
+ check (vec_all_eq (vbs, evbs), "vbs");
8595
+ check (vec_all_eq (vp, evp ), "vp" );
8596
+ check (vec_all_eq (vui, evui), "vui");
8597
+ check (vec_all_eq (vsi, evsi), "vsi");
8598
+ check (vec_all_eq (vbi, evbi), "vbi");
8599
+ check (vec_all_eq (vf, evf ), "vf" );
8601
--- a/src/gcc/testsuite/gcc.dg/vmx/mult-even-odd.c
8602
+++ b/src/gcc/testsuite/gcc.dg/vmx/mult-even-odd.c
8604
+#include "harness.h"
8608
+ vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8609
+ vector unsigned char vucb = {2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3};
8610
+ vector signed char vsca = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8611
+ vector signed char vscb = {2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3};
8612
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
8613
+ vector unsigned short vusb = {2,3,2,3,2,3,2,3};
8614
+ vector signed short vssa = {-4,-3,-2,-1,0,1,2,3};
8615
+ vector signed short vssb = {2,-3,2,-3,2,-3,2,-3};
8616
+ vector unsigned short vuse, vuso;
8617
+ vector signed short vsse, vsso;
8618
+ vector unsigned int vuie, vuio;
8619
+ vector signed int vsie, vsio;
8621
+ vuse = vec_mule (vuca, vucb);
8622
+ vuso = vec_mulo (vuca, vucb);
8623
+ vsse = vec_mule (vsca, vscb);
8624
+ vsso = vec_mulo (vsca, vscb);
8625
+ vuie = vec_mule (vusa, vusb);
8626
+ vuio = vec_mulo (vusa, vusb);
8627
+ vsie = vec_mule (vssa, vssb);
8628
+ vsio = vec_mulo (vssa, vssb);
8630
+ check (vec_all_eq (vuse,
8631
+ ((vector unsigned short){0,4,8,12,16,20,24,28})),
8633
+ check (vec_all_eq (vuso,
8634
+ ((vector unsigned short){3,9,15,21,27,33,39,45})),
8636
+ check (vec_all_eq (vsse,
8637
+ ((vector signed short){-16,-12,-8,-4,0,4,8,12})),
8639
+ check (vec_all_eq (vsso,
8640
+ ((vector signed short){21,15,9,3,-3,-9,-15,-21})),
8642
+ check (vec_all_eq (vuie, ((vector unsigned int){0,4,8,12})), "vuie");
8643
+ check (vec_all_eq (vuio, ((vector unsigned int){3,9,15,21})), "vuio");
8644
+ check (vec_all_eq (vsie, ((vector signed int){-8,-4,0,4})), "vsie");
8645
+ check (vec_all_eq (vsio, ((vector signed int){9,3,-3,-9})), "vsio");
8647
--- a/src/gcc/testsuite/gcc.dg/vmx/splat-be-order.c
8648
+++ b/src/gcc/testsuite/gcc.dg/vmx/splat-be-order.c
8650
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
8652
+#include "harness.h"
8656
+ /* Input vectors. */
8657
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8658
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8659
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
8660
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
8661
+ vector unsigned int vui = {0,1,2,3};
8662
+ vector signed int vsi = {-2,-1,0,1};
8663
+ vector float vf = {-2.0,-1.0,0.0,1.0};
8665
+ /* Result vectors. */
8666
+ vector unsigned char vucr;
8667
+ vector signed char vscr;
8668
+ vector unsigned short vusr;
8669
+ vector signed short vssr;
8670
+ vector unsigned int vuir;
8671
+ vector signed int vsir;
8674
+ /* Expected result vectors. */
8675
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8676
+ vector unsigned char vucer = {14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
8677
+ vector signed char vscer = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
8678
+ vector unsigned short vuser = {0,0,0,0,0,0,0,0};
8679
+ vector signed short vsser = {3,3,3,3,3,3,3,3};
8680
+ vector unsigned int vuier = {1,1,1,1};
8681
+ vector signed int vsier = {-2,-2,-2,-2};
8682
+ vector float vfer = {0.0,0.0,0.0,0.0};
8684
+ vector unsigned char vucer = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
8685
+ vector signed char vscer = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
8686
+ vector unsigned short vuser = {7,7,7,7,7,7,7,7};
8687
+ vector signed short vsser = {-4,-4,-4,-4,-4,-4,-4,-4};
8688
+ vector unsigned int vuier = {2,2,2,2};
8689
+ vector signed int vsier = {1,1,1,1};
8690
+ vector float vfer = {-1.0,-1.0,-1.0,-1.0};
8693
+ vucr = vec_splat (vuc, 1);
8694
+ vscr = vec_splat (vsc, 8);
8695
+ vusr = vec_splat (vus, 7);
8696
+ vssr = vec_splat (vss, 0);
8697
+ vuir = vec_splat (vui, 2);
8698
+ vsir = vec_splat (vsi, 3);
8699
+ vfr = vec_splat (vf, 1);
8701
+ check (vec_all_eq (vucr, vucer), "vuc");
8702
+ check (vec_all_eq (vscr, vscer), "vsc");
8703
+ check (vec_all_eq (vusr, vuser), "vus");
8704
+ check (vec_all_eq (vssr, vsser), "vss");
8705
+ check (vec_all_eq (vuir, vuier), "vui");
8706
+ check (vec_all_eq (vsir, vsier), "vsi");
8707
+ check (vec_all_eq (vfr, vfer ), "vf");
8709
--- a/src/gcc/testsuite/gcc.dg/vmx/extract-vsx-be-order.c
8710
+++ b/src/gcc/testsuite/gcc.dg/vmx/extract-vsx-be-order.c
8712
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
8713
+/* { dg-require-effective-target powerpc_vsx_ok } */
8714
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
8716
+#include "harness.h"
8720
+ vector long long vl = {0, 1};
8721
+ vector double vd = {0.0, 1.0};
8723
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8724
+ check (vec_extract (vl, 0) == 1, "vl, 0");
8725
+ check (vec_extract (vd, 1) == 0.0, "vd, 1");
8727
+ check (vec_extract (vl, 0) == 0, "vl, 0");
8728
+ check (vec_extract (vd, 1) == 1.0, "vd, 1");
8731
--- a/src/gcc/testsuite/gcc.dg/vmx/ld-be-order.c
8732
+++ b/src/gcc/testsuite/gcc.dg/vmx/ld-be-order.c
8734
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
8736
+#include "harness.h"
8738
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
8739
+static signed char svsc[16] __attribute__ ((aligned (16)));
8740
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
8741
+static unsigned short svus[8] __attribute__ ((aligned (16)));
8742
+static signed short svss[8] __attribute__ ((aligned (16)));
8743
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
8744
+static unsigned short svp[8] __attribute__ ((aligned (16)));
8745
+static unsigned int svui[4] __attribute__ ((aligned (16)));
8746
+static signed int svsi[4] __attribute__ ((aligned (16)));
8747
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
8748
+static float svf[4] __attribute__ ((aligned (16)));
8750
+static void init ()
8753
+ for (i = 0; i < 16; ++i)
8757
+ svbc[i] = (i % 2) ? 0xff : 0;
8759
+ for (i = 0; i < 8; ++i)
8763
+ svbs[i] = (i % 2) ? 0xffff : 0;
8766
+ for (i = 0; i < 4; ++i)
8770
+ svbi[i] = (i % 2) ? 0xffffffff : 0;
8771
+ svf[i] = i * 1.0f;
8775
+static void test ()
8777
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8778
+ vector unsigned char evuc = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
8779
+ vector signed char evsc = {7,6,5,4,3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8};
8780
+ vector bool char evbc = {255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0};
8781
+ vector unsigned short evus = {7,6,5,4,3,2,1,0};
8782
+ vector signed short evss = {3,2,1,0,-1,-2,-3,-4};
8783
+ vector bool short evbs = {65535,0,65535,0,65535,0,65535,0};
8784
+ vector pixel evp = {7,6,5,4,3,2,1,0};
8785
+ vector unsigned int evui = {3,2,1,0};
8786
+ vector signed int evsi = {1,0,-1,-2};
8787
+ vector bool int evbi = {0xffffffff,0,0xffffffff,0};
8788
+ vector float evf = {3.0,2.0,1.0,0.0};
8790
+ vector unsigned char evuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8791
+ vector signed char evsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8792
+ vector bool char evbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
8793
+ vector unsigned short evus = {0,1,2,3,4,5,6,7};
8794
+ vector signed short evss = {-4,-3,-2,-1,0,1,2,3};
8795
+ vector bool short evbs = {0,65535,0,65535,0,65535,0,65535};
8796
+ vector pixel evp = {0,1,2,3,4,5,6,7};
8797
+ vector unsigned int evui = {0,1,2,3};
8798
+ vector signed int evsi = {-2,-1,0,1};
8799
+ vector bool int evbi = {0,0xffffffff,0,0xffffffff};
8800
+ vector float evf = {0.0,1.0,2.0,3.0};
8803
+ vector unsigned char vuc;
8804
+ vector signed char vsc;
8805
+ vector bool char vbc;
8806
+ vector unsigned short vus;
8807
+ vector signed short vss;
8808
+ vector bool short vbs;
8810
+ vector unsigned int vui;
8811
+ vector signed int vsi;
8812
+ vector bool int vbi;
8817
+ vuc = vec_ld (0, (vector unsigned char *)svuc);
8818
+ vsc = vec_ld (0, (vector signed char *)svsc);
8819
+ vbc = vec_ld (0, (vector bool char *)svbc);
8820
+ vus = vec_ld (0, (vector unsigned short *)svus);
8821
+ vss = vec_ld (0, (vector signed short *)svss);
8822
+ vbs = vec_ld (0, (vector bool short *)svbs);
8823
+ vp = vec_ld (0, (vector pixel *)svp);
8824
+ vui = vec_ld (0, (vector unsigned int *)svui);
8825
+ vsi = vec_ld (0, (vector signed int *)svsi);
8826
+ vbi = vec_ld (0, (vector bool int *)svbi);
8827
+ vf = vec_ld (0, (vector float *)svf);
8829
+ check (vec_all_eq (vuc, evuc), "vuc");
8830
+ check (vec_all_eq (vsc, evsc), "vsc");
8831
+ check (vec_all_eq (vbc, evbc), "vbc");
8832
+ check (vec_all_eq (vus, evus), "vus");
8833
+ check (vec_all_eq (vss, evss), "vss");
8834
+ check (vec_all_eq (vbs, evbs), "vbs");
8835
+ check (vec_all_eq (vp, evp ), "vp" );
8836
+ check (vec_all_eq (vui, evui), "vui");
8837
+ check (vec_all_eq (vsi, evsi), "vsi");
8838
+ check (vec_all_eq (vbi, evbi), "vbi");
8839
+ check (vec_all_eq (vf, evf ), "vf" );
8841
--- a/src/gcc/testsuite/gcc.dg/vmx/ld.c
8842
+++ b/src/gcc/testsuite/gcc.dg/vmx/ld.c
8844
+#include "harness.h"
8846
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
8847
+static signed char svsc[16] __attribute__ ((aligned (16)));
8848
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
8849
+static unsigned short svus[8] __attribute__ ((aligned (16)));
8850
+static signed short svss[8] __attribute__ ((aligned (16)));
8851
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
8852
+static unsigned short svp[8] __attribute__ ((aligned (16)));
8853
+static unsigned int svui[4] __attribute__ ((aligned (16)));
8854
+static signed int svsi[4] __attribute__ ((aligned (16)));
8855
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
8856
+static float svf[4] __attribute__ ((aligned (16)));
8858
+static void init ()
8861
+ for (i = 0; i < 16; ++i)
8865
+ svbc[i] = (i % 2) ? 0xff : 0;
8867
+ for (i = 0; i < 8; ++i)
8871
+ svbs[i] = (i % 2) ? 0xffff : 0;
8874
+ for (i = 0; i < 4; ++i)
8878
+ svbi[i] = (i % 2) ? 0xffffffff : 0;
8879
+ svf[i] = i * 1.0f;
8883
+static void test ()
8885
+ vector unsigned char evuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8886
+ vector signed char evsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8887
+ vector bool char evbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
8888
+ vector unsigned short evus = {0,1,2,3,4,5,6,7};
8889
+ vector signed short evss = {-4,-3,-2,-1,0,1,2,3};
8890
+ vector bool short evbs = {0,65535,0,65535,0,65535,0,65535};
8891
+ vector pixel evp = {0,1,2,3,4,5,6,7};
8892
+ vector unsigned int evui = {0,1,2,3};
8893
+ vector signed int evsi = {-2,-1,0,1};
8894
+ vector bool int evbi = {0,0xffffffff,0,0xffffffff};
8895
+ vector float evf = {0.0,1.0,2.0,3.0};
8897
+ vector unsigned char vuc;
8898
+ vector signed char vsc;
8899
+ vector bool char vbc;
8900
+ vector unsigned short vus;
8901
+ vector signed short vss;
8902
+ vector bool short vbs;
8904
+ vector unsigned int vui;
8905
+ vector signed int vsi;
8906
+ vector bool int vbi;
8911
+ vuc = vec_ld (0, (vector unsigned char *)svuc);
8912
+ vsc = vec_ld (0, (vector signed char *)svsc);
8913
+ vbc = vec_ld (0, (vector bool char *)svbc);
8914
+ vus = vec_ld (0, (vector unsigned short *)svus);
8915
+ vss = vec_ld (0, (vector signed short *)svss);
8916
+ vbs = vec_ld (0, (vector bool short *)svbs);
8917
+ vp = vec_ld (0, (vector pixel *)svp);
8918
+ vui = vec_ld (0, (vector unsigned int *)svui);
8919
+ vsi = vec_ld (0, (vector signed int *)svsi);
8920
+ vbi = vec_ld (0, (vector bool int *)svbi);
8921
+ vf = vec_ld (0, (vector float *)svf);
8923
+ check (vec_all_eq (vuc, evuc), "vuc");
8924
+ check (vec_all_eq (vsc, evsc), "vsc");
8925
+ check (vec_all_eq (vbc, evbc), "vbc");
8926
+ check (vec_all_eq (vus, evus), "vus");
8927
+ check (vec_all_eq (vss, evss), "vss");
8928
+ check (vec_all_eq (vbs, evbs), "vbs");
8929
+ check (vec_all_eq (vp, evp ), "vp" );
8930
+ check (vec_all_eq (vui, evui), "vui");
8931
+ check (vec_all_eq (vsi, evsi), "vsi");
8932
+ check (vec_all_eq (vbi, evbi), "vbi");
8933
+ check (vec_all_eq (vf, evf ), "vf" );
8935
--- a/src/gcc/testsuite/gcc.dg/vmx/sn7153.c
8936
+++ b/src/gcc/testsuite/gcc.dg/vmx/sn7153.c
8941
+#ifdef __LITTLE_ENDIAN__
8942
+ if (vec_any_ne(vec_splat(vec_mfvscr(), 0), ((vector unsigned short){1,1,1,1,1,1,1,1})))
8944
if (vec_any_ne(vec_splat(vec_mfvscr(), 7), ((vector unsigned short){1,1,1,1,1,1,1,1})))
8947
union {vector unsigned short v; unsigned short s[8];} u;
8949
--- a/src/gcc/testsuite/gcc.dg/vmx/stl.c
8950
+++ b/src/gcc/testsuite/gcc.dg/vmx/stl.c
8952
+#include "harness.h"
8954
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
8955
+static signed char svsc[16] __attribute__ ((aligned (16)));
8956
+static unsigned char svbc[16] __attribute__ ((aligned (16)));
8957
+static unsigned short svus[8] __attribute__ ((aligned (16)));
8958
+static signed short svss[8] __attribute__ ((aligned (16)));
8959
+static unsigned short svbs[8] __attribute__ ((aligned (16)));
8960
+static unsigned short svp[8] __attribute__ ((aligned (16)));
8961
+static unsigned int svui[4] __attribute__ ((aligned (16)));
8962
+static signed int svsi[4] __attribute__ ((aligned (16)));
8963
+static unsigned int svbi[4] __attribute__ ((aligned (16)));
8964
+static float svf[4] __attribute__ ((aligned (16)));
8966
+static void check_arrays ()
8969
+ for (i = 0; i < 16; ++i)
8971
+ check (svuc[i] == i, "svuc");
8972
+ check (svsc[i] == i - 8, "svsc");
8973
+ check (svbc[i] == ((i % 2) ? 0xff : 0), "svbc");
8975
+ for (i = 0; i < 8; ++i)
8977
+ check (svus[i] == i, "svus");
8978
+ check (svss[i] == i - 4, "svss");
8979
+ check (svbs[i] == ((i % 2) ? 0xffff : 0), "svbs");
8980
+ check (svp[i] == i, "svp");
8982
+ for (i = 0; i < 4; ++i)
8984
+ check (svui[i] == i, "svui");
8985
+ check (svsi[i] == i - 2, "svsi");
8986
+ check (svbi[i] == ((i % 2) ? 0xffffffff : 0), "svbi");
8987
+ check (svf[i] == i * 1.0f, "svf");
8991
+static void test ()
8993
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
8994
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
8995
+ vector bool char vbc = {0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255};
8996
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
8997
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
8998
+ vector bool short vbs = {0,65535,0,65535,0,65535,0,65535};
8999
+ vector pixel vp = {0,1,2,3,4,5,6,7};
9000
+ vector unsigned int vui = {0,1,2,3};
9001
+ vector signed int vsi = {-2,-1,0,1};
9002
+ vector bool int vbi = {0,0xffffffff,0,0xffffffff};
9003
+ vector float vf = {0.0,1.0,2.0,3.0};
9005
+ vec_stl (vuc, 0, (vector unsigned char *)svuc);
9006
+ vec_stl (vsc, 0, (vector signed char *)svsc);
9007
+ vec_stl (vbc, 0, (vector bool char *)svbc);
9008
+ vec_stl (vus, 0, (vector unsigned short *)svus);
9009
+ vec_stl (vss, 0, (vector signed short *)svss);
9010
+ vec_stl (vbs, 0, (vector bool short *)svbs);
9011
+ vec_stl (vp, 0, (vector pixel *)svp);
9012
+ vec_stl (vui, 0, (vector unsigned int *)svui);
9013
+ vec_stl (vsi, 0, (vector signed int *)svsi);
9014
+ vec_stl (vbi, 0, (vector bool int *)svbi);
9015
+ vec_stl (vf, 0, (vector float *)svf);
9019
--- a/src/gcc/testsuite/gcc.dg/vmx/st-vsx.c
9020
+++ b/src/gcc/testsuite/gcc.dg/vmx/st-vsx.c
9022
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9023
+/* { dg-require-effective-target powerpc_vsx_ok } */
9024
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
9026
+#include "harness.h"
9028
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
9029
+static double svd[2] __attribute__ ((aligned (16)));
9031
+static void check_arrays ()
9034
+ for (i = 0; i < 2; ++i)
9036
+ check (svul[i] == i, "svul");
9037
+ check (svd[i] == i * 1.0, "svd");
9041
+static void test ()
9043
+ vector unsigned long long vul = {0,1};
9044
+ vector double vd = {0.0,1.0};
9046
+ vec_st (vul, 0, (vector unsigned long long *)svul);
9047
+ vec_st (vd, 0, (vector double *)svd);
9051
--- a/src/gcc/testsuite/gcc.dg/vmx/sum2s.c
9052
+++ b/src/gcc/testsuite/gcc.dg/vmx/sum2s.c
9054
+#include "harness.h"
9058
+ vector signed int vsia = {-10,1,2,3};
9059
+ vector signed int vsib = {100,101,102,-103};
9060
+ vector signed int vsir;
9061
+ vector signed int vsier = {0,92,0,-98};
9063
+ vsir = vec_sum2s (vsia, vsib);
9065
+ check (vec_all_eq (vsir, vsier), "vsir");
9067
--- a/src/gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c
9068
+++ b/src/gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c
9070
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9071
+/* { dg-require-effective-target powerpc_vsx_ok } */
9072
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
9074
+#include "harness.h"
9076
+static int vec_long_long_eq (vector long long x, vector long long y)
9078
+ return (x[0] == y[0] && x[1] == y[1]);
9081
+static int vec_double_eq (vector double x, vector double y)
9083
+ return (x[0] == y[0] && x[1] == y[1]);
9088
+ /* Input vectors. */
9089
+ vector long long vla = {-2,-1};
9090
+ vector long long vlb = {0,1};
9091
+ vector double vda = {-2.0,-1.0};
9092
+ vector double vdb = {0.0,1.0};
9094
+ /* Result vectors. */
9095
+ vector long long vlh, vll;
9096
+ vector double vdh, vdl;
9098
+ /* Expected result vectors. */
9099
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9100
+ vector long long vlrh = {1,-1};
9101
+ vector long long vlrl = {0,-2};
9102
+ vector double vdrh = {1.0,-1.0};
9103
+ vector double vdrl = {0.0,-2.0};
9105
+ vector long long vlrh = {-2,0};
9106
+ vector long long vlrl = {-1,1};
9107
+ vector double vdrh = {-2.0,0.0};
9108
+ vector double vdrl = {-1.0,1.0};
9111
+ vlh = vec_mergeh (vla, vlb);
9112
+ vll = vec_mergel (vla, vlb);
9113
+ vdh = vec_mergeh (vda, vdb);
9114
+ vdl = vec_mergel (vda, vdb);
9116
+ check (vec_long_long_eq (vlh, vlrh), "vlh");
9117
+ check (vec_long_long_eq (vll, vlrl), "vll");
9118
+ check (vec_double_eq (vdh, vdrh), "vdh" );
9119
+ check (vec_double_eq (vdl, vdrl), "vdl" );
9121
--- a/src/gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c
9122
+++ b/src/gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c
9124
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
9126
+#include "harness.h"
9130
+ vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
9131
+ vector unsigned char vucb = {2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3};
9132
+ vector signed char vsca = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
9133
+ vector signed char vscb = {2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3};
9134
+ vector unsigned short vusa = {0,1,2,3,4,5,6,7};
9135
+ vector unsigned short vusb = {2,3,2,3,2,3,2,3};
9136
+ vector signed short vssa = {-4,-3,-2,-1,0,1,2,3};
9137
+ vector signed short vssb = {2,-3,2,-3,2,-3,2,-3};
9138
+ vector unsigned short vuse, vuso;
9139
+ vector signed short vsse, vsso;
9140
+ vector unsigned int vuie, vuio;
9141
+ vector signed int vsie, vsio;
9143
+ vuse = vec_mule (vuca, vucb);
9144
+ vuso = vec_mulo (vuca, vucb);
9145
+ vsse = vec_mule (vsca, vscb);
9146
+ vsso = vec_mulo (vsca, vscb);
9147
+ vuie = vec_mule (vusa, vusb);
9148
+ vuio = vec_mulo (vusa, vusb);
9149
+ vsie = vec_mule (vssa, vssb);
9150
+ vsio = vec_mulo (vssa, vssb);
9152
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9153
+ check (vec_all_eq (vuse,
9154
+ ((vector unsigned short){3,9,15,21,27,33,39,45})),
9156
+ check (vec_all_eq (vuso,
9157
+ ((vector unsigned short){0,4,8,12,16,20,24,28})),
9159
+ check (vec_all_eq (vsse,
9160
+ ((vector signed short){21,15,9,3,-3,-9,-15,-21})),
9162
+ check (vec_all_eq (vsso,
9163
+ ((vector signed short){-16,-12,-8,-4,0,4,8,12})),
9165
+ check (vec_all_eq (vuie, ((vector unsigned int){3,9,15,21})), "vuie");
9166
+ check (vec_all_eq (vuio, ((vector unsigned int){0,4,8,12})), "vuio");
9167
+ check (vec_all_eq (vsie, ((vector signed int){9,3,-3,-9})), "vsie");
9168
+ check (vec_all_eq (vsio, ((vector signed int){-8,-4,0,4})), "vsio");
9170
+ check (vec_all_eq (vuse,
9171
+ ((vector unsigned short){0,4,8,12,16,20,24,28})),
9173
+ check (vec_all_eq (vuso,
9174
+ ((vector unsigned short){3,9,15,21,27,33,39,45})),
9176
+ check (vec_all_eq (vsse,
9177
+ ((vector signed short){-16,-12,-8,-4,0,4,8,12})),
9179
+ check (vec_all_eq (vsso,
9180
+ ((vector signed short){21,15,9,3,-3,-9,-15,-21})),
9182
+ check (vec_all_eq (vuie, ((vector unsigned int){0,4,8,12})), "vuie");
9183
+ check (vec_all_eq (vuio, ((vector unsigned int){3,9,15,21})), "vuio");
9184
+ check (vec_all_eq (vsie, ((vector signed int){-8,-4,0,4})), "vsie");
9185
+ check (vec_all_eq (vsio, ((vector signed int){9,3,-3,-9})), "vsio");
9188
--- a/src/gcc/testsuite/gcc.dg/vmx/insert-vsx.c
9189
+++ b/src/gcc/testsuite/gcc.dg/vmx/insert-vsx.c
9191
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9192
+/* { dg-require-effective-target powerpc_vsx_ok } */
9193
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
9195
+#include "harness.h"
9197
+static int vec_long_long_eq (vector long long x, vector long long y)
9199
+ return (x[0] == y[0] && x[1] == y[1]);
9202
+static int vec_dbl_eq (vector double x, vector double y)
9204
+ return (x[0] == y[0] && x[1] == y[1]);
9209
+ vector long long vl = {0, 1};
9210
+ vector double vd = {0.0, 1.0};
9211
+ vector long long vlr = vec_insert (2, vl, 0);
9212
+ vector double vdr = vec_insert (2.0, vd, 1);
9213
+ vector long long vler = {2, 1};
9214
+ vector double vder = {0.0, 2.0};
9216
+ check (vec_long_long_eq (vlr, vler), "vl");
9217
+ check (vec_dbl_eq (vdr, vder), "vd");
9219
--- a/src/gcc/testsuite/gcc.dg/vmx/vsums-be-order.c
9220
+++ b/src/gcc/testsuite/gcc.dg/vmx/vsums-be-order.c
9222
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
9224
+#include "harness.h"
9228
+ vector signed int va = {-7,11,-13,17};
9230
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9231
+ vector signed int vb = {128,0,0,0};
9232
+ vector signed int evd = {136,0,0,0};
9234
+ vector signed int vb = {0,0,0,128};
9235
+ vector signed int evd = {0,0,0,136};
9238
+ vector signed int vd = vec_sums (va, vb);
9240
+ check (vec_all_eq (vd, evd), "sums");
9242
--- a/src/gcc/testsuite/gcc.dg/vmx/ldl-vsx.c
9243
+++ b/src/gcc/testsuite/gcc.dg/vmx/ldl-vsx.c
9245
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9246
+/* { dg-require-effective-target powerpc_vsx_ok } */
9247
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
9249
+#include "harness.h"
9251
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
9252
+static double svd[2] __attribute__ ((aligned (16)));
9254
+static void init ()
9257
+ for (i = 0; i < 2; ++i)
9264
+static void test ()
9266
+ vector unsigned long long evul = {0,1};
9267
+ vector double evd = {0.0,1.0};
9269
+ vector unsigned long long vul;
9275
+ vul = vec_ldl (0, (vector unsigned long long *)svul);
9276
+ vd = vec_ldl (0, (vector double *)svd);
9278
+ for (i = 0; i < 2; ++i)
9280
+ check (vul[i] == evul[i], "vul");
9281
+ check (vd[i] == evd[i], "vd" );
9284
--- a/src/gcc/testsuite/gcc.dg/vmx/ste.c
9285
+++ b/src/gcc/testsuite/gcc.dg/vmx/ste.c
9287
+#include "harness.h"
9289
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
9290
+static signed char svsc[16] __attribute__ ((aligned (16)));
9291
+static unsigned short svus[8] __attribute__ ((aligned (16)));
9292
+static signed short svss[8] __attribute__ ((aligned (16)));
9293
+static unsigned int svui[4] __attribute__ ((aligned (16)));
9294
+static signed int svsi[4] __attribute__ ((aligned (16)));
9295
+static float svf[4] __attribute__ ((aligned (16)));
9297
+static void check_arrays ()
9299
+ check (svuc[9] == 9, "svuc");
9300
+ check (svsc[14] == 6, "svsc");
9301
+ check (svus[7] == 7, "svus");
9302
+ check (svss[1] == -3, "svss");
9303
+ check (svui[3] == 3, "svui");
9304
+ check (svsi[2] == 0, "svsi");
9305
+ check (svf[0] == 0.0, "svf");
9308
+static void test ()
9310
+ vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
9311
+ vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
9312
+ vector unsigned short vus = {0,1,2,3,4,5,6,7};
9313
+ vector signed short vss = {-4,-3,-2,-1,0,1,2,3};
9314
+ vector unsigned int vui = {0,1,2,3};
9315
+ vector signed int vsi = {-2,-1,0,1};
9316
+ vector float vf = {0.0,1.0,2.0,3.0};
9318
+ vec_ste (vuc, 9*1, (unsigned char *)svuc);
9319
+ vec_ste (vsc, 14*1, (signed char *)svsc);
9320
+ vec_ste (vus, 7*2, (unsigned short *)svus);
9321
+ vec_ste (vss, 1*2, (signed short *)svss);
9322
+ vec_ste (vui, 3*4, (unsigned int *)svui);
9323
+ vec_ste (vsi, 2*4, (signed int *)svsi);
9324
+ vec_ste (vf, 0*4, (float *)svf);
9328
--- a/src/gcc/testsuite/gcc.dg/vmx/lde-be-order.c
9329
+++ b/src/gcc/testsuite/gcc.dg/vmx/lde-be-order.c
9331
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
9333
+#include "harness.h"
9335
+static unsigned char svuc[16] __attribute__ ((aligned (16)));
9336
+static signed char svsc[16] __attribute__ ((aligned (16)));
9337
+static unsigned short svus[8] __attribute__ ((aligned (16)));
9338
+static signed short svss[8] __attribute__ ((aligned (16)));
9339
+static unsigned int svui[4] __attribute__ ((aligned (16)));
9340
+static signed int svsi[4] __attribute__ ((aligned (16)));
9341
+static float svf[4] __attribute__ ((aligned (16)));
9343
+static void init ()
9346
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9347
+ for (i = 15; i >= 0; --i)
9349
+ for (i = 0; i < 16; ++i)
9355
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9356
+ for (i = 7; i >= 0; --i)
9358
+ for (i = 0; i < 8; ++i)
9364
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9365
+ for (i = 3; i >= 0; --i)
9367
+ for (i = 0; i < 4; ++i)
9372
+ svf[i] = i * 1.0f;
9376
+static void test ()
9378
+ vector unsigned char vuc;
9379
+ vector signed char vsc;
9380
+ vector unsigned short vus;
9381
+ vector signed short vss;
9382
+ vector unsigned int vui;
9383
+ vector signed int vsi;
9388
+ vuc = vec_lde (9*1, (unsigned char *)svuc);
9389
+ vsc = vec_lde (14*1, (signed char *)svsc);
9390
+ vus = vec_lde (7*2, (unsigned short *)svus);
9391
+ vss = vec_lde (1*2, (signed short *)svss);
9392
+ vui = vec_lde (3*4, (unsigned int *)svui);
9393
+ vsi = vec_lde (2*4, (signed int *)svsi);
9394
+ vf = vec_lde (0*4, (float *)svf);
9396
+ check (vec_extract (vuc, 9) == 9, "vuc");
9397
+ check (vec_extract (vsc, 14) == 6, "vsc");
9398
+ check (vec_extract (vus, 7) == 7, "vus");
9399
+ check (vec_extract (vss, 1) == -3, "vss");
9400
+ check (vec_extract (vui, 3) == 3, "vui");
9401
+ check (vec_extract (vsi, 2) == 0, "vsi");
9402
+ check (vec_extract (vf, 0) == 0.0, "vf");
9404
--- a/src/gcc/testsuite/gcc.dg/vmx/splat-vsx.c
9405
+++ b/src/gcc/testsuite/gcc.dg/vmx/splat-vsx.c
9407
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9408
+/* { dg-require-effective-target powerpc_vsx_ok } */
9409
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
9411
+#include "harness.h"
9415
+ /* Input vectors. */
9416
+ vector unsigned int vui = {0,1,2,3};
9417
+ vector signed int vsi = {-2,-1,0,1};
9418
+ vector float vf = {-2.0,-1.0,0.0,1.0};
9420
+ /* Result vectors. */
9421
+ vector unsigned int vuir;
9422
+ vector signed int vsir;
9425
+ /* Expected result vectors. */
9426
+ vector unsigned int vuier = {2,2,2,2};
9427
+ vector signed int vsier = {1,1,1,1};
9428
+ vector float vfer = {-1.0,-1.0,-1.0,-1.0};
9430
+ vuir = vec_splat (vui, 2);
9431
+ vsir = vec_splat (vsi, 3);
9432
+ vfr = vec_splat (vf, 1);
9434
+ check (vec_all_eq (vuir, vuier), "vui");
9435
+ check (vec_all_eq (vsir, vsier), "vsi");
9436
+ check (vec_all_eq (vfr, vfer ), "vf");
9438
--- a/src/gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c
9439
+++ b/src/gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c
9441
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
9443
+#include "harness.h"
9447
+ vector signed int vsia = {-10,1,2,3};
9448
+ vector signed int vsib = {100,101,102,-103};
9449
+ vector signed int vsir;
9450
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
9451
+ vector signed int vsier = {91,0,107,0};
9453
+ vector signed int vsier = {0,92,0,-98};
9456
+ vsir = vec_sum2s (vsia, vsib);
9458
+ check (vec_all_eq (vsir, vsier), "vsir");
9460
--- a/src/gcc/testsuite/gcc.dg/vmx/merge-vsx.c
9461
+++ b/src/gcc/testsuite/gcc.dg/vmx/merge-vsx.c
9463
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9464
+/* { dg-require-effective-target powerpc_vsx_ok } */
9465
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
9467
+#include "harness.h"
9469
+static int vec_long_long_eq (vector long long x, vector long long y)
9471
+ return (x[0] == y[0] && x[1] == y[1]);
9474
+static int vec_double_eq (vector double x, vector double y)
9476
+ return (x[0] == y[0] && x[1] == y[1]);
9481
+ /* Input vectors. */
9482
+ vector long long vla = {-2,-1};
9483
+ vector long long vlb = {0,1};
9484
+ vector double vda = {-2.0,-1.0};
9485
+ vector double vdb = {0.0,1.0};
9487
+ /* Result vectors. */
9488
+ vector long long vlh, vll;
9489
+ vector double vdh, vdl;
9491
+ /* Expected result vectors. */
9492
+ vector long long vlrh = {-2,0};
9493
+ vector long long vlrl = {-1,1};
9494
+ vector double vdrh = {-2.0,0.0};
9495
+ vector double vdrl = {-1.0,1.0};
9497
+ vlh = vec_mergeh (vla, vlb);
9498
+ vll = vec_mergel (vla, vlb);
9499
+ vdh = vec_mergeh (vda, vdb);
9500
+ vdl = vec_mergel (vda, vdb);
9502
+ check (vec_long_long_eq (vlh, vlrh), "vlh");
9503
+ check (vec_long_long_eq (vll, vlrl), "vll");
9504
+ check (vec_double_eq (vdh, vdrh), "vdh" );
9505
+ check (vec_double_eq (vdl, vdrl), "vdl" );
9507
--- a/src/gcc/testsuite/gcc.dg/vmx/stl-vsx.c
9508
+++ b/src/gcc/testsuite/gcc.dg/vmx/stl-vsx.c
9510
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
9511
+/* { dg-require-effective-target powerpc_vsx_ok } */
9512
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
9514
+#include "harness.h"
9516
+static unsigned long long svul[2] __attribute__ ((aligned (16)));
9517
+static double svd[2] __attribute__ ((aligned (16)));
9519
+static void check_arrays ()
9522
+ for (i = 0; i < 2; ++i)
9524
+ check (svul[i] == i, "svul");
9525
+ check (svd[i] == i * 1.0, "svd");
9529
+static void test ()
9531
+ vector unsigned long long vul = {0,1};
9532
+ vector double vd = {0.0,1.0};
9534
+ vec_stl (vul, 0, (vector unsigned long long *)svul);
9535
+ vec_stl (vd, 0, (vector double *)svd);
9539
--- a/src/gcc/testsuite/gcc.dg/stack-usage-1.c
9540
+++ b/src/gcc/testsuite/gcc.dg/stack-usage-1.c
9543
#elif defined (__powerpc64__) || defined (__ppc64__) || defined (__POWERPC64__) \
9544
|| defined (__PPC64__)
9546
+# if _CALL_ELF == 2
9551
#elif defined (__powerpc__) || defined (__PPC__) || defined (__ppc__) \
9552
|| defined (__POWERPC__) || defined (PPC) || defined (_IBMR2)
9553
# if defined (__ALTIVEC__)
9554
--- a/src/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-34.c
9555
+++ b/src/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-34.c
9557
/* { dg-require-effective-target vect_int } */
9558
+/* { dg-skip-if "cost too high" { powerpc*le-*-* } { "*" } { "" } } */
9561
#include "../../tree-vect.h"
9562
--- a/src/gcc/testsuite/g++.dg/lookup/using9.C
9563
+++ b/src/gcc/testsuite/g++.dg/lookup/using9.C
9566
f(1); // { dg-error "ambiguous" }
9567
// { dg-message "candidate" "candidate note" { target *-*-* } 22 }
9568
- void f(int); // { dg-error "previous using declaration" }
9569
+ void f(int); // { dg-error "previous declaration" }
9575
- using B::f; // { dg-error "already declared" }
9576
+ using B::f; // { dg-error "previous declaration" }
9578
--- a/src/gcc/testsuite/g++.dg/eh/ppc64-sighandle-cr.C
9579
+++ b/src/gcc/testsuite/g++.dg/eh/ppc64-sighandle-cr.C
9581
+// { dg-do run { target { powerpc64*-*-linux* } } }
9582
+// { dg-options "-fexceptions -fnon-call-exceptions" }
9584
+#include <signal.h>
9585
+#include <stdlib.h>
9588
+#define SET_CR(R,V) __asm__ __volatile__ ("mtcrf %0,%1" : : "n" (1<<(7-R)), "r" (V<<(4*(7-R))) : "cr" #R)
9589
+#define GET_CR(R) ({ int tmp; __asm__ __volatile__ ("mfcr %0" : "=r" (tmp)); (tmp >> 4*(7-R)) & 15; })
9591
+void sighandler (int signo, siginfo_t * si, void * uc)
9600
+float test (float a, float b) __attribute__ ((__noinline__));
9601
+float test (float a, float b)
9604
+ asm ("mtcrf %1,%2" : "=f" (x) : "n" (1 << (7-3)), "r" (0), "0" (b) : "cr3");
9610
+ struct sigaction sa;
9613
+ sa.sa_sigaction = sighandler;
9614
+ sa.sa_flags = SA_SIGINFO;
9616
+ status = sigaction (SIGFPE, & sa, NULL);
9618
+ feenableexcept (FE_DIVBYZERO);
9628
+ return GET_CR(2) != 6 || GET_CR(3) != 9 || GET_CR(4) != 12;
9635
--- a/src/gcc/testsuite/g++.dg/overload/using3.C
9636
+++ b/src/gcc/testsuite/g++.dg/overload/using3.C
9638
+// { dg-do compile }
9647
+ void f(int); // { dg-message "previous" }
9652
+ using a::f; // { dg-error "conflicts" }
9654
--- a/src/gcc/testsuite/g++.dg/overload/using2.C
9655
+++ b/src/gcc/testsuite/g++.dg/overload/using2.C
9657
extern "C" void exit (int) throw ();
9658
extern "C" void *malloc (__SIZE_TYPE__) throw () __attribute__((malloc));
9660
- void abort (void) throw ();
9661
+ void abort (void) throw (); // { dg-message "previous" }
9662
void _exit (int) throw (); // { dg-error "conflicts" "conflicts" }
9663
// { dg-message "void _exit" "_exit" { target *-*-* } 49 }
9666
// { dg-message "void C1" "C1" { target *-*-* } 53 }
9668
extern "C" void c2 (void) throw ();
9669
- void C2 (void) throw ();
9670
+ void C2 (void) throw (); // { dg-message "previous" }
9672
int C3 (int) throw ();
9675
-using std::abort; // { dg-error "already declared" }
9676
+using std::abort; // { dg-error "conflicts" }
9678
-using std::C2; // { dg-error "already declared" }
9679
+using std::C2; // { dg-error "conflicts" }
9681
using std::c3; using other::c3;
9682
using std::C3; using other::C3;
9683
--- a/src/gcc/cp/ChangeLog.ibm
9684
+++ b/src/gcc/cp/ChangeLog.ibm
9686
+2013-08-04 Peter Bergner <bergner@vnet.ibm.com>
9688
+ Back port from mainline
9689
+ 2013-08-01 Fabien Chêne <fabien@gcc.gnu.org>
9692
+ * cp-tree.h: Check OVL_USED with OVERLOAD_CHECK.
9693
+ * name-lookup.c (do_nonmember_using_decl): Make sure we have an
9694
+ OVERLOAD before calling OVL_USED. Call diagnose_name_conflict
9695
+ instead of issuing an error without mentioning the conflicting
9697
--- a/src/gcc/cp/cp-tree.h
9698
+++ b/src/gcc/cp/cp-tree.h
9700
/* If set, this was imported in a using declaration.
9701
This is not to confuse with being used somewhere, which
9702
is not important for this node. */
9703
-#define OVL_USED(NODE) TREE_USED (NODE)
9704
+#define OVL_USED(NODE) TREE_USED (OVERLOAD_CHECK (NODE))
9705
/* If set, this OVERLOAD was created for argument-dependent lookup
9706
and can be freed afterward. */
9707
#define OVL_ARG_DEPENDENT(NODE) TREE_LANG_FLAG_0 (OVERLOAD_CHECK (NODE))
9708
--- a/src/gcc/cp/name-lookup.c
9709
+++ b/src/gcc/cp/name-lookup.c
9710
@@ -2286,8 +2286,7 @@
9711
&& compparms (TYPE_ARG_TYPES (TREE_TYPE (fn)),
9712
TYPE_ARG_TYPES (TREE_TYPE (decl)))
9713
&& ! decls_match (fn, decl))
9714
- error ("%q#D conflicts with previous using declaration %q#D",
9716
+ diagnose_name_conflict (decl, fn);
9718
dup = duplicate_decls (decl, fn, is_friend);
9719
/* If DECL was a redeclaration of FN -- even an invalid
9720
@@ -2519,7 +2518,7 @@
9721
if (new_fn == old_fn)
9722
/* The function already exists in the current namespace. */
9724
- else if (OVL_USED (tmp1))
9725
+ else if (TREE_CODE (tmp1) == OVERLOAD && OVL_USED (tmp1))
9726
continue; /* this is a using decl */
9727
else if (compparms (TYPE_ARG_TYPES (TREE_TYPE (new_fn)),
9728
TYPE_ARG_TYPES (TREE_TYPE (old_fn))))
9729
@@ -2534,7 +2533,7 @@
9733
- error ("%qD is already declared in this scope", name);
9734
+ diagnose_name_conflict (new_fn, old_fn);
9738
--- a/src/gcc/builtins.def
9739
+++ b/src/gcc/builtins.def
9741
DEF_LIB_BUILTIN (BUILT_IN_FABS, "fabs", BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
9742
DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSF, "fabsf", BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
9743
DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSL, "fabsl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
9744
+DEF_GCC_BUILTIN (BUILT_IN_FABSD32, "fabsd32", BT_FN_DFLOAT32_DFLOAT32, ATTR_CONST_NOTHROW_LEAF_LIST)
9745
+DEF_GCC_BUILTIN (BUILT_IN_FABSD64, "fabsd64", BT_FN_DFLOAT64_DFLOAT64, ATTR_CONST_NOTHROW_LEAF_LIST)
9746
+DEF_GCC_BUILTIN (BUILT_IN_FABSD128, "fabsd128", BT_FN_DFLOAT128_DFLOAT128, ATTR_CONST_NOTHROW_LEAF_LIST)
9747
DEF_C99_BUILTIN (BUILT_IN_FDIM, "fdim", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
9748
DEF_C99_BUILTIN (BUILT_IN_FDIMF, "fdimf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO)
9749
DEF_C99_BUILTIN (BUILT_IN_FDIML, "fdiml", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
9750
--- a/src/gcc/expr.h
9751
+++ b/src/gcc/expr.h
9756
-extern void locate_and_pad_parm (enum machine_mode, tree, int, int, tree,
9757
- struct args_size *,
9758
+extern void locate_and_pad_parm (enum machine_mode, tree, int, int, int,
9759
+ tree, struct args_size *,
9760
struct locate_and_pad_arg_data *);
9762
/* Return the CODE_LABEL rtx for a LABEL_DECL, creating it if necessary. */
9763
--- a/src/gcc/function.c
9764
+++ b/src/gcc/function.c
9765
@@ -2507,6 +2507,7 @@
9768
locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,
9769
+ all->reg_parm_stack_space,
9770
entry_parm ? data->partial : 0, current_function_decl,
9771
&all->stack_args_size, &data->locate);
9773
@@ -3485,11 +3486,7 @@
9774
/* Adjust function incoming argument size for alignment and
9777
-#ifdef REG_PARM_STACK_SPACE
9778
- crtl->args.size = MAX (crtl->args.size,
9779
- REG_PARM_STACK_SPACE (fndecl));
9782
+ crtl->args.size = MAX (crtl->args.size, all.reg_parm_stack_space);
9783
crtl->args.size = CEIL_ROUND (crtl->args.size,
9784
PARM_BOUNDARY / BITS_PER_UNIT);
9786
@@ -3693,6 +3690,9 @@
9787
IN_REGS is nonzero if the argument will be passed in registers. It will
9788
never be set if REG_PARM_STACK_SPACE is not defined.
9790
+ REG_PARM_STACK_SPACE is the number of bytes of stack space reserved
9791
+ for arguments which are passed in registers.
9793
FNDECL is the function in which the argument was defined.
9795
There are two types of rounding that are done. The first, controlled by
9796
@@ -3713,7 +3713,8 @@
9799
locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs,
9800
- int partial, tree fndecl ATTRIBUTE_UNUSED,
9801
+ int reg_parm_stack_space, int partial,
9802
+ tree fndecl ATTRIBUTE_UNUSED,
9803
struct args_size *initial_offset_ptr,
9804
struct locate_and_pad_arg_data *locate)
9806
@@ -3720,12 +3721,8 @@
9808
enum direction where_pad;
9809
unsigned int boundary, round_boundary;
9810
- int reg_parm_stack_space = 0;
9811
int part_size_in_regs;
9813
-#ifdef REG_PARM_STACK_SPACE
9814
- reg_parm_stack_space = REG_PARM_STACK_SPACE (fndecl);
9816
/* If we have found a stack parm before we reach the end of the
9817
area reserved for registers, skip that area. */
9819
@@ -3743,7 +3740,6 @@
9820
initial_offset_ptr->constant = reg_parm_stack_space;
9823
-#endif /* REG_PARM_STACK_SPACE */
9825
part_size_in_regs = (reg_parm_stack_space == 0 ? partial : 0);
9827
@@ -3806,11 +3802,7 @@
9829
locate->slot_offset.constant += part_size_in_regs;
9832
-#ifdef REG_PARM_STACK_SPACE
9833
- || REG_PARM_STACK_SPACE (fndecl) > 0
9836
+ if (!in_regs || reg_parm_stack_space > 0)
9837
pad_to_arg_alignment (&locate->slot_offset, boundary,
9838
&locate->alignment_pad);
9840
@@ -3830,11 +3822,7 @@
9841
pad_below (&locate->offset, passed_mode, sizetree);
9843
#else /* !ARGS_GROW_DOWNWARD */
9845
-#ifdef REG_PARM_STACK_SPACE
9846
- || REG_PARM_STACK_SPACE (fndecl) > 0
9849
+ if (!in_regs || reg_parm_stack_space > 0)
9850
pad_to_arg_alignment (initial_offset_ptr, boundary,
9851
&locate->alignment_pad);
9852
locate->slot_offset = *initial_offset_ptr;
9853
@@ -5093,6 +5081,7 @@
9854
amount. BLKmode results are handled using the group load/store
9856
if (TYPE_MODE (TREE_TYPE (decl_result)) != BLKmode
9857
+ && REG_P (real_decl_rtl)
9858
&& targetm.calls.return_in_msb (TREE_TYPE (decl_result)))
9860
emit_move_insn (gen_rtx_REG (GET_MODE (decl_rtl),
9861
--- a/src/gcc/ChangeLog.ibm
9862
+++ b/src/gcc/ChangeLog.ibm
9864
+2014-03-04 Peter Bergner <bergner@vnet.ibm.com>
9866
+ Merge up to 208295.
9867
+ * REVISION: Update subversion id.
9869
+ Picks up LIBITM fixes for libitm.c/reentrant.c.
9871
+2014-03-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9873
+ Backport from mainline r208287
9874
+ 2014-03-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9876
+ * config/rs6000/rs6000.c (rs6000_preferred_reload_class): Disallow
9877
+ reload of PLUS rtx's outside of GENERAL_REGS or BASE_REGS; relax
9878
+ constraint on constants to permit them being loaded into
9879
+ GENERAL_REGS or BASE_REGS.
9881
+2014-02-26 Alan Modra <amodra@gmail.com>
9883
+ Apply mainline r207798
9886
+ * config/rs6000/rs6000.c (rs6000_secondary_reload_inner): Use
9887
+ find_replacement on parts of insn rtl that might be reloaded.
9889
+2014-02-25 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9891
+ * config/rs6000/vector.md (*vector_unordered<mode>): Change split
9892
+ to use canonical form for nor<mode>3.
9894
+2014-02-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9896
+ * config/rs6000/rs6000.c (rs6000_emit_le_vsx_move): Relax assert
9897
+ to permit subregs.
9899
+2014-02-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9901
+ Backport from mainline 208049
9902
+ 2014-02-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9904
+ * config/rs6000/altivec.md (altivec_lve<VI_char>x): Replace
9905
+ define_insn with define_expand and new define_insn
9906
+ *altivec_lve<VI_char>x_internal.
9907
+ (altivec_stve<VI_char>x): Replace define_insn with define_expand
9908
+ and new define_insn *altivec_stve<VI_char>x_internal.
9909
+ * config/rs6000/rs6000-protos.h (altivec_expand_stvex_be): New
9911
+ * config/rs6000/rs6000.c (altivec_expand_lvx_be): Document use by
9913
+ (altivec_expand_stvex_be): New function.
9915
+2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9917
+ Backport from mainline 208021
9918
+ 2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9920
+ * config/rs6000/altivec.md (altivec_vsumsws): Replace second
9921
+ vspltw with vsldoi.
9922
+ (reduc_uplus_v16qi): Use gen_altivec_vsumsws_direct instead of
9923
+ gen_altivec_vsumsws.
9925
+2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9927
+ Backport from mainline 208019
9928
+ 2014-02-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9930
+ * config/rs6000/altivec.md (altivec_lvxl): Rename as
9931
+ *altivec_lvxl_<mode>_internal and use VM2 iterator instead of
9933
+ (altivec_lvxl_<mode>): New define_expand incorporating
9934
+ -maltivec=be semantics where needed.
9935
+ (altivec_lvx): Rename as *altivec_lvx_<mode>_internal.
9936
+ (altivec_lvx_<mode>): New define_expand incorporating -maltivec=be
9937
+ semantics where needed.
9938
+ (altivec_stvx): Rename as *altivec_stvx_<mode>_internal.
9939
+ (altivec_stvx_<mode>): New define_expand incorporating
9940
+ -maltivec=be semantics where needed.
9941
+ (altivec_stvxl): Rename as *altivec_stvxl_<mode>_internal and use
9942
+ VM2 iterator instead of V4SI.
9943
+ (altivec_stvxl_<mode>): New define_expand incorporating
9944
+ -maltivec=be semantics where needed.
9945
+ * config/rs6000/rs6000-builtin.def: Add new built-in definitions
9946
+ LVXL_V2DF, LVXL_V2DI, LVXL_V4SF, LVXL_V4SI, LVXL_V8HI, LVXL_V16QI,
9947
+ LVX_V2DF, LVX_V2DI, LVX_V4SF, LVX_V4SI, LVX_V8HI, LVX_V16QI,
9948
+ STVX_V2DF, STVX_V2DI, STVX_V4SF, STVX_V4SI, STVX_V8HI, STVX_V16QI,
9949
+ STVXL_V2DF, STVXL_V2DI, STVXL_V4SF, STVXL_V4SI, STVXL_V8HI,
9951
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Replace
9952
+ ALTIVEC_BUILTIN_LVX with ALTIVEC_BUILTIN_LVX_<MODE> throughout;
9953
+ similarly for ALTIVEC_BUILTIN_LVXL, ALTIVEC_BUILTIN_STVX, and
9954
+ ALTIVEC_BUILTIN_STVXL.
9955
+ * config/rs6000/rs6000-protos.h (altivec_expand_lvx_be): New
9957
+ (altivec_expand_stvx_be): Likewise.
9958
+ * config/rs6000/rs6000.c (swap_selector_for_mode): New function.
9959
+ (altivec_expand_lvx_be): Likewise.
9960
+ (altivec_expand_stvx_be): Likewise.
9961
+ (altivec_expand_builtin): Add cases for
9962
+ ALTIVEC_BUILTIN_STVX_<MODE>, ALTIVEC_BUILTIN_STVXL_<MODE>,
9963
+ ALTIVEC_BUILTIN_LVXL_<MODE>, and ALTIVEC_BUILTIN_LVX_<MODE>.
9964
+ (altivec_init_builtins): Add definitions for
9965
+ __builtin_altivec_lvxl_<mode>, __builtin_altivec_lvx_<mode>,
9966
+ __builtin_altivec_stvx_<mode>, and
9967
+ __builtin_altivec_stvxl_<mode>.
9969
+2014-02-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9971
+ Backport from mainline r207919.
9972
+ 2014-02-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9974
+ * config/rs6000/rs6000.c (vspltis_constant): Fix most significant
9977
+2014-02-18 Michael Meissner <meissner@linux.vnet.ibm.com>
9979
+ Backport from mainline r207868.
9980
+ 2014-02-18 Michael Meissner <meissner@linux.vnet.ibm.com>
9983
+ * config/rs6000/rs6000.md (mov<mode>_64bit, TF/TDmode moves):
9984
+ Split 64-bit moves into 2 patterns. Do not allow the use of
9985
+ direct move for TDmode in little endian, since the decimal value
9986
+ has little endian bytes within a word, but the 64-bit pieces are
9987
+ ordered in a big endian fashion, and normal subreg's of TDmode are
9989
+ (mov<mode>_64bit_dm): Likewise.
9990
+ (movtd_64bit_nodm): Likewise.
9992
+2014-02-16 Bill Schmidt <wschmidt@Linux.vnet.ibm.com>
9994
+ Backport from mainline r207815.
9995
+ 2014-02-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
9997
+ * config/rs6000/altivec.md (p8_vmrgew): Handle little endian
9999
+ (p8_vmrgow): Likewise.
10001
+2014-02-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10003
+ Backport from mainline r207814.
10004
+ 2014-02-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10006
+ * config/rs6000/vsx.md (vsx_xxpermdi_<mode>): Handle little
10009
+2014-02-15 Michael Meissner <meissner@linux.vnet.ibm.com>
10011
+ Backport from mainline r207808.
10012
+ 2014-02-15 Michael Meissner <meissner@linux.vnet.ibm.com>
10015
+ * config/rs6000/rs6000.md (rreg): Add TFmode, TDmode constraints.
10016
+ (mov<mode>_internal, TFmode/TDmode): Split TFmode/TDmode moves
10017
+ into 64-bit and 32-bit moves. On 64-bit moves, add support for
10018
+ using direct move instructions on ISA 2.07. Also adjust
10019
+ instruction length for 64-bit.
10020
+ (mov<mode>_64bit, TFmode/TDmode): Likewise.
10021
+ (mov<mode>_32bit, TFmode/TDmode): Likewise.
10023
+2014-02-11 Michael Meissner <meissner@linux.vnet.ibm.com>
10025
+ Backport from mainline r207699.
10026
+ 2014-02-11 Michael Meissner <meissner@linux.vnet.ibm.com>
10029
+ * config/rs6000/rs6000.md (128-bit GPR splitter): Add a splitter
10030
+ for VSX/Altivec vectors that land in GPR registers.
10032
+2014-02-06 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10034
+ Backport from mainline r207658
10035
+ 2014-02-06 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10037
+ * config/rs6000/sysv4.h (ENDIAN_SELECT): Do not attempt to enforce
10038
+ big-endian mode for -mcall-aixdesc, -mcall-freebsd, -mcall-netbsd,
10039
+ -mcall-openbsd, or -mcall-linux.
10040
+ (CC1_ENDIAN_BIG_SPEC): Remove.
10041
+ (CC1_ENDIAN_LITTLE_SPEC): Remove.
10042
+ (CC1_ENDIAN_DEFAULT_SPEC): Remove.
10043
+ (CC1_SPEC): Remove (always empty) %cc1_endian_... spec.
10044
+ (SUBTARGET_EXTRA_SPECS): Remove %cc1_endian_big, %cc1_endian_little,
10045
+ and %cc1_endian_default.
10046
+ * config/rs6000/sysv4le.h (CC1_ENDIAN_DEFAULT_SPEC): Remove.
10048
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10050
+ Backport from mainline r207525
10051
+ 2014-02-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10053
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Change
10054
+ CODE_FOR_altivec_vpku[hw]um to
10055
+ CODE_FOR_altivec_vpku[hw]um_direct.
10056
+ * config/rs6000/altivec.md (vec_unpacks_hi_<VP_small_lc>): Change
10057
+ UNSPEC_VUNPACK_HI_SIGN to UNSPEC_VUNPACK_HI_SIGN_DIRECT.
10058
+ (vec_unpacks_lo_<VP_small_lc>): Change UNSPEC_VUNPACK_LO_SIGN to
10059
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT.
10061
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10063
+ Backport from mainline r207521
10064
+ 2014-02-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10066
+ * config/rs6000/altivec.md (altivec_vsum2sws): Adjust code
10067
+ generation for -maltivec=be.
10068
+ (altivec_vsumsws): Simplify redundant test.
10070
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10072
+ Backport from mainline r207520
10073
+ 2014-02-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10075
+ * altivec.md (UNSPEC_VPACK_UNS_UNS_MOD_DIRECT): New unspec.
10076
+ (UNSPEC_VUNPACK_HI_SIGN_DIRECT): Likewise.
10077
+ (UNSPEC_VUNPACK_LO_SIGN_DIRECT): Likewise.
10078
+ (mulv8hi3): Use gen_altivec_vpkuwum_direct instead of
10079
+ gen_altivec_vpkuwum.
10080
+ (altivec_vpkpx): Test for VECTOR_ELT_ORDER_BIG instead of for
10081
+ BYTES_BIG_ENDIAN.
10082
+ (altivec_vpks<VI_char>ss): Likewise.
10083
+ (altivec_vpks<VI_char>us): Likewise.
10084
+ (altivec_vpku<VI_char>us): Likewise.
10085
+ (altivec_vpku<VI_char>um): Likewise.
10086
+ (altivec_vpku<VI_char>um_direct): New (copy of
10087
+ altivec_vpku<VI_char>um that still relies on BYTES_BIG_ENDIAN, for
10089
+ (altivec_vupkhs<VU_char>): Emit vupkls* instead of vupkhs* when
10090
+ target is little endian and -maltivec=be is not specified.
10091
+ (*altivec_vupkhs<VU_char>_direct): New (copy of
10092
+ altivec_vupkhs<VU_char> that always emits vupkhs*, for internal
10094
+ (altivec_vupkls<VU_char>): Emit vupkhs* instead of vupkls* when
10095
+ target is little endian and -maltivec=be is not specified.
10096
+ (*altivec_vupkls<VU_char>_direct): New (copy of
10097
+ altivec_vupkls<VU_char> that always emits vupkls*, for internal
10099
+ (altivec_vupkhpx): Emit vupklpx instead of vupkhpx when target is
10100
+ little endian and -maltivec=be is not specified.
10101
+ (altivec_vupklpx): Emit vupkhpx instead of vupklpx when target is
10102
+ little endian and -maltivec=be is not specified.
10104
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10106
+ Backport from mainline r207415
10107
+ 2014-02-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10109
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): Generalize
10110
+ for vector types other than V16QImode.
10111
+ * config/rs6000/altivec.md (altivec_vperm_<mode>): Change to a
10112
+ define_expand, and call altivec_expand_vec_perm_le when producing
10113
+ code with little endian element order.
10114
+ (*altivec_vperm_<mode>_internal): New insn having previous
10115
+ behavior of altivec_vperm_<mode>.
10116
+ (altivec_vperm_<mode>_uns): Change to a define_expand, and call
10117
+ altivec_expand_vec_perm_le when producing code with little endian
10119
+ (*altivec_vperm_<mode>_uns_internal): New insn having previous
10120
+ behavior of altivec_vperm_<mode>_uns.
10122
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10124
+ Backport from mainline r207414
10125
+ 2014-02-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10127
+ * config/rs6000/altivec.md (UNSPEC_VSUMSWS_DIRECT): New unspec.
10128
+ (altivec_vsumsws): Add handling for -maltivec=be with a little
10130
+ (altivec_vsumsws_direct): New.
10131
+ (reduc_splus_<mode>): Call gen_altivec_vsumsws_direct instead of
10132
+ gen_altivec_vsumsws.
10134
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10136
+ Backport from mainline r207326
10137
+ 2014-01-30 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10139
+ * config/rs6000/rs6000.c (rs6000_expand_vector_init): Remove
10140
+ unused variable "field".
10141
+ * config/rs6000/vsx.md (vsx_mergel_<mode>): Add missing DONE.
10142
+ (vsx_mergeh_<mode>): Likewise.
10143
+ * config/rs6000/altivec.md (altivec_vmrghb): Likewise.
10144
+ (altivec_vmrghh): Likewise.
10145
+ (altivec_vmrghw): Likewise.
10146
+ (altivec_vmrglb): Likewise.
10147
+ (altivec_vmrglh): Likewise.
10148
+ (altivec_vmrglw): Likewise.
10149
+ (altivec_vspltb): Add missing uses.
10150
+ (altivec_vsplth): Likewise.
10151
+ (altivec_vspltw): Likewise.
10152
+ (altivec_vspltsf): Likewise.
10154
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10156
+ Backport from mainline r207318
10157
+ 2014-01-30 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10159
+ * gcc/config/rs6000/rs6000.c (rs6000_expand_vector_init): Use
10160
+ gen_vsx_xxspltw_v4sf_direct instead of gen_vsx_xxspltw_v4sf;
10161
+ remove element index adjustment for endian (now handled in vsx.md
10163
+ (altivec_expand_vec_perm_const): Use
10164
+ gen_altivec_vsplt[bhw]_direct instead of gen_altivec_vsplt[bhw].
10165
+ * gcc/config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTW): New unspec.
10166
+ (vsx_xxspltw_<mode>): Adjust element index for little endian.
10167
+ * gcc/config/rs6000/altivec.md (altivec_vspltb): Divide into a
10168
+ define_expand and a new define_insn *altivec_vspltb_internal;
10169
+ adjust for -maltivec=be on a little endian target.
10170
+ (altivec_vspltb_direct): New.
10171
+ (altivec_vsplth): Divide into a define_expand and a new
10172
+ define_insn *altivec_vsplth_internal; adjust for -maltivec=be on a
10173
+ little endian target.
10174
+ (altivec_vsplth_direct): New.
10175
+ (altivec_vspltw): Divide into a define_expand and a new
10176
+ define_insn *altivec_vspltw_internal; adjust for -maltivec=be on a
10177
+ little endian target.
10178
+ (altivec_vspltw_direct): New.
10179
+ (altivec_vspltsf): Divide into a define_expand and a new
10180
+ define_insn *altivec_vspltsf_internal; adjust for -maltivec=be on
10181
+ a little endian target.
10183
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10185
+ Backport from mainline r207262
10186
+ 2014-01-29 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10188
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Use
10189
+ CODE_FOR_altivec_vmrg*_direct rather than CODE_FOR_altivec_vmrg*.
10190
+ * config/rs6000/vsx.md (vsx_mergel_<mode>): Adjust for
10191
+ -maltivec=be with LE targets.
10192
+ (vsx_mergeh_<mode>): Likewise.
10193
+ * config/rs6000/altivec.md (UNSPEC_VMRG[HL]_DIRECT): New
10195
+ (mulv8hi3): Use gen_altivec_vmrg[hl]w_direct.
10196
+ (altivec_vmrghb): Replace with define_expand and new
10197
+ *altivec_vmrghb_internal insn; adjust for -maltivec=be with LE
10199
+ (altivec_vmrghb_direct): New define_insn.
10200
+ (altivec_vmrghh): Replace with define_expand and new
10201
+ *altivec_vmrghh_internal insn; adjust for -maltivec=be with LE
10203
+ (altivec_vmrghh_direct): New define_insn.
10204
+ (altivec_vmrghw): Replace with define_expand and new
10205
+ *altivec_vmrghw_internal insn; adjust for -maltivec=be with LE
10207
+ (altivec_vmrghw_direct): New define_insn.
10208
+ (*altivec_vmrghsf): Adjust for endianness.
10209
+ (altivec_vmrglb): Replace with define_expand and new
10210
+ *altivec_vmrglb_internal insn; adjust for -maltivec=be with LE
10212
+ (altivec_vmrglb_direct): New define_insn.
10213
+ (altivec_vmrglh): Replace with define_expand and new
10214
+ *altivec_vmrglh_internal insn; adjust for -maltivec=be with LE
10216
+ (altivec_vmrglh_direct): New define_insn.
10217
+ (altivec_vmrglw): Replace with define_expand and new
10218
+ *altivec_vmrglw_internal insn; adjust for -maltivec=be with LE
10220
+ (altivec_vmrglw_direct): New define_insn.
10221
+ (*altivec_vmrglsf): Adjust for endianness.
10222
+ (vec_widen_umult_hi_v16qi): Use gen_altivec_vmrghh_direct.
10223
+ (vec_widen_umult_lo_v16qi): Use gen_altivec_vmrglh_direct.
10224
+ (vec_widen_smult_hi_v16qi): Use gen_altivec_vmrghh_direct.
10225
+ (vec_widen_smult_lo_v16qi): Use gen_altivec_vmrglh_direct.
10226
+ (vec_widen_umult_hi_v8hi): Use gen_altivec_vmrghw_direct.
10227
+ (vec_widen_umult_lo_v8hi): Use gen_altivec_vmrglw_direct.
10228
+ (vec_widen_smult_hi_v8hi): Use gen_altivec_vmrghw_direct.
10229
+ (vec_widen_smult_lo_v8hi): Use gen_altivec_vmrglw_direct.
10231
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10233
+ Backport from mainline r207062
10234
+ 2014-01-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10236
+ * config/rs6000/rs6000.c (rs6000_expand_vec_perm_const_1): Remove
10237
+ correction for little endian...
10238
+ * config/rs6000/vsx.md (vsx_xxpermdi2_<mode>_1): ...and move it to
10241
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10243
+ Backport from mainline r206641
10244
+ 2014-01-15 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
10246
+ * config/rs6000/altivec.md (mulv8hi3): Explicitly generate vmulesh
10247
+ and vmulosh rather than call gen_vec_widen_smult_*.
10248
+ (vec_widen_umult_even_v16qi): Test VECTOR_ELT_ORDER_BIG rather
10249
+ than BYTES_BIG_ENDIAN to determine use of even or odd instruction.
10250
+ (vec_widen_smult_even_v16qi): Likewise.
10251
+ (vec_widen_umult_even_v8hi): Likewise.
10252
+ (vec_widen_smult_even_v8hi): Likewise.
10253
+ (vec_widen_umult_odd_v16qi): Likewise.
10254
+ (vec_widen_smult_odd_v16qi): Likewise.
10255
+ (vec_widen_umult_odd_v8hi): Likewise.
10256
+ (vec_widen_smult_odd_v8hi): Likewise.
10257
+ (vec_widen_umult_hi_v16qi): Explicitly generate vmuleub and
10258
+ vmuloub rather than call gen_vec_widen_umult_*.
10259
+ (vec_widen_umult_lo_v16qi): Likewise.
10260
+ (vec_widen_smult_hi_v16qi): Explicitly generate vmulesb and
10261
+ vmulosb rather than call gen_vec_widen_smult_*.
10262
+ (vec_widen_smult_lo_v16qi): Likewise.
10263
+ (vec_widen_umult_hi_v8hi): Explicitly generate vmuleuh and vmulouh
10264
+ rather than call gen_vec_widen_umult_*.
10265
+ (vec_widen_umult_lo_v8hi): Likewise.
10266
+ (vec_widen_smult_hi_v8hi): Explicitly gnerate vmulesh and vmulosh
10267
+ rather than call gen_vec_widen_smult_*.
10268
+ (vec_widen_smult_lo_v8hi): Likewise.
10270
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10272
+ Backport from mainline r206590
10273
+ 2014-01-13 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10275
+ * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
10276
+ Implement -maltivec=be for vec_insert and vec_extract.
10278
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10280
+ Backport from mainline r206541
10281
+ 2014-01-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10283
+ * config/rs6000/rs6000-builtin.def: Fix pasto for VPKSDUS.
10285
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10287
+ Backport from mainline r206494
10288
+ 2014-01-09 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10290
+ * doc/invoke.texi: Add -maltivec={be,le} options, and document
10291
+ default element-order behavior for -maltivec.
10292
+ * config/rs6000/rs6000.opt: Add -maltivec={be,le} options.
10293
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Ensure
10294
+ that -maltivec={le,be} implies -maltivec; disallow -maltivec=le
10295
+ when targeting big endian, at least for now.
10296
+ * config/rs6000/rs6000.h: Add #define of VECTOR_ELT_ORDER_BIG.
10298
+2014-02-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10300
+ Backport from mainline r206443
10301
+ 2014-01-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10303
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Remove
10304
+ two duplicate entries.
10306
+2014-02-04 Michael Meissner <meissner@linux.vnet.ibm.com>
10308
+ * config/rs6000/rs6000.opt (-mlra): Add switch to enable the LRA
10309
+ register allocator.
10311
+ * config/rs6000/rs6000.c (TARGET_LRA_P): Add support for -mlra to
10312
+ enable the LRA register allocator. Back port the changes from the
10313
+ trunk to enable LRA.
10314
+ (rs6000_legitimate_offset_address_p): Likewise.
10315
+ (legitimate_lo_sum_address_p): Likewise.
10316
+ (use_toc_relative_ref): Likewise.
10317
+ (rs6000_legitimate_address_p): Likewise.
10318
+ (rs6000_emit_move): Likewise.
10319
+ (rs6000_secondary_memory_needed_mode): Likewise.
10320
+ (rs6000_alloc_sdmode_stack_slot): Likewise.
10321
+ (rs6000_lra_p): Likewise.
10323
+ * config/rs6000/sync.md (load_lockedti): Copy TI/PTI variables by
10324
+ 64-bit parts to force the register allocator to allocate even/odd
10325
+ register pairs for the quad word atomic instructions.
10326
+ (store_conditionalti): Likewise.
10328
+2014-01-23 Michael Meissner <meissner@linux.vnet.ibm.com>
10330
+ Back port from mainline
10331
+ 2014-01-23 Michael Meissner <meissner@linux.vnet.ibm.com>
10334
+ * doc/invoke.texi (RS/6000 and PowerPC Options): Document
10335
+ -mquad-memory-atomic. Update -mquad-memory documentation to say
10336
+ it is only used for non-atomic loads/stores.
10338
+ * config/rs6000/predicates.md (quad_int_reg_operand): Allow either
10339
+ -mquad-memory or -mquad-memory-atomic switches.
10341
+ * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Add
10342
+ -mquad-memory-atomic to ISA 2.07 support.
10344
+ * config/rs6000/rs6000.opt (-mquad-memory-atomic): Add new switch
10345
+ to separate support of normal quad word memory operations (ldq,
10346
+ stq) from the atomic quad word memory operations.
10348
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
10349
+ support to separate non-atomic quad word operations from atomic
10350
+ quad word operations. Disable non-atomic quad word operations in
10351
+ little endian mode so that we don't have to swap words after the
10352
+ load and before the store.
10353
+ (quad_load_store_p): Add comment about atomic quad word support.
10354
+ (rs6000_opt_masks): Add -mquad-memory-atomic to the list of
10355
+ options printed with -mdebug=reg.
10357
+ * config/rs6000/rs6000.h (TARGET_SYNC_TI): Use
10358
+ -mquad-memory-atomic as the test for whether we have quad word
10359
+ atomic instructions.
10360
+ (TARGET_SYNC_HI_QI): If either -mquad-memory-atomic,
10361
+ -mquad-memory, or -mp8-vector are used, allow byte/half-word
10362
+ atomic operations.
10364
+ * config/rs6000/sync.md (load_lockedti): Insure that the address
10365
+ is a proper indexed or indirect address for the lqarx instruction.
10366
+ On little endian systems, swap the hi/lo registers after the lqarx
10368
+ (load_lockedpti): Use indexed_or_indirect_operand predicate to
10369
+ insure the address is valid for the lqarx instruction.
10370
+ (store_conditionalti): Insure that the address is a proper indexed
10371
+ or indirect address for the stqcrx. instruction. On little endian
10372
+ systems, swap the hi/lo registers before doing the stqcrx.
10374
+ (store_conditionalpti): Use indexed_or_indirect_operand predicate to
10375
+ insure the address is valid for the stqcrx. instruction.
10377
+ * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros):
10378
+ Define __QUAD_MEMORY__ and __QUAD_MEMORY_ATOMIC__ based on what
10379
+ type of quad memory support is available.
10381
+2014-01-22 Alan Modra <amodra@gmail.com>
10383
+ Apply mainline r202190, powerpc64le multilibs and multiarch dir
10384
+ 2013-09-03 Alan Modra <amodra@gmail.com>
10386
+ * config.gcc (powerpc*-*-linux*): Add support for little-endian
10387
+ multilibs to big-endian target and vice versa.
10388
+ * config/rs6000/t-linux64: Use := assignment on all vars.
10389
+ (MULTILIB_EXTRA_OPTS): Remove fPIC.
10390
+ (MULTILIB_OSDIRNAMES): Specify using mapping from multilib_options.
10391
+ * config/rs6000/t-linux64le: New file.
10392
+ * config/rs6000/t-linux64bele: New file.
10393
+ * config/rs6000/t-linux64lebe: New file.
10395
+2014-01-16 Michael Meissner <meissner@linux.vnet.ibm.com>
10397
+ Back port from mainline
10398
+ 2014-01-16 Michael Meissner <meissner@linux.vnet.ibm.com>
10401
+ * config/rs6000/rs6000.md (reload_vsx_from_gprsf): Add little
10402
+ endian support, remove tests for WORDS_BIG_ENDIAN.
10403
+ (p8_mfvsrd_3_<mode>): Likewise.
10404
+ (reload_gpr_from_vsx<mode>): Likewise.
10405
+ (reload_gpr_from_vsxsf): Likewise.
10406
+ (p8_mfvsrd_4_disf): Likewise.
10408
+2014-01-16 Peter Bergner <bergner@vnet.ibm.com>
10410
+ Merge up to 206665.
10411
+ * REVISION: Update subversion id.
10413
+2014-01-13 Peter Bergner <bergner@vnet.ibm.com>
10415
+ Merge up to 206579.
10416
+ * REVISION: Update subversion id.
10418
+2014-01-08 Peter Bergner <bergner@vnet.ibm.com>
10420
+ Merge up to 206404.
10421
+ * REVISION: Update subversion id.
10423
+2013-12-10 Peter Bergner <bergner@vnet.ibm.com>
10425
+ Merge up to 205847.
10426
+ * REVISION: Update subversion id.
10428
+2013-12-03 Peter Bergner <bergner@vnet.ibm.com>
10430
+ Backport from mainline
10431
+ 2013-12-03 Peter Bergner <bergner@vnet.ibm.com>
10433
+ * config/rs6000/htmintrin.h (_TEXASR_INSTRUCTION_FETCH_CONFLICT): Fix
10434
+ typo in macro name.
10435
+ (_TEXASRU_INSTRUCTION_FETCH_CONFLICT): Likewise.
10437
+2013-11-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10439
+ Backport from mainline r205333
10440
+ 2013-11-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10442
+ * config/rs6000/rs6000.c (rs6000_expand_vec_perm_const_1): Correct
10443
+ for little endian.
10445
+2013-11-23 Alan Modra <amodra@gmail.com>
10447
+ Apply mainline r205299.
10448
+ * config/rs6000/vsx.md (fusion peepholes): Disable when !TARGET_VSX.
10450
+2013-11-22 Michael Meissner <meissner@linux.vnet.ibm.com>
10452
+ Backport from mainline
10453
+ 2013-11-12 Michael Meissner <meissner@linux.vnet.ibm.com>
10456
+ * config/rs6000/rs6000.md (movdi_internal32): Eliminate
10457
+ constraints that would allow DImode into the traditional Altivec
10458
+ registers, but cause undesirable code generation when loading 0 as
10460
+ (movdi_internal64): Likewise.
10461
+ (cmp<mode>_fpr): Do not use %x for CR register output.
10462
+ (extendsfdf2_fpr): Fix constraints when -mallow-upper-df and
10463
+ -mallow-upper-sf debug switches are used.
10465
+2013-11-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10467
+ Backport from mainline r205241
10468
+ 2013-11-21 Bill Schmidt <wschmidt@vnet.ibm.com>
10470
+ * config/rs6000/vector.md (vec_pack_trunc_v2df): Revert previous
10471
+ little endian change.
10472
+ (vec_pack_sfix_trunc_v2df): Likewise.
10473
+ (vec_pack_ufix_trunc_v2df): Likewise.
10474
+ * config/rs6000/rs6000.c (rs6000_expand_interleave): Correct
10475
+ double checking of endianness.
10477
+2013-11-21 Peter Bergner <bergner@vnet.ibm.com>
10479
+ Backport from mainline r205233.
10480
+ 2013-11-21 Peter Bergner <bergner@vnet.ibm.com>
10482
+ * doc/extend.texi: Document htm builtins.
10484
+2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10486
+ Backport from mainline r205146
10487
+ 2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10489
+ * config/rs6000/vsx.md (vsx_set_<mode>): Adjust for little endian.
10490
+ (vsx_extract_<mode>): Likewise.
10491
+ (*vsx_extract_<mode>_one_le): New LE variant on
10492
+ *vsx_extract_<mode>_zero.
10493
+ (vsx_extract_v4sf): Adjust for little endian.
10495
+2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10497
+ Backport from mainline r205123:
10499
+ 2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10501
+ * config/rs6000/rs6000.c (rs6000_cannot_change_mode_class): Do not
10502
+ allow subregs of TDmode in FPRs of smaller size in little-endian.
10503
+ (rs6000_split_multireg_move): When splitting an access to TDmode
10504
+ in FPRs, do not use simplify_gen_subreg.
10506
+2013-11-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10508
+ Backport from mainline r205080
10509
+ 2013-11-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10511
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Adjust
10512
+ V16QI vector splat case for little endian.
10514
+2013-11-20 Alan Modra <amodra@gmail.com>
10516
+ Apply mainline r205060.
10517
+ * config/rs6000/sysv4.h (CC1_ENDIAN_LITTLE_SPEC): Define as empty.
10518
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Default
10519
+ to strict alignment on older processors when little-endian.
10520
+ * config/rs6000/linux64.h (PROCESSOR_DEFAULT64): Default to power8
10523
+2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10525
+ Backport from mainline r205045:
10527
+ 2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10529
+ * config/rs6000/vector.md ("mov<mode>"): Do not call
10530
+ rs6000_emit_le_vsx_move to move into or out of GPRs.
10531
+ * config/rs6000/rs6000.c (rs6000_emit_le_vsx_move): Assert
10532
+ source and destination are not GPR hard regs.
10534
+2013-11-18 Peter Bergner <bergner@vnet.ibm.com>
10536
+ Merge up to 204974.
10537
+ * REVISION: Update subversion id.
10539
+2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10541
+ Backport from mainline r204927:
10543
+ 2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10545
+ * config/rs6000/rs6000.c (rs6000_emit_move): Use low word of
10546
+ sdmode_stack_slot also in little-endian mode.
10548
+2013-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10550
+ Backport from mainline r204920
10551
+ 2011-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10553
+ * config/rs6000/rs6000.c (rs6000_frame_related): Add split_reg
10554
+ parameter and use it in REG_FRAME_RELATED_EXPR note.
10555
+ (emit_frame_save): Call rs6000_frame_related with extra NULL_RTX
10557
+ (rs6000_emit_prologue): Likewise, but for little endian VSX
10558
+ stores, pass the source register of the store instead.
10560
+2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10562
+ Backport from mainline r204862
10563
+ 2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10565
+ * config/rs6000/altivec.md (UNSPEC_VPERM_X, UNSPEC_VPERM_UNS_X):
10567
+ (altivec_vperm_<mode>): Revert earlier little endian change.
10568
+ (*altivec_vperm_<mode>_internal): Remove.
10569
+ (altivec_vperm_<mode>_uns): Revert earlier little endian change.
10570
+ (*altivec_vperm_<mode>_uns_internal): Remove.
10571
+ * config/rs6000/vector.md (vec_realign_load_<mode>): Revise
10574
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10576
+ Backport from mainline r204842:
10578
+ 2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10580
+ * doc/invoke.texi (-mabi=elfv1, -mabi=elfv2): Document.
10582
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10584
+ Backport from mainline r204809:
10586
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10588
+ * config/rs6000/sysv4le.h (LINUX64_DEFAULT_ABI_ELFv2): Define.
10590
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10592
+ Backport from mainline r204808:
10594
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10595
+ Alan Modra <amodra@gmail.com>
10597
+ * config/rs6000/rs6000.h (RS6000_SAVE_AREA): Handle ABI_ELFv2.
10598
+ (RS6000_SAVE_TOC): Remove.
10599
+ (RS6000_TOC_SAVE_SLOT): New macro.
10600
+ * config/rs6000/rs6000.c (rs6000_parm_offset): New function.
10601
+ (rs6000_parm_start): Use it.
10602
+ (rs6000_function_arg_advance_1): Likewise.
10603
+ (rs6000_emit_prologue): Use RS6000_TOC_SAVE_SLOT.
10604
+ (rs6000_emit_epilogue): Likewise.
10605
+ (rs6000_call_aix): Likewise.
10606
+ (rs6000_output_function_prologue): Do not save/restore r11
10607
+ around calling _mcount for ABI_ELFv2.
10609
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10610
+ Alan Modra <amodra@gmail.com>
10612
+ * config/rs6000/rs6000-protos.h (rs6000_reg_parm_stack_space):
10614
+ * config/rs6000/rs6000.h (RS6000_REG_SAVE): Remove.
10615
+ (REG_PARM_STACK_SPACE): Call rs6000_reg_parm_stack_space.
10616
+ * config/rs6000/rs6000.c (rs6000_parm_needs_stack): New function.
10617
+ (rs6000_function_parms_need_stack): Likewise.
10618
+ (rs6000_reg_parm_stack_space): Likewise.
10619
+ (rs6000_function_arg): Do not replace BLKmode by Pmode when
10620
+ returning a register argument.
10622
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10623
+ Michael Gschwind <mkg@us.ibm.com>
10625
+ * config/rs6000/rs6000.h (FP_ARG_MAX_RETURN): New macro.
10626
+ (ALTIVEC_ARG_MAX_RETURN): Likewise.
10627
+ (FUNCTION_VALUE_REGNO_P): Use them.
10628
+ * config/rs6000/rs6000.c (TARGET_RETURN_IN_MSB): Define.
10629
+ (rs6000_return_in_msb): New function.
10630
+ (rs6000_return_in_memory): Handle ELFv2 homogeneous aggregates.
10631
+ Handle aggregates of up to 16 bytes for ELFv2.
10632
+ (rs6000_function_value): Handle ELFv2 homogeneous aggregates.
10634
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10635
+ Michael Gschwind <mkg@us.ibm.com>
10637
+ * config/rs6000/rs6000.h (AGGR_ARG_NUM_REG): Define.
10638
+ * config/rs6000/rs6000.c (rs6000_aggregate_candidate): New function.
10639
+ (rs6000_discover_homogeneous_aggregate): Likewise.
10640
+ (rs6000_function_arg_boundary): Handle homogeneous aggregates.
10641
+ (rs6000_function_arg_advance_1): Likewise.
10642
+ (rs6000_function_arg): Likewise.
10643
+ (rs6000_arg_partial_bytes): Likewise.
10644
+ (rs6000_psave_function_arg): Handle BLKmode arguments.
10646
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10647
+ Michael Gschwind <mkg@us.ibm.com>
10649
+ * config/rs6000/rs6000.h (AGGR_ARG_NUM_REG): Define.
10650
+ * config/rs6000/rs6000.c (rs6000_aggregate_candidate): New function.
10651
+ (rs6000_discover_homogeneous_aggregate): Likewise.
10652
+ (rs6000_function_arg_boundary): Handle homogeneous aggregates.
10653
+ (rs6000_function_arg_advance_1): Likewise.
10654
+ (rs6000_function_arg): Likewise.
10655
+ (rs6000_arg_partial_bytes): Likewise.
10656
+ (rs6000_psave_function_arg): Handle BLKmode arguments.
10658
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10660
+ * config/rs6000/rs6000.c (machine_function): New member
10662
+ (rs6000_emit_prologue): Set r2_setup_needed if necessary.
10663
+ (rs6000_output_mi_thunk): Set r2_setup_needed.
10664
+ (rs6000_output_function_prologue): Output global entry point
10665
+ prologue and local entry point marker if needed for ABI_ELFv2.
10666
+ Output -mprofile-kernel code here.
10667
+ (output_function_profiler): Do not output -mprofile-kernel
10668
+ code here; moved to rs6000_output_function_prologue.
10669
+ (rs6000_file_start): Output ".abiversion 2" for ABI_ELFv2.
10671
+ (rs6000_emit_move): Do not handle dot symbols for ABI_ELFv2.
10672
+ (rs6000_output_function_entry): Likewise.
10673
+ (rs6000_assemble_integer): Likewise.
10674
+ (rs6000_elf_encode_section_info): Likewise.
10675
+ (rs6000_elf_declare_function_name): Do not create dot symbols
10676
+ or .opd section for ABI_ELFv2.
10678
+ (rs6000_trampoline_size): Update for ABI_ELFv2 trampolines.
10679
+ (rs6000_trampoline_init): Likewise.
10680
+ (rs6000_elf_file_end): Call file_end_indicate_exec_stack
10683
+ (rs6000_call_aix): Handle ELFv2 indirect calls. Do not check
10684
+ for function descriptors in ABI_ELFv2.
10686
+ * config/rs6000/rs6000.md ("*call_indirect_aix<mode>"): Support
10687
+ on ABI_AIX only, not ABI_ELFv2.
10688
+ ("*call_value_indirect_aix<mode>"): Likewise.
10689
+ ("*call_indirect_elfv2<mode>"): New pattern.
10690
+ ("*call_value_indirect_elfv2<mode>"): Likewise.
10692
+ * config/rs6000/predicates.md ("symbol_ref_operand"): Do not
10693
+ check for function descriptors in ABI_ELFv2.
10694
+ ("current_file_function_operand"): Likewise.
10696
+ * config/rs6000/ppc-asm.h [__powerpc64__ && _CALL_ELF == 2]:
10698
+ (FUNC_NAME): Define ELFv2 variant.
10699
+ (JUMP_TARGET): Likewise.
10700
+ (FUNC_START): Likewise.
10701
+ (HIDDEN_FUNC): Likewise.
10702
+	(FUNC_END): Likewise.
10704
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10706
+ * config.gcc [powerpc*-*-* | rs6000-*-*]: Support --with-abi=elfv1
10707
+ and --with-abi=elfv2.
10708
+ * config/rs6000/option-defaults.h (OPTION_DEFAULT_SPECS): Add "abi".
10709
+ * config/rs6000/rs6000.opt (mabi=elfv1): New option.
10710
+ (mabi=elfv2): Likewise.
10711
+ * config/rs6000/rs6000-opts.h (enum rs6000_abi): Add ABI_ELFv2.
10712
+ * config/rs6000/linux64.h (DEFAULT_ABI): Do not hard-code to AIX_ABI
10713
+ if !RS6000_BI_ARCH.
10714
+ (ELFv2_ABI_CHECK): New macro.
10715
+ (SUBSUBTARGET_OVERRIDE_OPTIONS): Use it to decide whether to set
10716
+ rs6000_current_abi to ABI_AIX or ABI_ELFv2.
10717
+ (GLIBC_DYNAMIC_LINKER64): Support ELFv2 ld.so version.
10718
+ * config/rs6000/rs6000-c.c (rs6000_cpu_cpp_builtins): Predefine
10719
+ _CALL_ELF and __STRUCT_PARM_ALIGN__ if appropriate.
10721
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Handle ABI_ELFv2.
10722
+ (debug_stack_info): Likewise.
10723
+ (rs6000_file_start): Treat ABI_ELFv2 the same as ABI_AIX.
10724
+ (rs6000_legitimize_tls_address): Likewise.
10725
+ (rs6000_conditional_register_usage): Likewise.
10726
+ (rs6000_emit_move): Likewise.
10727
+ (init_cumulative_args): Likewise.
10728
+ (rs6000_function_arg_advance_1): Likewise.
10729
+ (rs6000_function_arg): Likewise.
10730
+ (rs6000_arg_partial_bytes): Likewise.
10731
+ (rs6000_output_function_entry): Likewise.
10732
+ (rs6000_assemble_integer): Likewise.
10733
+ (rs6000_savres_strategy): Likewise.
10734
+ (rs6000_stack_info): Likewise.
10735
+ (rs6000_function_ok_for_sibcall): Likewise.
10736
+ (rs6000_emit_load_toc_table): Likewise.
10737
+ (rs6000_savres_routine_name): Likewise.
10738
+ (ptr_regno_for_savres): Likewise.
10739
+ (rs6000_emit_prologue): Likewise.
10740
+ (rs6000_emit_epilogue): Likewise.
10741
+ (rs6000_output_function_epilogue): Likewise.
10742
+ (output_profile_hook): Likewise.
10743
+ (output_function_profiler): Likewise.
10744
+ (rs6000_trampoline_size): Likewise.
10745
+ (rs6000_trampoline_init): Likewise.
10746
+ (rs6000_elf_output_toc_section_asm_op): Likewise.
10747
+ (rs6000_elf_encode_section_info): Likewise.
10748
+ (rs6000_elf_reloc_rw_mask): Likewise.
10749
+ (rs6000_elf_declare_function_name): Likewise.
10750
+ (rs6000_function_arg_boundary): Treat ABI_ELFv2 the same as ABI_AIX,
10751
+ except that rs6000_compat_align_parm is always assumed false.
10752
+ (rs6000_gimplify_va_arg): Likewise.
10753
+ (rs6000_call_aix): Update comment.
10754
+ (rs6000_sibcall_aix): Likewise.
10755
+ * config/rs6000/rs6000.md ("tls_gd_aix<TLSmode:tls_abi_suffix>"):
10756
+ Treat ABI_ELFv2 the same as ABI_AIX.
10757
+ ("*tls_gd_call_aix<TLSmode:tls_abi_suffix>"): Likewise.
10758
+ ("tls_ld_aix<TLSmode:tls_abi_suffix>"): Likewise.
10759
+ ("*tls_ld_call_aix<TLSmode:tls_abi_suffix>"): Likewise.
10760
+ ("load_toc_aix_si"): Likewise.
10761
+ ("load_toc_aix_di"): Likewise.
10762
+ ("call"): Likewise.
10763
+ ("call_value"): Likewise.
10764
+ ("*call_local_aix<mode>"): Likewise.
10765
+ ("*call_value_local_aix<mode>"): Likewise.
10766
+ ("*call_nonlocal_aix<mode>"): Likewise.
10767
+ ("*call_value_nonlocal_aix<mode>"): Likewise.
10768
+ ("*call_indirect_aix<mode>"): Likewise.
10769
+ ("*call_value_indirect_aix<mode>"): Likewise.
10770
+ ("sibcall"): Likewise.
10771
+ ("sibcall_value"): Likewise.
10772
+ ("*sibcall_aix<mode>"): Likewise.
10773
+ ("*sibcall_value_aix<mode>"): Likewise.
10774
+ * config/rs6000/predicates.md ("symbol_ref_operand"): Likewise.
10775
+ ("current_file_function_operand"): Likewise.
10777
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10779
+ Backport from mainline r204807:
10781
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10783
+ * config/rs6000/rs6000.c (rs6000_arg_partial_bytes): Simplify logic
10784
+ by making use of the fact that for vector / floating point arguments
10785
+ passed both in VRs/FPRs and in the fixed parameter area, the partial
10786
+ bytes mechanism is in fact not used.
10788
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10790
+ Backport from mainline r204806:
10792
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10794
+ * config/rs6000/rs6000.c (rs6000_psave_function_arg): New function.
10795
+ (rs6000_finish_function_arg): Likewise.
10796
+ (rs6000_function_arg): Use rs6000_psave_function_arg and
10797
+ rs6000_finish_function_arg to handle both vector and floating
10798
+ point arguments that are also passed in GPRs / the stack.
10800
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10802
+ Backport from mainline r204805:
10804
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10806
+ * config/rs6000/rs6000.c (USE_FP_FOR_ARG_P): Remove TYPE argument.
10807
+ (USE_ALTIVEC_FOR_ARG_P): Likewise.
10808
+ (rs6000_darwin64_record_arg_advance_recurse): Update uses.
10809
+	(rs6000_function_arg_advance_1): Likewise.
10810
+ (rs6000_darwin64_record_arg_recurse): Likewise.
10811
+ (rs6000_function_arg): Likewise.
10812
+ (rs6000_arg_partial_bytes): Likewise.
10814
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10816
+ Backport from mainline r204804:
10818
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10820
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Replace
10821
+ "DEFAULT_ABI != ABI_AIX" test by testing for ABI_V4 or ABI_DARWIN.
10822
+ (rs6000_savres_strategy): Likewise.
10823
+ (rs6000_return_addr): Likewise.
10824
+ (rs6000_emit_load_toc_table): Replace "DEFAULT_ABI != ABI_AIX" by
10825
+ testing for ABI_V4 (since ABI_DARWIN is impossible here).
10826
+ (rs6000_emit_prologue): Likewise.
10827
+ (legitimate_lo_sum_address_p): Simplify DEFAULT_ABI test.
10828
+ (rs6000_elf_declare_function_name): Remove duplicated test.
10829
+ * config/rs6000/rs6000.md ("load_toc_v4_PIC_1"): Explicitly test
10830
+ for ABI_V4 (instead of "DEFAULT_ABI != ABI_AIX" test).
10831
+ ("load_toc_v4_PIC_1_normal"): Likewise.
10832
+ ("load_toc_v4_PIC_1_476"): Likewise.
10833
+ ("load_toc_v4_PIC_1b"): Likewise.
10834
+ ("load_toc_v4_PIC_1b_normal"): Likewise.
10835
+ ("load_toc_v4_PIC_1b_476"): Likewise.
10836
+ ("load_toc_v4_PIC_2"): Likewise.
10837
+ ("load_toc_v4_PIC_3b"): Likewise.
10838
+ ("load_toc_v4_PIC_3c"): Likewise.
10839
+ * config/rs6000/rs6000.h (RS6000_REG_SAVE): Simplify DEFAULT_ABI test.
10840
+ (RS6000_SAVE_AREA): Likewise.
10841
+ (FP_ARG_MAX_REG): Likewise.
10842
+ (RETURN_ADDRESS_OFFSET): Likewise.
10843
+ * config/rs6000/sysv.h (TARGET_TOC): Test for ABI_V4 instead
10845
+ (SUBTARGET_OVERRIDE_OPTIONS): Likewise.
10846
+ (MINIMAL_TOC_SECTION_ASM_OP): Likewise.
10848
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10850
+ Backport from mainline r204803:
10852
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10854
+ * config/rs6000/rs6000.c (rs6000_call_indirect_aix): Rename to ...
10855
+ (rs6000_call_aix): ... this. Handle both direct and indirect calls.
10856
+ Create call insn directly instead of via various gen_... routines.
10857
+ Mention special registers used by the call in CALL_INSN_FUNCTION_USAGE.
10858
+ (rs6000_sibcall_aix): New function.
10859
+ * config/rs6000/rs6000.md (TOC_SAVE_OFFSET_32BIT): Remove.
10860
+ (TOC_SAVE_OFFSET_64BIT): Likewise.
10861
+ (AIX_FUNC_DESC_TOC_32BIT): Likewise.
10862
+ (AIX_FUNC_DESC_TOC_64BIT): Likewise.
10863
+ (AIX_FUNC_DESC_SC_32BIT): Likewise.
10864
+ (AIX_FUNC_DESC_SC_64BIT): Likewise.
10865
+ ("call" expander): Call rs6000_call_aix.
10866
+ ("call_value" expander): Likewise.
10867
+ ("call_indirect_aix<ptrsize>"): Replace this pattern ...
10868
+ ("call_indirect_aix<ptrsize>_nor11"): ... and this pattern ...
10869
+ ("*call_indirect_aix<mode>"): ... by this insn pattern.
10870
+ ("call_value_indirect_aix<ptrsize>"): Replace this pattern ...
10871
+ ("call_value_indirect_aix<ptrsize>_nor11"): ... and this pattern ...
10872
+ ("*call_value_indirect_aix<mode>"): ... by this insn pattern.
10873
+ ("*call_nonlocal_aix32", "*call_nonlocal_aix64"): Replace by ...
10874
+ ("*call_nonlocal_aix<mode>"): ... this pattern.
10875
+ ("*call_value_nonlocal_aix32", "*call_value_nonlocal_aix64"): Replace
10876
+ ("*call_value_nonlocal_aix<mode>"): ... by this pattern.
10877
+ ("*call_local_aix<mode>"): New insn pattern.
10878
+ ("*call_value_local_aix<mode>"): Likewise.
10879
+ ("sibcall" expander): Call rs6000_sibcall_aix.
10880
+ ("sibcall_value" expander): Likewise. Move earlier in file.
10881
+ ("*sibcall_nonlocal_aix<mode>"): Replace by ...
10882
+ ("*sibcall_aix<mode>"): ... this pattern.
10883
+ ("*sibcall_value_nonlocal_aix<mode>"): Replace by ...
10884
+ ("*sibcall_value_aix<mode>"): ... this pattern.
10885
+ * config/rs6000/rs6000-protos.h (rs6000_call_indirect_aix): Remove.
10886
+ (rs6000_call_aix): Add prototype.
10887
+ (rs6000_sibcall_aix): Likewise.
10889
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10891
+ Backport from mainline r204799:
10893
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10895
+ * config/rs6000/rs6000.c (rs6000_emit_prologue): Do not place a
10896
+ RTX_FRAME_RELATED_P marker on the UNSPEC_MOVESI_FROM_CR insn.
10897
+ Instead, add USEs of all modified call-saved CR fields to the
10898
+ insn storing the result to the stack slot, and provide an
10899
+ appropriate REG_FRAME_RELATED_EXPR for that insn.
10900
+ * config/rs6000/rs6000.md ("*crsave"): New insn pattern.
10901
+ * config/rs6000/predicates.md ("crsave_operation"): New predicate.
10903
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10905
+ Backport from mainline r204798:
10907
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10908
+ Alan Modra <amodra@gmail.com>
10910
+ * function.c (assign_parms): Use all.reg_parm_stack_space instead
10911
+ of re-evaluating REG_PARM_STACK_SPACE target macro.
10912
+ (locate_and_pad_parm): New parameter REG_PARM_STACK_SPACE. Use it
10913
+ instead of evaluating target macro REG_PARM_STACK_SPACE every time.
10914
+ (assign_parm_find_entry_rtl): Update call.
10915
+ * calls.c (initialize_argument_information): Update call.
10916
+ (emit_library_call_value_1): Likewise.
10917
+ * expr.h (locate_and_pad_parm): Update prototype.
10919
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10921
+ Backport from mainline r204797:
10923
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10925
+ * calls.c (store_unaligned_arguments_into_pseudos): Skip PARALLEL
10928
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10930
+ Backport from mainline r197003:
10932
+ 2013-03-23 Eric Botcazou <ebotcazou@adacore.com>
10934
+ * calls.c (expand_call): Add missing guard to code handling return
10935
+ of non-BLKmode structures in MSB.
10936
+ * function.c (expand_function_end): Likewise.
10938
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
10940
+ Backport from mainline r201750.
10941
+ Note: Default setting of -mcompat-align-parm inverted!
10943
+ 2013-08-14 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10946
+ * doc/invoke.texi: Add documentation of mcompat-align-parm
10948
+ * config/rs6000/rs6000.opt: Add mcompat-align-parm option.
10949
+ * config/rs6000/rs6000.c (rs6000_function_arg_boundary): For AIX
10950
+ and Linux, correct BLKmode alignment when 128-bit alignment is
10951
+ required and compatibility flag is not set.
10952
+ (rs6000_gimplify_va_arg): For AIX and Linux, honor specified
10953
+ alignment for zero-size arguments when compatibility flag is not
10956
+2013-11-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10958
+ * configure: Regenerate.
10960
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10962
+ Backport from mainline r204441
10963
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10965
+ * config/rs6000/rs6000.c (rs6000_option_override_internal):
10966
+ Remove restriction against use of VSX instructions when generating
10967
+ code for little endian mode.
10969
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10971
+ Backport from mainline r204440
10972
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10974
+ * config/rs6000/altivec.md (mulv4si3): Ensure we generate vmulouh
10975
+ for both big and little endian.
10976
+ (mulv8hi3): Swap input operands for merge high and merge low
10977
+ instructions for little endian.
10979
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10981
+ Backport from mainline r204439
10982
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
10984
+ * config/rs6000/altivec.md (vec_widen_umult_even_v16qi): Change
10985
+ define_insn to define_expand that uses even patterns for big
10986
+ endian and odd patterns for little endian.
10987
+ (vec_widen_smult_even_v16qi): Likewise.
10988
+ (vec_widen_umult_even_v8hi): Likewise.
10989
+ (vec_widen_smult_even_v8hi): Likewise.
10990
+ (vec_widen_umult_odd_v16qi): Likewise.
10991
+ (vec_widen_smult_odd_v16qi): Likewise.
10992
+ (vec_widen_umult_odd_v8hi): Likewise.
10993
+ (vec_widen_smult_odd_v8hi): Likewise.
10994
+ (altivec_vmuleub): New define_insn.
10995
+ (altivec_vmuloub): Likewise.
10996
+ (altivec_vmulesb): Likewise.
10997
+ (altivec_vmulosb): Likewise.
10998
+ (altivec_vmuleuh): Likewise.
10999
+ (altivec_vmulouh): Likewise.
11000
+ (altivec_vmulesh): Likewise.
11001
+ (altivec_vmulosh): Likewise.
11003
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11005
+ Backport from mainline r204395
11006
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11008
+ * config/rs6000/vector.md (vec_pack_sfix_trunc_v2df): Adjust for
11010
+ (vec_pack_ufix_trunc_v2df): Likewise.
11012
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11014
+ Backport from mainline r204363
11015
+ 2013-11-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11017
+ * config/rs6000/altivec.md (vec_widen_umult_hi_v16qi): Swap
11018
+ arguments to merge instruction for little endian.
11019
+ (vec_widen_umult_lo_v16qi): Likewise.
11020
+ (vec_widen_smult_hi_v16qi): Likewise.
11021
+ (vec_widen_smult_lo_v16qi): Likewise.
11022
+ (vec_widen_umult_hi_v8hi): Likewise.
11023
+ (vec_widen_umult_lo_v8hi): Likewise.
11024
+ (vec_widen_smult_hi_v8hi): Likewise.
11025
+ (vec_widen_smult_lo_v8hi): Likewise.
11027
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11029
+ Backport from mainline r204350
11030
+ 2013-11-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11032
+ * config/rs6000/vsx.md (*vsx_le_perm_store_<mode> for VSX_D):
11033
+ Replace the define_insn_and_split with a define_insn and two
11034
+ define_splits, with the split after reload re-permuting the source
11035
+ register to its original value.
11036
+ (*vsx_le_perm_store_<mode> for VSX_W): Likewise.
11037
+ (*vsx_le_perm_store_v8hi): Likewise.
11038
+ (*vsx_le_perm_store_v16qi): Likewise.
11040
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11042
+ Backport from mainline r204321
11043
+ 2013-11-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11045
+ * config/rs6000/vector.md (vec_pack_trunc_v2df): Adjust for
11048
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11050
+ Backport from mainline r204321
11051
+	2013-11-02  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
11053
+ * config/rs6000/rs6000.c (rs6000_expand_vector_set): Adjust for
11056
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11058
+ Backport from mainline r203980
11059
+ 2013-10-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11061
+ * config/rs6000/altivec.md (mulv8hi3): Adjust for little endian.
11063
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11065
+ Backport from mainline r203930
11066
+	2013-10-22  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
11068
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Reverse
11069
+ meaning of merge-high and merge-low masks for little endian; avoid
11070
+ use of vector-pack masks for little endian for mismatched modes.
11072
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11074
+ Backport from mainline r203877
11075
+ 2013-10-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11077
+ * config/rs6000/altivec.md (vec_unpacku_hi_v16qi): Adjust for
11079
+ (vec_unpacku_hi_v8hi): Likewise.
11080
+ (vec_unpacku_lo_v16qi): Likewise.
11081
+ (vec_unpacku_lo_v8hi): Likewise.
11083
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11085
+ Backport from mainline r203863
11086
+ 2013-10-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11088
+ * config/rs6000/rs6000.c (vspltis_constant): Make sure we check
11089
+ all elements for both endian flavors.
11091
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11093
+ Backport from mainline r203714
11094
+ 2013-10-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11096
+ * gcc/config/rs6000/vector.md (vec_unpacks_hi_v4sf): Correct for
11098
+ (vec_unpacks_lo_v4sf): Likewise.
11099
+ (vec_unpacks_float_hi_v4si): Likewise.
11100
+ (vec_unpacks_float_lo_v4si): Likewise.
11101
+ (vec_unpacku_float_hi_v4si): Likewise.
11102
+ (vec_unpacku_float_lo_v4si): Likewise.
11104
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11106
+ Backport from mainline r203713
11107
+ 2013-10-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11109
+ * config/rs6000/vsx.md (vsx_concat_<mode>): Adjust output for LE.
11110
+ (vsx_concat_v2sf): Likewise.
11112
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11114
+ Backport from mainline r203458
11115
+ 2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11117
+ * config/rs6000/vsx.md (*vsx_le_perm_load_v2di): Generalize to
11118
+ handle vector float as well.
11119
+ (*vsx_le_perm_load_v4si): Likewise.
11120
+ (*vsx_le_perm_store_v2di): Likewise.
11121
+ (*vsx_le_perm_store_v4si): Likewise.
11123
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11125
+ Backport from mainline r203457
11126
+ 2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11128
+ * config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
11129
+ directly to circumvent subtract from splat{31} workaround.
11130
+ * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
11132
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
11133
+ * config/rs6000/altivec.md (define_c_enum "unspec"): Add
11134
+ UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
11135
+ (altivec_vperm_<mode>): Convert to define_insn_and_split to
11136
+ separate big and little endian logic.
11137
+ (*altivec_vperm_<mode>_internal): New define_insn.
11138
+ (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
11139
+ separate big and little endian logic.
11140
+ (*altivec_vperm_<mode>_uns_internal): New define_insn.
11141
+ (vec_permv16qi): Add little endian logic.
11143
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11145
+ Backport from mainline r203247
11146
+ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11148
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const_le): New.
11149
+ (altivec_expand_vec_perm_const): Call it.
11151
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11153
+ Backport from mainline r203246
11154
+ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11156
+ * config/rs6000/vector.md (mov<mode>): Emit permuted move
11157
+ sequences for LE VSX loads and stores at expand time.
11158
+ * config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_move): New
11160
+ * config/rs6000/rs6000.c (rs6000_const_vec): New.
11161
+ (rs6000_gen_le_vsx_permute): New.
11162
+ (rs6000_gen_le_vsx_load): New.
11163
+ (rs6000_gen_le_vsx_store): New.
11164
+ (rs6000_gen_le_vsx_move): New.
11165
+ * config/rs6000/vsx.md (*vsx_le_perm_load_v2di): New.
11166
+ (*vsx_le_perm_load_v4si): New.
11167
+ (*vsx_le_perm_load_v8hi): New.
11168
+ (*vsx_le_perm_load_v16qi): New.
11169
+ (*vsx_le_perm_store_v2di): New.
11170
+ (*vsx_le_perm_store_v4si): New.
11171
+ (*vsx_le_perm_store_v8hi): New.
11172
+ (*vsx_le_perm_store_v16qi): New.
11173
+ (*vsx_xxpermdi2_le_<mode>): New.
11174
+ (*vsx_xxpermdi4_le_<mode>): New.
11175
+ (*vsx_xxpermdi8_le_V8HI): New.
11176
+ (*vsx_xxpermdi16_le_V16QI): New.
11177
+ (*vsx_lxvd2x2_le_<mode>): New.
11178
+ (*vsx_lxvd2x4_le_<mode>): New.
11179
+ (*vsx_lxvd2x8_le_V8HI): New.
11180
+ (*vsx_lxvd2x16_le_V16QI): New.
11181
+ (*vsx_stxvd2x2_le_<mode>): New.
11182
+ (*vsx_stxvd2x4_le_<mode>): New.
11183
+ (*vsx_stxvd2x8_le_V8HI): New.
11184
+ (*vsx_stxvd2x16_le_V16QI): New.
11186
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11188
+ Backport from mainline r201235
11189
+ 2013-07-24 Bill Schmidt <wschmidt@linux.ibm.com>
11190
+ Anton Blanchard <anton@au1.ibm.com>
11192
+ * config/rs6000/altivec.md (altivec_vpkpx): Handle little endian.
11193
+ (altivec_vpks<VI_char>ss): Likewise.
11194
+ (altivec_vpks<VI_char>us): Likewise.
11195
+ (altivec_vpku<VI_char>us): Likewise.
11196
+ (altivec_vpku<VI_char>um): Likewise.
11198
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11200
+ Backport from mainline r201208
11201
+	2013-07-24  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
11202
+ Anton Blanchard <anton@au1.ibm.com>
11204
+ * config/rs6000/vector.md (vec_realign_load_<mode>): Reorder input
11205
+ operands to vperm for little endian.
11206
+ * config/rs6000/rs6000.c (rs6000_expand_builtin): Use lvsr instead
11207
+ of lvsl to create the control mask for a vperm for little endian.
11209
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11211
+ Backport from mainline r201195
11212
+ 2013-07-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11213
+ Anton Blanchard <anton@au1.ibm.com>
11215
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Reverse
11216
+ two operands for little-endian.
11218
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11220
+ Backport from mainline r201193
11221
+ 2013-07-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11222
+ Anton Blanchard <anton@au1.ibm.com>
11224
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Correct
11225
+ selection of field for vector splat in little endian mode.
11227
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11229
+ Backport from mainline r201149
11230
+	2013-07-22  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
11231
+ Anton Blanchard <anton@au1.ibm.com>
11233
+ * config/rs6000/rs6000.c (rs6000_expand_vector_init): Fix
11234
+ endianness when selecting field to splat.
11236
+2013-10-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11238
+ Backport from mainline
11239
+ 2013-04-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
11242
+ * config/rs6000/rs6000.c (rs6000_emit_swdiv_high_precision): Remove.
11243
+ (rs6000_emit_swdiv_low_precision): Remove.
11244
+ (rs6000_emit_swdiv): Rewrite to handle between one and four
11245
+ iterations of Newton-Raphson generally; modify required number of
11246
+ iterations for some cases.
11247
+ * config/rs6000/rs6000.h (RS6000_RECIP_HIGH_PRECISION_P): Remove.
11249
+2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
11251
+ Backport from mainline
11252
+ 2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
11254
+ * config/rs6000/rs6000.c (enum rs6000_reload_reg_type): Add new
11255
+ fields to the reg_addr array that describes the valid addressing
11256
+ mode for any register, general purpose registers, floating point
11257
+ registers, and Altivec registers.
11258
+ (FIRST_RELOAD_REG_CLASS): Likewise.
11259
+ (LAST_RELOAD_REG_CLASS): Likewise.
11260
+ (struct reload_reg_map_type): Likewise.
11261
+ (reload_reg_map_type): Likewise.
11262
+ (RELOAD_REG_VALID): Likewise.
11263
+ (RELOAD_REG_MULTIPLE): Likewise.
11264
+ (RELOAD_REG_INDEXED): Likewise.
11265
+ (RELOAD_REG_OFFSET): Likewise.
11266
+ (RELOAD_REG_PRE_INCDEC): Likewise.
11267
+ (RELOAD_REG_PRE_MODIFY): Likewise.
11268
+ (reg_addr): Likewise.
11269
+ (mode_supports_pre_incdec_p): New helper functions to say whether
11270
+ a given mode supports PRE_INC, PRE_DEC, and PRE_MODIFY.
11271
+ (mode_supports_pre_modify_p): Likewise.
11272
+ (rs6000_debug_vector_unit): Rearrange the -mdebug=reg output to
11273
+ print the valid address mode bits for each mode.
11274
+ (rs6000_debug_print_mode): Likewise.
11275
+ (rs6000_debug_reg_global): Likewise.
11276
+ (rs6000_setup_reg_addr_masks): New function to set up the address
11277
+ mask bits for each type.
11278
+ (rs6000_init_hard_regno_mode_ok): Use memset to clear arrays.
11279
+ Call rs6000_setup_reg_addr_masks to set up the address mask bits.
11280
+ (rs6000_legitimate_address_p): Use mode_supports_pre_incdec_p and
11281
+ mode_supports_pre_modify_p to determine if PRE_INC, PRE_DEC, and
11282
+ PRE_MODIFY are supported.
11283
+ (rs6000_output_move_128bit): Change to use {src,dest}_vmx_p for altivec
11284
+ registers, instead of {src,dest}_av_p.
11285
+ (rs6000_print_options_internal): Tweak the debug output slightly.
11287
+ Backport from mainline
11288
+ 2013-10-03 Michael Meissner <meissner@linux.vnet.ibm.com>
11290
+ * config/rs6000/rs6000-builtin.def (XSRDPIM): Use floatdf2,
11291
+ ceildf2, btruncdf2, instead of vsx_* name.
11293
+ * config/rs6000/vsx.md (vsx_add<mode>3): Change arithmetic
11294
+ iterators to only do V2DF and V4SF here. Move the DF code to
11295
+ rs6000.md where it is combined with SF mode. Replace <VSv> with
11296
+ just 'v' since only vector operations are handled with these insns
11297
+ after moving the DF support to rs6000.md.
11298
+ (vsx_sub<mode>3): Likewise.
11299
+ (vsx_mul<mode>3): Likewise.
11300
+ (vsx_div<mode>3): Likewise.
11301
+ (vsx_fre<mode>2): Likewise.
11302
+ (vsx_neg<mode>2): Likewise.
11303
+ (vsx_abs<mode>2): Likewise.
11304
+ (vsx_nabs<mode>2): Likewise.
11305
+ (vsx_smax<mode>3): Likewise.
11306
+ (vsx_smin<mode>3): Likewise.
11307
+ (vsx_sqrt<mode>2): Likewise.
11308
+ (vsx_rsqrte<mode>2): Likewise.
11309
+ (vsx_fms<mode>4): Likewise.
11310
+ (vsx_nfma<mode>4): Likewise.
11311
+ (vsx_copysign<mode>3): Likewise.
11312
+ (vsx_btrunc<mode>2): Likewise.
11313
+ (vsx_floor<mode>2): Likewise.
11314
+ (vsx_ceil<mode>2): Likewise.
11315
+ (vsx_smaxsf3): Delete scalar ops that were moved to rs6000.md.
11316
+ (vsx_sminsf3): Likewise.
11317
+ (vsx_fmadf4): Likewise.
11318
+ (vsx_fmsdf4): Likewise.
11319
+ (vsx_nfmadf4): Likewise.
11320
+ (vsx_nfmsdf4): Likewise.
11321
+ (vsx_cmpdf_internal1): Likewise.
11323
+ * config/rs6000/rs6000.h (TARGET_SF_SPE): Define macros to make it
11324
+ simpler to select whether a target has SPE or traditional floating
11325
+ point support in iterators.
11326
+ (TARGET_DF_SPE): Likewise.
11327
+ (TARGET_SF_FPR): Likewise.
11328
+ (TARGET_DF_FPR): Likewise.
11329
+ (TARGET_SF_INSN): Macros to say whether floating point support
11330
+ exists for a given operation for expanders.
11331
+ (TARGET_DF_INSN): Likewise.
11333
+ * config/rs6000/rs6000.c (Ftrad): New mode attributes to allow
11334
+ combining of SF/DF mode operations, using both traditional and VSX
11336
+ (Fvsx): Likewise.
11340
+ (Ffre): Likewise.
11341
+ (FFRE): Likewise.
11342
+ (abs<mode>2): Combine SF/DF modes using traditional floating point
11343
+ instructions. Add support for using the upper DF registers with
11344
+ VSX support, and SF registers with power8-vector support. Update
11345
+ expanders for operations supported by both the SPE and traditional
11346
+ floating point units.
11347
+ (abs<mode>2_fpr): Likewise.
11348
+ (nabs<mode>2): Likewise.
11349
+ (nabs<mode>2_fpr): Likewise.
11350
+ (neg<mode>2): Likewise.
11351
+ (neg<mode>2_fpr): Likewise.
11352
+ (add<mode>3): Likewise.
11353
+ (add<mode>3_fpr): Likewise.
11354
+ (sub<mode>3): Likewise.
11355
+ (sub<mode>3_fpr): Likewise.
11356
+ (mul<mode>3): Likewise.
11357
+ (mul<mode>3_fpr): Likewise.
11358
+ (div<mode>3): Likewise.
11359
+ (div<mode>3_fpr): Likewise.
11360
+ (sqrt<mode>3): Likewise.
11361
+ (sqrt<mode>3_fpr): Likewise.
11362
+ (fre<Fs>): Likewise.
11363
+ (rsqrt<mode>2): Likewise.
11364
+ (cmp<mode>_fpr): Likewise.
11365
+ (smax<mode>3): Likewise.
11366
+ (smin<mode>3): Likewise.
11367
+ (smax<mode>3_vsx): Likewise.
11368
+ (smin<mode>3_vsx): Likewise.
11369
+ (negsf2): Delete SF operations that are merged with DF.
11370
+ (abssf2): Likewise.
11371
+ (addsf3): Likewise.
11372
+ (subsf3): Likewise.
11373
+ (mulsf3): Likewise.
11374
+ (divsf3): Likewise.
11375
+ (fres): Likewise.
11376
+ (fmasf4_fpr): Likewise.
11377
+ (fmssf4_fpr): Likewise.
11378
+ (nfmasf4_fpr): Likewise.
11379
+ (nfmssf4_fpr): Likewise.
11380
+ (sqrtsf2): Likewise.
11381
+ (rsqrtsf_internal1): Likewise.
11382
+ (smaxsf3): Likewise.
11383
+ (sminsf3): Likewise.
11384
+ (cmpsf_internal1): Likewise.
11385
+ (copysign<mode>3_fcpsgn): Add VSX/power8-vector support.
11386
+ (negdf2): Delete DF operations that are merged with SF.
11387
+ (absdf2): Likewise.
11388
+ (nabsdf2): Likewise.
11389
+ (adddf3): Likewise.
11390
+ (subdf3): Likewise.
11391
+ (muldf3): Likewise.
11392
+ (divdf3): Likewise.
11393
+ (fred): Likewise.
11394
+ (rsqrtdf_internal1): Likewise.
11395
+ (fmadf4_fpr): Likewise.
11396
+ (fmsdf4_fpr): Likewise.
11397
+ (nfmadf4_fpr): Likewise.
11398
+ (nfmsdf4_fpr): Likewise.
11399
+ (sqrtdf2): Likewise.
11400
+ (smaxdf3): Likewise.
11401
+ (smindf3): Likewise.
11402
+ (cmpdf_internal1): Likewise.
11403
+ (lrint<mode>di2): Use TARGET_<MODE>_FPR macro.
11404
+ (btrunc<mode>2): Delete separate expander, and combine with the
11405
+ insn and add VSX instruction support. Use TARGET_<MODE>_FPR.
11406
+ (btrunc<mode>2_fpr): Likewise.
11407
+ (ceil<mode>2): Likewise.
11408
+ (ceil<mode>2_fpr): Likewise.
11409
+ (floor<mode>2): Likewise.
11410
+ (floor<mode>2_fpr): Likewise.
11411
+ (fma<mode>4_fpr): Combine SF and DF fused multiply/add support.
11412
+ Add support for using the upper registers with VSX and
11413
+ power8-vector. Move insns to be closer to the define_expands. On
11414
+ VSX systems, prefer the traditional form of FMA over the VSX
11415
+ version, since the traditional form allows the target not to
11416
+ overlap with the inputs.
11417
+ (fms<mode>4_fpr): Likewise.
11418
+ (nfma<mode>4_fpr): Likewise.
11419
+ (nfms<mode>4_fpr): Likewise.
11421
+ Backport from mainline
11422
+ 2013-09-27 Michael Meissner <meissner@linux.vnet.ibm.com>
11424
+ * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow
11425
+ DFmode, DImode, and SFmode in the upper VSX registers based on the
11426
+ -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS
11427
+ if -mpower8-vector. Combine -mvsx-timode handling with the rest
11428
+ of the VSX register handling.
11430
+ * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX registers.
11431
+ (f32_sv): Likewise.
11432
+ (zero_extendsidi2_lfiwzx): Add support for loading into the
11433
+ Altivec registers with -mpower8-vector. Use wu/wv constraints to
11434
+ only do VSX memory options on Altivec registers.
11435
+ (extendsidi2_lfiwax): Likewise.
11436
+ (extendsfdf2_fpr): Likewise.
11437
+ (mov<mode>_hardfloat, SF/SD modes): Likewise.
11438
+ (mov<mode>_hardfloat32, DF/DD modes): Likewise.
11439
+ (mov<mode>_hardfloat64, DF/DD modes): Likewise.
11440
+ (movdi_internal64): Likewise.
11442
+ Backport from mainline
11443
+ 2013-09-23 Michael Meissner <meissner@linux.vnet.ibm.com>
11445
+ * config/rs6000/rs6000.c (rs6000_vector_reload): Delete, combine
11446
+ reload helper function arrays into a single array reg_addr.
11447
+ (reload_fpr_gpr): Likewise.
11448
+ (reload_gpr_vsx): Likewise.
11449
+ (reload_vsx_gpr): Likewise.
11450
+ (struct rs6000_reg_addr): Likewise.
11451
+ (reg_addr): Likewise.
11452
+ (rs6000_debug_reg_global): Change rs6000_vector_reload,
11453
+ reload_fpr_gpr, reload_gpr_vsx, reload_vsx_gpr uses to reg_addr.
11454
+ (rs6000_init_hard_regno_mode_ok): Likewise.
11455
+ (rs6000_secondary_reload_direct_move): Likewise.
11456
+ (rs6000_secondary_reload): Likewise.
11458
+ * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add new
11459
+ constraints: wu, ww, and wy. Repurpose wv constraint added during
11460
+ power8 changes. Put wg constraint in alphabetical order.
11462
+ * config/rs6000/rs6000.opt (-mvsx-scalar-float): New debug switch
11463
+ for future work to add ISA 2.07 VSX single precision support.
11464
+ (-mvsx-scalar-double): Change default from -1 to 1, update
11465
+ documentation comment.
11466
+ (-mvsx-scalar-memory): Rename debug switch to -mupper-regs-df.
11467
+ (-mupper-regs-df): New debug switch to control whether DF values
11468
+ can go in the traditional Altivec registers.
11469
+ (-mupper-regs-sf): New debug switch to control whether SF values
11470
+ can go in the traditional Altivec registers.
11472
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print wu, ww,
11473
+ and wy constraints.
11474
+ (rs6000_init_hard_regno_mode_ok): Use ssize_t instead of int for
11475
+ loop variables. Rename -mvsx-scalar-memory to -mupper-regs-df.
11476
+ Add new constraints, wu/ww/wy. Repurpose wv constraint.
11477
+ (rs6000_debug_legitimate_address_p): Print if we are running
11478
+ before, during, or after reload.
11479
+ (rs6000_secondary_reload): Add a comment.
11480
+ (rs6000_opt_masks): Add -mupper-regs-df, -mupper-regs-sf.
11482
+ * config/rs6000/constraints.md (wa constraint): Sort w<x>
11483
+ constraints. Update documentation string.
11484
+ (wd constraint): Likewise.
11485
+ (wf constraint): Likewise.
11486
+ (wg constraint): Likewise.
11487
+ (wn constraint): Likewise.
11488
+ (ws constraint): Likewise.
11489
+ (wt constraint): Likewise.
11490
+ (wx constraint): Likewise.
11491
+ (wz constraint): Likewise.
11492
+ (wu constraint): New constraint for ISA 2.07 SFmode scalar
11494
+ (ww constraint): Likewise.
11495
+ (wy constraint): Likewise.
11496
+ (wv constraint): Repurpose ISA 2.07 constraint that did not use in
11497
+ the previous submissions.
11498
+ * doc/md.texi (PowerPC and IBM RS6000): Likewise.
11500
+ Backport from mainline
11501
+ 2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
11504
+ * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
11505
+ restrict TImode addresses to single indirect registers if both
11506
+ -mquad-memory and -mvsx-timode are used.
11507
+ (rs6000_output_move_128bit): Use quad_load_store_p to determine if
11508
+ we should emit load/store quad. Remove using %y for quad memory
11511
+ * config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
11512
+ constraints to allow load/store quad on machines where TImode is
11513
+ not allowed in VSX registers. Use 'n' instead of 'F' constraint
11514
+ for TImode to load integer constants.
11516
+2013-10-02 Michael Meissner <meissner@linux.vnet.ibm.com>
11518
+ Backport from mainline
11519
+ 2013-10-02 Michael Meissner <meissner@linux.vnet.ibm.com>
11522
+ * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Turn off
11523
+ setting -mvsx-timode by default until the underlying problem is
11525
+ (RS6000_CPU, power7 defaults): Likewise.
11527
+2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
11529
+ Backport from mainline
11530
+ 2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
11531
+ Jakub Jelinek <jakub@redhat.com>
11533
+ * builtins.def (BUILT_IN_FABSD32): New DFP ABS builtin.
11534
+ (BUILT_IN_FABSD64): Likewise.
11535
+ (BUILT_IN_FABSD128): Likewise.
11536
+ * builtins.c (expand_builtin): Add support for
11537
+ new DFP ABS builtins.
11538
+ (fold_builtin_1): Likewise.
11539
+ * config/rs6000/dfp.md
11540
+ (*negtd2_fpr): Handle
11541
+ non-overlapping destination
11542
+ and source operands.
11548
+2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
11550
+ Backport from trunk
11551
+ 2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
11554
+ * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the
11555
+ memory rtx to contain ZERO_EXTEND and SIGN_EXTEND.
11557
+ * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands
11558
+ array instead of each individual operand as a separate argument.
11559
+ (emit_fusion_gpr_load): Likewise.
11560
+ (expand_fusion_gpr_load): Add new function declaration.
11562
+ * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling
11563
+ signature to have the operands passed as an array, instead of as
11564
+ separate arguments. Allow ZERO_EXTEND to be in the memory
11565
+ address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not
11566
+ depend on the register live/dead flags when peepholes are run.
11567
+ (expand_fusion_gpr_load): New function to be called from the
11568
+ peephole2 pass, to change the register that addis sets to be the
11570
+ (emit_fusion_gpr_load): Change the calling signature to have the
11571
+ operands passed as an array, instead of as separate arguments.
11572
+ Allow ZERO_EXTEND to be in the memory address, and also
11573
+ SIGN_EXTEND if -mpower8-fusion-sign.
11575
+ * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused
11576
+ unspec enumeration.
11577
+ (power8 fusion peephole/peephole2): Rework the fusion peepholes to
11578
+ adjust the register addis loads up in the peephole2 pass. Do not
11579
+ depend on the register live/dead state when the peephole pass is
11582
+ Backport from trunk
11583
+ 2013-07-23 Michael Meissner <meissner@linux.vnet.ibm.com>
11585
+ * config/rs6000/vector.md (xor<mode>3): Move 128-bit boolean
11586
+ expanders to rs6000.md.
11587
+ (ior<mode>3): Likewise.
11588
+ (and<mode>3): Likewise.
11589
+ (one_cmpl<mode>2): Likewise.
11590
+ (nor<mode>3): Likewise.
11591
+ (andc<mode>3): Likewise.
11592
+ (eqv<mode>3): Likewise.
11593
+ (nand<mode>3): Likewise.
11594
+ (orc<mode>3): Likewise.
11596
+ * config/rs6000/rs6000-protos.h (rs6000_split_logical): New
11599
+ * config/rs6000/rs6000.c (rs6000_split_logical_inner): Add support
11600
+ to split multi-word logical operations.
11601
+ (rs6000_split_logical_di): Likewise.
11602
+ (rs6000_split_logical): Likewise.
11604
+ * config/rs6000/vsx.md (VSX_L2): Delete, no longer used.
11605
+ (vsx_and<mode>3_32bit): Move 128-bit logical insns to rs6000.md,
11606
+ and allow TImode operations in 32-bit.
11607
+ (vsx_and<mode>3_64bit): Likewise.
11608
+ (vsx_ior<mode>3_32bit): Likewise.
11609
+ (vsx_ior<mode>3_64bit): Likewise.
11610
+ (vsx_xor<mode>3_32bit): Likewise.
11611
+ (vsx_xor<mode>3_64bit): Likewise.
11612
+ (vsx_one_cmpl<mode>2_32bit): Likewise.
11613
+ (vsx_one_cmpl<mode>2_64bit): Likewise.
11614
+ (vsx_nor<mode>3_32bit): Likewise.
11615
+ (vsx_nor<mode>3_64bit): Likewise.
11616
+ (vsx_andc<mode>3_32bit): Likewise.
11617
+ (vsx_andc<mode>3_64bit): Likewise.
11618
+ (vsx_eqv<mode>3_32bit): Likewise.
11619
+ (vsx_eqv<mode>3_64bit): Likewise.
11620
+ (vsx_nand<mode>3_32bit): Likewise.
11621
+ (vsx_nand<mode>3_64bit): Likewise.
11622
+ (vsx_orc<mode>3_32bit): Likewise.
11623
+ (vsx_orc<mode>3_64bit): Likewise.
11625
+ * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Always allow vector
11626
+ logical types in GPRs.
11628
+ * config/rs6000/altivec.md (altivec_and<mode>3): Move 128-bit
11629
+ logical insns to rs6000.md, and allow TImode operations in
11631
+ (altivec_ior<mode>3): Likewise.
11632
+ (altivec_xor<mode>3): Likewise.
11633
+ (altivec_one_cmpl<mode>2): Likewise.
11634
+ (altivec_nor<mode>3): Likewise.
11635
+ (altivec_andc<mode>3): Likewise.
11637
+ * config/rs6000/rs6000.md (BOOL_128): New mode iterators and mode
11638
+ attributes for moving the 128-bit logical operations into
11640
+ (BOOL_REGS_OUTPUT): Likewise.
11641
+ (BOOL_REGS_OP1): Likewise.
11642
+ (BOOL_REGS_OP2): Likewise.
11643
+ (BOOL_REGS_UNARY): Likewise.
11644
+ (BOOL_REGS_AND_CR0): Likewise.
11645
+ (one_cmpl<mode>2): Add support for DI logical operations on
11646
+ 32-bit, splitting the operations to 32-bit.
11647
+ (anddi3): Likewise.
11648
+ (iordi3): Likewise.
11649
+ (xordi3): Likewise.
11650
+ (and<mode>3, 128-bit types): Rewrite 2013-06-06 logical operator
11651
+ changes to combine the 32/64-bit code, allow logical operations on
11652
+ TI mode in 32-bit, and to use similar match_operator patterns like
11653
+ scalar mode uses. Combine the Altivec and VSX code for logical
11654
+ operations, and move it here.
11655
+ (ior<mode>3, 128-bit types): Likewise.
11656
+ (xor<mode>3, 128-bit types): Likewise.
11657
+ (one_cmpl<mode>3, 128-bit types): Likewise.
11658
+ (nor<mode>3, 128-bit types): Likewise.
11659
+ (andc<mode>3, 128-bit types): Likewise.
11660
+ (eqv<mode>3, 128-bit types): Likewise.
11661
+ (nand<mode>3, 128-bit types): Likewise.
11662
+ (orc<mode>3, 128-bit types): Likewise.
11663
+ (and<mode>3_internal): Likewise.
11664
+ (bool<mode>3_internal): Likewise.
11665
+ (boolc<mode>3_internal1): Likewise.
11666
+ (boolc<mode>3_internal2): Likewise.
11667
+ (boolcc<mode>3_internal1): Likewise.
11668
+ (boolcc<mode>3_internal2): Likewise.
11669
+ (eqv<mode>3_internal1): Likewise.
11670
+ (eqv<mode>3_internal2): Likewise.
11671
+ (one_cmpl1<mode>3_internal): Likewise.
11673
+2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
11675
+ Backport from mainline
11676
+ 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
11678
+ * config/rs6000/predicates.md (fusion_gpr_addis): New predicates
11679
+ to support power8 load fusion.
11680
+ (fusion_gpr_mem_load): Likewise.
11682
+ * config/rs6000/rs6000-modes.def (PTImode): Update a comment.
11684
+ * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New
11685
+ declarations for power8 load fusion.
11686
+ (emit_fusion_gpr_load): Likewise.
11688
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): If
11689
+ tuning for power8, turn on fusion mode by default. Turn on sign
11690
+ extending fusion mode if normal fusion mode is on, and we are at
11692
+ (fusion_gpr_load_p): New function, return true if we can fuse an
11693
+ addis instruction with a dependent load to a GPR.
11694
+ (emit_fusion_gpr_load): Emit the instructions for power8 load
11697
+ * config/rs6000/vsx.md (VSX_M2): New iterator for fusion
11699
+ (VSX load fusion peepholes): New peepholes to fuse together an
11700
+ addi instruction with a VSX load instruction.
11702
+ * config/rs6000/rs6000.md (GPR load fusion peepholes): New
11703
+ peepholes to fuse an addis instruction with a load to a GPR base
11704
+ register. If we are supporting sign extending fusions, convert
11705
+ sign extending loads to zero extending loads and add an explicit
11708
+2013-07-19 Pat Haugen <pthaugen@us.ibm.com>
11710
+ Backport from mainline
11711
+ 2013-07-18 Pat Haugen <pthaugen@us.ibm.com>
11713
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag
11714
+ interaction for new Power8 flags and VSX.
11716
+2013-07-17 Peter Bergner <bergner@vnet.ibm.com>
11718
+ Backport from mainline
11719
+ 2013-07-17 Iain Sandoe <iain@codesourcery.com>
11721
+ * config/rs6000/darwin.h (REGISTER_NAMES): Add HTM registers.
11723
+2013-07-16 Peter Bergner <bergner@vnet.ibm.com>
11725
+ Merge up to 200989.
11726
+ * REVISION: Update subversion id.
11728
+2013-07-16 Peter Bergner <bergner@vnet.ibm.com>
11730
+ Backport from mainline
11731
+ 2013-07-16 Peter Bergner <bergner@vnet.ibm.com>
11733
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Do not
11734
+ enable extra ISA flags with TARGET_HTM.
11736
+ 2013-07-16 Jakub Jelinek <jakub@redhat.com>
11737
+ Peter Bergner <bergner@vnet.ibm.com>
11739
+ * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTERS): Mention HTM
11740
+ registers in the comment.
11741
+ (DWARF_FRAME_REGISTERS): Subtract also the 3 HTM registers.
11742
+ (DWARF_REG_TO_UNWIND_COLUMN): Use DWARF_FRAME_REGISTERS
11743
+ rather than FIRST_PSEUDO_REGISTERS.
11745
+2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
11747
+ Backport from mainline
11748
+ 2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
11750
+ * config.gcc (powerpc*-*-*): Install htmintrin.h and htmxlintrin.h.
11751
+ * config/rs6000/t-rs6000 (MD_INCLUDES): Add htm.md.
11752
+ * config/rs6000/rs6000.opt: Add -mhtm option.
11753
+ * config/rs6000/rs6000-cpus.def (POWERPC_MASKS): Add OPTION_MASK_HTM.
11754
+ (ISA_2_7_MASKS_SERVER): Add OPTION_MASK_HTM.
11755
+ * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
11756
+ __HTM__ if the HTM instructions are available.
11757
+ * config/rs6000/predicates.md (u3bit_cint_operand, u10bit_cint_operand,
11758
+ htm_spr_reg_operand): New define_predicates.
11759
+ * config/rs6000/rs6000.md (define_attr "type"): Add htm.
11760
+ (TFHAR_REGNO, TFIAR_REGNO, TEXASR_REGNO): New define_constants.
11762
+ * config/rs6000/rs6000-builtin.def (BU_HTM_0, BU_HTM_1, BU_HTM_2,
11763
+ BU_HTM_3, BU_HTM_SPR0, BU_HTM_SPR1): Add support macros for defining
11764
+ HTM builtin functions.
11765
+ * config/rs6000/rs6000.c (RS6000_BUILTIN_H): New macro.
11766
+ (rs6000_reg_names, alt_reg_names): Add HTM SPR register names.
11767
+ (rs6000_init_hard_regno_mode_ok): Add support for HTM instructions.
11768
+ (rs6000_builtin_mask_calculate): Likewise.
11769
+ (rs6000_option_override_internal): Likewise.
11770
+ (bdesc_htm): Add new HTM builtin support.
11771
+ (htm_spr_num): New function.
11772
+ (htm_spr_regno): Likewise.
11773
+ (rs6000_htm_spr_icode): Likewise.
11774
+ (htm_expand_builtin): Likewise.
11775
+ (htm_init_builtins): Likewise.
11776
+ (rs6000_expand_builtin): Add support for HTM builtin functions.
11777
+ (rs6000_init_builtins): Likewise.
11778
+ (rs6000_invalid_builtin, rs6000_opt_mask): Add support for -mhtm option.
11779
+ * config/rs6000/rs6000.h (ASM_CPU_SPEC): Add support for -mhtm.
11780
+ (TARGET_HTM, MASK_HTM): Define macros.
11781
+ (FIRST_PSEUDO_REGISTER): Adjust for new HTM SPR registers.
11782
+ (FIXED_REGISTERS): Likewise.
11783
+ (CALL_USED_REGISTERS): Likewise.
11784
+ (CALL_REALLY_USED_REGISTERS): Likewise.
11785
+ (REG_ALLOC_ORDER): Likewise.
11786
+ (enum reg_class): Likewise.
11787
+ (REG_CLASS_NAMES): Likewise.
11788
+ (REG_CLASS_CONTENTS): Likewise.
11789
+ (REGISTER_NAMES): Likewise.
11790
+ (ADDITIONAL_REGISTER_NAMES): Likewise.
11791
+ (RS6000_BTC_SPR, RS6000_BTC_VOID, RS6000_BTC_32BIT, RS6000_BTC_64BIT,
11792
+ RS6000_BTC_MISC_MASK, RS6000_BTM_HTM): New macros.
11793
+ (RS6000_BTM_COMMON): Add RS6000_BTM_HTM.
11794
+ * config/rs6000/htm.md: New file.
11795
+ * config/rs6000/htmintrin.h: New file.
11796
+ * config/rs6000/htmxlintrin.h: New file.
11798
+2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
11800
+ Back port from the trunk
11801
+ 2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
11804
+ * config/rs6000/rs6000.h (MODES_TIEABLE_P): Do not allow PTImode
11805
+ to tie with any other modes. Eliminate Altivec vector mode tests,
11806
+ since these are a subset of ALTIVEC or VSX vector modes. Simplify
11807
+ code, to return 0 if testing MODE2 for a condition, if we've
11808
+ already tested MODE1 for the same condition.
11810
+2013-06-28 Pat Haugen <pthaugen@us.ibm.com>
11812
+ * config/rs6000/rs6000.md (define_insn ""): Fix insn type.
11814
+2013-06-26 Pat Haugen <pthaugen@us.ibm.com>
11816
+ Back port from the trunk
11817
+ 2013-06-26 Michael Meissner <meissner@linux.vnet.ibm.com>
11818
+ Pat Haugen <pthaugen@us.ibm.com>
11819
+ Peter Bergner <bergner@vnet.ibm.com>
11821
+ * config/rs6000/power8.md: New.
11822
+ * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor
11823
+ setting for power8 entry.
11824
+ * config/rs6000/t-rs6000 (MD_INCLUDES): Add power8.md.
11825
+ * config/rs6000/rs6000.c (is_microcoded_insn, is_cracked_insn): Adjust
11826
+ test for Power4/Power5 only.
11827
+ (insn_must_be_first_in_group, insn_must_be_last_in_group): Add Power8
11829
+ (force_new_group): Adjust comment.
11830
+ * config/rs6000/rs6000.md: Include power8.md.
11832
+2013-06-14 Michael Meissner <meissner@linux.vnet.ibm.com>
11834
+ Back port from the trunk
11835
+ 2013-06-14 Michael Meissner <meissner@linux.vnet.ibm.com>
11838
+ * config/rs6000/rs6000.md (mov<mode>_ppc64): Call
11839
+ rs6000_output_move_128bit to handle emitting quad memory
11840
+ operations. Set attribute length to 8 bytes.
11842
+2013-06-13 Michael Meissner <meissner@linux.vnet.ibm.com>
11844
+ Back port from the trunk
11845
+ 2013-06-13 Michael Meissner <meissner@linux.vnet.ibm.com>
11847
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Move
11848
+ test for clearing quad memory on 32-bit later.
11850
+2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
11852
+ Back port from the trunk
11854
+ Backport from mainline
11855
+ 2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
11856
+ Pat Haugen <pthaugen@us.ibm.com>
11857
+ Peter Bergner <bergner@vnet.ibm.com>
11859
+ * config/rs6000/rs6000.c (emit_load_locked): Add support for
11860
+ power8 byte, half-word, and quad-word atomic instructions.
11861
+ (emit_store_conditional): Likewise.
11862
+ (rs6000_expand_atomic_compare_and_swap): Likewise.
11863
+ (rs6000_expand_atomic_op): Likewise.
11865
+ * config/rs6000/sync.md (larx): Add new modes for power8.
11866
+ (stcx): Likewise.
11867
+ (AINT): New mode iterator to include TImode as well as normal
11868
+ integer modes on power8.
11869
+ (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so
11870
+ that VSX registers are not considered. Use AINT mode iterator
11871
+ instead of INT1 to allow inclusion of quad word atomic operations
11873
+ (load_locked<mode>): Likewise.
11874
+ (store_conditional<mode>): Likewise.
11875
+ (atomic_compare_and_swap<mode>): Likewise.
11876
+ (atomic_exchange<mode>): Likewise.
11877
+ (atomic_nand<mode>): Likewise.
11878
+ (atomic_fetch_<fetchop_name><mode>): Likewise.
11879
+ (atomic_nand_fetch<mode>): Likewise.
11880
+ (mem_thread_fence): Use gen_loadsync_<mode> instead of enumerating
11882
+ (ATOMIC): On power8, add QImode, HImode modes.
11883
+ (load_locked<QHI:mode>_si): Variants of load_locked for QI/HI
11884
+ modes that promote to SImode.
11885
+ (load_lockedti): Convert TImode arguments to PTImode, so that we
11886
+ get a guaranteed even/odd register pair.
11887
+ (load_lockedpti): Likewise.
11888
+ (store_conditionalti): Likewise.
11889
+ (store_conditionalpti): Likewise.
11891
+ * config/rs6000/rs6000.md (QHI): New mode iterator for power8
11892
+ atomic load/store instructions.
11895
+2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
11897
+ Back port from the trunk
11899
+ 2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
11900
+ Pat Haugen <pthaugen@us.ibm.com>
11901
+ Peter Bergner <bergner@vnet.ibm.com>
11903
+ * config/rs6000/rs6000.c (emit_load_locked): Add support for
11904
+ power8 byte, half-word, and quad-word atomic instructions.
11905
+ (emit_store_conditional): Likewise.
11906
+ (rs6000_expand_atomic_compare_and_swap): Likewise.
11907
+ (rs6000_expand_atomic_op): Likewise.
11909
+ * config/rs6000/sync.md (larx): Add new modes for power8.
11910
+ (stcx): Likewise.
11911
+ (AINT): New mode iterator to include TImode as well as normal
11912
+ integer modes on power8.
11913
+ (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so
11914
+ that VSX registers are not considered. Use AINT mode iterator
11915
+ instead of INT1 to allow inclusion of quad word atomic operations
11917
+ (load_locked<mode>): Likewise.
11918
+ (store_conditional<mode>): Likewise.
11919
+ (atomic_compare_and_swap<mode>): Likewise.
11920
+ (atomic_exchange<mode>): Likewise.
11921
+ (atomic_nand<mode>): Likewise.
11922
+ (atomic_fetch_<fetchop_name><mode>): Likewise.
11923
+ (atomic_nand_fetch<mode>): Likewise.
11924
+ (mem_thread_fence): Use gen_loadsync_<mode> instead of enumerating
11926
+ (ATOMIC): On power8, add QImode, HImode modes.
11927
+ (load_locked<QHI:mode>_si): Variants of load_locked for QI/HI
11928
+ modes that promote to SImode.
11929
+ (load_lockedti): Convert TImode arguments to PTImode, so that we
11930
+ get a guaranteed even/odd register pair.
11931
+ (load_lockedpti): Likewise.
11932
+ (store_conditionalti): Likewise.
11933
+ (store_conditionalpti): Likewise.
11935
+ * config/rs6000/rs6000.md (QHI): New mode iterator for power8
11936
+ atomic load/store instructions.
11940
+ * config/rs6000/driver-rs6000.c (elf_platform): Make buffer static
11941
+ to allow returning address to AT_PLATFORM name.
11943
+ Back port from the trunk
11945
+ 2013-06-10 Michael Meissner <meissner@linux.vnet.ibm.com>
11946
+ Pat Haugen <pthaugen@us.ibm.com>
11947
+ Peter Bergner <bergner@vnet.ibm.com>
11949
+ * config/rs6000/vector.md (GPR move splitter): Do not split moves
11950
+ of vectors in GPRS if they are direct moves or quad word load or
11953
+ * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add
11955
+ (direct_move_p): Likewise.
11956
+ (quad_load_store_p): Likewise.
11958
+ * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register
11959
+ classes into bins based on the physical register type.
11960
+ (reg_class_to_reg_type): Likewise.
11961
+ (IS_STD_REG_TYPE): Likewise.
11962
+ (IS_FP_VECT_REG_TYPE): Likewise.
11963
+ (reload_fpr_gpr): Arrays to determine what insn to use if we can
11964
+ use direct move instructions.
11965
+ (reload_gpr_vsx): Likewise.
11966
+ (reload_vsx_gpr): Likewise.
11967
+ (rs6000_init_hard_regno_mode_ok): Precalculate the register type
11968
+ information that is a simplification of register classes. Also
11969
+ precalculate direct move reload helpers.
11970
+ (direct_move_p): New function to return true if the operation can
11971
+ be done as a direct move instruction.
11972
+ (quad_load_store_p): New function to return true if the operation
11973
+ is a quad memory operation.
11974
+ (rs6000_legitimize_address): If quad memory, only allow register
11975
+ indirect for TImode addresses.
11976
+ (rs6000_legitimate_address_p): Likewise.
11977
+ (enum reload_reg_type): Delete, replace with rs6000_reg_type.
11978
+ (rs6000_reload_register_type): Likewise.
11979
+ (register_to_reg_type): Return register type.
11980
+ (rs6000_secondary_reload_simple_move): New helper function for
11981
+ secondary reload and secondary memory needed to identify anything
11982
+ that is a simple move, and does not need reloading.
11983
+ (rs6000_secondary_reload_direct_move): New helper function for
11984
+ secondary reload to identify cases that can be done with several
11985
+ instructions via the direct move instructions.
11986
+ (rs6000_secondary_reload_move): New helper function for secondary
11987
+ reload to identify moves between register types that can be done.
11988
+ (rs6000_secondary_reload): Add support for quad memory operations
11989
+ and for direct move.
11990
+ (rs6000_secondary_memory_needed): Likewise.
11991
+ (rs6000_debug_secondary_memory_needed): Change argument names.
11992
+ (rs6000_output_move_128bit): New function to return the move to
11993
+ use for 128-bit moves, including knowing about the various
11994
+ limitations of quad memory operations.
11996
+ * config/rs6000/vsx.md (vsx_mov<mode>): Add support for quad
11997
+ memory operations. call rs6000_output_move_128bit for the actual
11998
+ instruction(s) to generate.
11999
+ (vsx_movti_64bit): Likewise.
12001
+ * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values.
12002
+ (UNSPEC_P8V_MTVSRWZ): Likewise.
12003
+ (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise.
12004
+ (UNSPEC_P8V_MTVSRD): Likewise.
12005
+ (UNSPEC_P8V_XXPERMDI): Likewise.
12006
+ (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise.
12007
+ (UNSPEC_FUSION_GPR): Likewise.
12008
+ (FMOVE128_GPR): New iterator for direct move.
12009
+ (f32_lv): New mode attribute for load/store of SFmode/SDmode
12011
+ (f32_sv): Likewise.
12012
+ (f32_dm): Likewise.
12013
+ (zero_extend<mode>di2_internal1): Add support for power8 32-bit
12014
+ loads and direct move instructions.
12015
+ (zero_extendsidi2_lfiwzx): Likewise.
12016
+ (extendsidi2_lfiwax): Likewise.
12017
+ (extendsidi2_nocell): Likewise.
12018
+ (floatsi<mode>2_lfiwax): Likewise.
12019
+ (lfiwax): Likewise.
12020
+ (floatunssi<mode>2_lfiwzx): Likewise.
12021
+ (lfiwzx): Likewise.
12022
+ (fix_trunc<mode>_stfiwx): Likewise.
12023
+ (fixuns_trunc<mode>_stfiwx): Likewise.
12024
+ (mov<mode>_hardfloat, 32-bit floating point): Likewise.
12025
+ (mov<move>_hardfloat64, 64-bit floating point): Likewise.
12026
+ (parity<mode>2_cmpb): Set length/type attr.
12027
+ (unnamed shift right patterns, mov<mode>_internal2): Change type attr
12028
+ for 'mr.' to fast_compare.
12029
+ (bpermd_<mode>): Change type attr to popcnt.
12030
+ (p8_fmrgow_<mode>): New insns for power8 direct move support.
12031
+ (p8_mtvsrwz_1): Likewise.
12032
+ (p8_mtvsrwz_2): Likewise.
12033
+ (reload_fpr_from_gpr<mode>): Likewise.
12034
+ (p8_mtvsrd_1): Likewise.
12035
+ (p8_mtvsrd_2): Likewise.
12036
+ (p8_xxpermdi_<mode>): Likewise.
12037
+ (reload_vsx_from_gpr<mode>): Likewise.
12038
+ (reload_vsx_from_gprsf): Likewise.
12039
+ (p8_mfvsrd_3_<mode>): Likewise.
12040
+ (reload_gpr_from_vsx<mode>): Likewise.
12041
+ (reload_gpr_from_vsxsf): Likewise.
12042
+ (p8_mfvsrd_4_disf): Likewise.
12043
+ (multi-word GPR splits): Do not split direct moves or quad memory
12046
+2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
12048
+ Backport from the trunk
12050
+ 2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
12051
+ Pat Haugen <pthaugen@us.ibm.com>
12052
+ Peter Bergner <bergner@vnet.ibm.com>
12054
+ * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions):
12055
+ Document new power8 builtins.
12057
+ * config/rs6000/vector.md (and<mode>3): Add a clobber/scratch of a
12058
+ condition code register, to allow 128-bit logical operations to be
12059
+ done in the VSX or GPR registers.
12060
+ (nor<mode>3): Use the canonical form for nor.
12061
+ (eqv<mode>3): Add expanders for power8 xxleqv, xxlnand, xxlorc,
12062
+ vclz*, and vpopcnt* vector instructions.
12063
+ (nand<mode>3): Likewise.
12064
+ (orc<mode>3): Likewise.
12065
+ (clz<mode>2): Likewise.
12066
+ (popcount<mode>2): Likewise.
12068
+ * config/rs6000/predicates.md (int_reg_operand): Rework tests so
12069
+ that only the GPRs are recognized.
12071
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
12072
+ support for new power8 builtins.
12074
+ * config/rs6000/rs6000-builtin.def (xscvspdpn): Add new power8
12075
+ builtin functions.
12076
+ (xscvdpspn): Likewise.
12077
+ (vclz): Likewise.
12078
+ (vclzb): Likewise.
12079
+ (vclzh): Likewise.
12080
+ (vclzw): Likewise.
12081
+ (vclzd): Likewise.
12082
+ (vpopcnt): Likewise.
12083
+ (vpopcntb): Likewise.
12084
+ (vpopcnth): Likewise.
12085
+ (vpopcntw): Likewise.
12086
+ (vpopcntd): Likewise.
12087
+ (vgbbd): Likewise.
12088
+ (vmrgew): Likewise.
12089
+ (vmrgow): Likewise.
12091
+ (eqv_v16qi3): Likewise.
12092
+ (eqv_v8hi3): Likewise.
12093
+ (eqv_v4si3): Likewise.
12094
+ (eqv_v2di3): Likewise.
12095
+ (eqv_v4sf3): Likewise.
12096
+ (eqv_v2df3): Likewise.
12097
+ (nand): Likewise.
12098
+ (nand_v16qi3): Likewise.
12099
+ (nand_v8hi3): Likewise.
12100
+ (nand_v4si3): Likewise.
12101
+ (nand_v2di3): Likewise.
12102
+ (nand_v4sf3): Likewise.
12103
+ (nand_v2df3): Likewise.
12105
+ (orc_v16qi3): Likewise.
12106
+ (orc_v8hi3): Likewise.
12107
+ (orc_v4si3): Likewise.
12108
+ (orc_v2di3): Likewise.
12109
+ (orc_v4sf3): Likewise.
12110
+ (orc_v2df3): Likewise.
12112
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Only
12113
+ allow power8 quad mode in 64-bit.
12114
+ (rs6000_builtin_vectorized_function): Add support to vectorize
12115
+ ISA 2.07 count leading zeros, population count builtins.
12116
+ (rs6000_expand_vector_init): On ISA 2.07 use xscvdpspn to form
12117
+ V4SF vectors instead of xscvdpsp to avoid IEEE related traps.
12118
+ (builtin_function_type): Add vgbbd builtin function which takes an
12119
+ unsigned argument.
12120
+ (altivec_expand_vec_perm_const): Add support for new power8 merge
12123
+ * config/rs6000/vsx.md (VSX_L2): New iterator for 128-bit types,
12124
+ that does not include TImdoe for use with 32-bit.
12125
+ (UNSPEC_VSX_CVSPDPN): Support for power8 xscvdpspn and xscvspdpn
12127
+ (UNSPEC_VSX_CVDPSPN): Likewise.
12128
+ (vsx_xscvdpspn): Likewise.
12129
+ (vsx_xscvspdpn): Likewise.
12130
+ (vsx_xscvdpspn_scalar): Likewise.
12131
+ (vsx_xscvspdpn_directmove): Likewise.
12132
+ (vsx_and<mode>3): Split logical operations into 32-bit and
12133
+ 64-bit. Add support to do logical operations on TImode as well as
12134
+ VSX vector types. Allow logical operations to be done in either
12135
+ VSX registers or in general purpose registers in 64-bit mode. Add
12136
+ splitters if GPRs were used. For AND, add clobber of CCmode to
12137
+ allow use of ANDI on GPRs. Rewrite nor to use the canonical RTL
12139
+ (vsx_and<mode>3_32bit): Likewise.
12140
+ (vsx_and<mode>3_64bit): Likewise.
12141
+ (vsx_ior<mode>3): Likewise.
12142
+ (vsx_ior<mode>3_32bit): Likewise.
12143
+ (vsx_ior<mode>3_64bit): Likewise.
12144
+ (vsx_xor<mode>3): Likewise.
12145
+ (vsx_xor<mode>3_32bit): Likewise.
12146
+ (vsx_xor<mode>3_64bit): Likewise.
12147
+ (vsx_one_cmpl<mode>2): Likewise.
12148
+ (vsx_one_cmpl<mode>2_32bit): Likewise.
12149
+ (vsx_one_cmpl<mode>2_64bit): Likewise.
12150
+ (vsx_nor<mode>3): Likewise.
12151
+ (vsx_nor<mode>3_32bit): Likewise.
12152
+ (vsx_nor<mode>3_64bit): Likewise.
12153
+ (vsx_andc<mode>3): Likewise.
12154
+ (vsx_andc<mode>3_32bit): Likewise.
12155
+ (vsx_andc<mode>3_64bit): Likewise.
12156
+ (vsx_eqv<mode>3_32bit): Add support for power8 xxleqv, xxlnand,
12157
+ and xxlorc instructions.
12158
+ (vsx_eqv<mode>3_64bit): Likewise.
12159
+ (vsx_nand<mode>3_32bit): Likewise.
12160
+ (vsx_nand<mode>3_64bit): Likewise.
12161
+ (vsx_orc<mode>3_32bit): Likewise.
12162
+ (vsx_orc<mode>3_64bit): Likewise.
12164
+ * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Update comment.
12166
+ * config/rs6000/altivec.md (UNSPEC_VGBBD): Add power8 vgbbd
12168
+ (p8_vmrgew): Add power8 vmrgew and vmrgow instructions.
12169
+ (p8_vmrgow): Likewise.
12170
+ (altivec_and<mode>3): Add clobber of CCmode to allow AND using
12171
+ GPRs to be split under VSX.
12172
+ (p8v_clz<mode>2): Add power8 count leading zero support.
12173
+ (p8v_popcount<mode>2): Add power8 population count support.
12174
+ (p8v_vgbbd): Add power8 gather bits by bytes by doubleword
12177
+ * config/rs6000/rs6000.md (eqv<mode>3): Add support for powerp eqv
12180
+ * config/rs6000/altivec.h (vec_eqv): Add defines to export power8
12181
+ builtin functions.
12182
+ (vec_nand): Likewise.
12183
+ (vec_vclz): Likewise.
12184
+ (vec_vclzb): Likewise.
12185
+ (vec_vclzd): Likewise.
12186
+ (vec_vclzh): Likewise.
12187
+ (vec_vclzw): Likewise.
12188
+ (vec_vgbbd): Likewise.
12189
+ (vec_vmrgew): Likewise.
12190
+ (vec_vmrgow): Likewise.
12191
+ (vec_vpopcnt): Likewise.
12192
+ (vec_vpopcntb): Likewise.
12193
+ (vec_vpopcntd): Likewise.
12194
+ (vec_vpopcnth): Likewise.
12195
+ (vec_vpopcntw): Likewise.
12197
+2013-06-06 Peter Bergner <bergner@vnet.ibm.com>
12199
+ Merge up to 199753.
12200
+ * REVISION: Update subversion id.
12202
+2013-06-06 Peter Bergner <bergner@vnet.ibm.com>
12204
+ Backport from trunk
12206
+ 2013-05-29 Michael Meissner <meissner@linux.vnet.ibm.com>
12207
+ Pat Haugen <pthaugen@us.ibm.com>
12208
+ Peter Bergner <bergner@vnet.ibm.com>
12210
+ * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI
12212
+ (VEC_A): Likewise.
12213
+ (VEC_C): Likewise.
12214
+ (vrotl<mode>3): Likewise.
12215
+ (vashl<mode>3): Likewise.
12216
+ (vlshr<mode>3): Likewise.
12217
+ (vashr<mode>3): Likewise.
12219
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
12220
+ support for power8 V2DI builtins.
12222
+ * config/rs6000/rs6000-builtin.def (abs_v2di): Add support for
12223
+ power8 V2DI builtins.
12224
+ (vupkhsw): Likewise.
12225
+ (vupklsw): Likewise.
12226
+ (vaddudm): Likewise.
12227
+ (vminsd): Likewise.
12228
+ (vmaxsd): Likewise.
12229
+ (vminud): Likewise.
12230
+ (vmaxud): Likewise.
12231
+ (vpkudum): Likewise.
12232
+ (vpksdss): Likewise.
12233
+ (vpkudus): Likewise.
12234
+ (vpksdus): Likewise.
12235
+ (vrld): Likewise.
12236
+ (vsld): Likewise.
12237
+ (vsrd): Likewise.
12238
+ (vsrad): Likewise.
12239
+ (vsubudm): Likewise.
12240
+ (vcmpequd): Likewise.
12241
+ (vcmpgtsd): Likewise.
12242
+ (vcmpgtud): Likewise.
12243
+ (vcmpequd_p): Likewise.
12244
+ (vcmpgtsd_p): Likewise.
12245
+ (vcmpgtud_p): Likewise.
12246
+ (vupkhsw): Likewise.
12247
+ (vupklsw): Likewise.
12248
+ (vaddudm): Likewise.
12249
+ (vmaxsd): Likewise.
12250
+ (vmaxud): Likewise.
12251
+ (vminsd): Likewise.
12252
+ (vminud): Likewise.
12253
+ (vpksdss): Likewise.
12254
+ (vpksdus): Likewise.
12255
+ (vpkudum): Likewise.
12256
+ (vpkudus): Likewise.
12257
+ (vrld): Likewise.
12258
+ (vsld): Likewise.
12259
+ (vsrad): Likewise.
12260
+ (vsrd): Likewise.
12261
+ (vsubudm): Likewise.
12263
+ * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Add
12264
+ support for power8 V2DI instructions.
12266
+ * config/rs6000/altivec.md (UNSPEC_VPKUHUM): Add support for
12267
+ power8 V2DI instructions. Combine pack and unpack insns to use an
12268
+ iterator for each mode. Check whether a particular mode supports
12269
+ Altivec instructions instead of just checking TARGET_ALTIVEC.
12270
+ (UNSPEC_VPKUWUM): Likewise.
12271
+ (UNSPEC_VPKSHSS): Likewise.
12272
+ (UNSPEC_VPKSWSS): Likewise.
12273
+ (UNSPEC_VPKUHUS): Likewise.
12274
+ (UNSPEC_VPKSHUS): Likewise.
12275
+ (UNSPEC_VPKUWUS): Likewise.
12276
+ (UNSPEC_VPKSWUS): Likewise.
12277
+ (UNSPEC_VPACK_SIGN_SIGN_SAT): Likewise.
12278
+ (UNSPEC_VPACK_SIGN_UNS_SAT): Likewise.
12279
+ (UNSPEC_VPACK_UNS_UNS_SAT): Likewise.
12280
+ (UNSPEC_VPACK_UNS_UNS_MOD): Likewise.
12281
+ (UNSPEC_VUPKHSB): Likewise.
12282
+ (UNSPEC_VUNPACK_HI_SIGN): Likewise.
12283
+ (UNSPEC_VUNPACK_LO_SIGN): Likewise.
12284
+ (UNSPEC_VUPKHSH): Likewise.
12285
+ (UNSPEC_VUPKLSB): Likewise.
12286
+ (UNSPEC_VUPKLSH): Likewise.
12288
+ (VI_char): Likewise.
12289
+ (VI_scalar): Likewise.
12290
+ (VI_unit): Likewise.
12292
+ (VP_small): Likewise.
12293
+ (VP_small_lc): Likewise.
12294
+ (VU_char): Likewise.
12295
+ (add<mode>3): Likewise.
12296
+ (altivec_vaddcuw): Likewise.
12297
+ (altivec_vaddu<VI_char>s): Likewise.
12298
+ (altivec_vadds<VI_char>s): Likewise.
12299
+ (sub<mode>3): Likewise.
12300
+ (altivec_vsubcuw): Likewise.
12301
+ (altivec_vsubu<VI_char>s): Likewise.
12302
+ (altivec_vsubs<VI_char>s): Likewise.
12303
+ (altivec_vavgs<VI_char>): Likewise.
12304
+ (altivec_vcmpbfp): Likewise.
12305
+ (altivec_eq<mode>): Likewise.
12306
+ (altivec_gt<mode>): Likewise.
12307
+ (altivec_gtu<mode>): Likewise.
12308
+ (umax<mode>3): Likewise.
12309
+ (smax<mode>3): Likewise.
12310
+ (umin<mode>3): Likewise.
12311
+ (smin<mode>3): Likewise.
12312
+ (altivec_vpkuhum): Likewise.
12313
+ (altivec_vpkuwum): Likewise.
12314
+ (altivec_vpkshss): Likewise.
12315
+ (altivec_vpkswss): Likewise.
12316
+ (altivec_vpkuhus): Likewise.
12317
+ (altivec_vpkshus): Likewise.
12318
+ (altivec_vpkuwus): Likewise.
12319
+ (altivec_vpkswus): Likewise.
12320
+ (altivec_vpks<VI_char>ss): Likewise.
12321
+ (altivec_vpks<VI_char>us): Likewise.
12322
+ (altivec_vpku<VI_char>us): Likewise.
12323
+ (altivec_vpku<VI_char>um): Likewise.
12324
+ (altivec_vrl<VI_char>): Likewise.
12325
+ (altivec_vsl<VI_char>): Likewise.
12326
+ (altivec_vsr<VI_char>): Likewise.
12327
+ (altivec_vsra<VI_char>): Likewise.
12328
+ (altivec_vsldoi_<mode>): Likewise.
12329
+ (altivec_vupkhsb): Likewise.
12330
+ (altivec_vupkhs<VU_char>): Likewise.
12331
+ (altivec_vupkls<VU_char>): Likewise.
12332
+ (altivec_vupkhsh): Likewise.
12333
+ (altivec_vupklsb): Likewise.
12334
+ (altivec_vupklsh): Likewise.
12335
+ (altivec_vcmpequ<VI_char>_p): Likewise.
12336
+ (altivec_vcmpgts<VI_char>_p): Likewise.
12337
+ (altivec_vcmpgtu<VI_char>_p): Likewise.
12338
+ (abs<mode>2): Likewise.
12339
+ (vec_unpacks_hi_v16qi): Likewise.
12340
+ (vec_unpacks_hi_v8hi): Likewise.
12341
+ (vec_unpacks_lo_v16qi): Likewise.
12342
+ (vec_unpacks_hi_<VP_small_lc>): Likewise.
12343
+ (vec_unpacks_lo_v8hi): Likewise.
12344
+ (vec_unpacks_lo_<VP_small_lc>): Likewise.
12345
+ (vec_pack_trunc_v8h): Likewise.
12346
+ (vec_pack_trunc_v4si): Likewise.
12347
+ (vec_pack_trunc_<mode>): Likewise.
12349
+ * config/rs6000/altivec.h (vec_vaddudm): Add defines for power8
12351
+ (vec_vmaxsd): Likewise.
12352
+ (vec_vmaxud): Likewise.
12353
+ (vec_vminsd): Likewise.
12354
+ (vec_vminud): Likewise.
12355
+ (vec_vpksdss): Likewise.
12356
+ (vec_vpksdus): Likewise.
12357
+ (vec_vpkudum): Likewise.
12358
+ (vec_vpkudus): Likewise.
12359
+ (vec_vrld): Likewise.
12360
+ (vec_vsld): Likewise.
12361
+ (vec_vsrad): Likewise.
12362
+ (vec_vsrd): Likewise.
12363
+ (vec_vsubudm): Likewise.
12364
+ (vec_vupkhsw): Likewise.
12365
+ (vec_vupklsw): Likewise.
12367
+ 2013-05-22 Michael Meissner <meissner@linux.vnet.ibm.com>
12368
+ Pat Haugen <pthaugen@us.ibm.com>
12369
+ Peter Bergner <bergner@vnet.ibm.com>
12371
+ * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add
12372
+ documentation for the power8 crypto builtins.
12374
+ * config/rs6000/t-rs6000 (MD_INCLUDES): Add crypto.md.
12376
+ * config/rs6000/rs6000-builtin.def (BU_P8V_AV_1): Add support
12377
+ macros for defining power8 builtin functions.
12378
+ (BU_P8V_AV_2): Likewise.
12379
+ (BU_P8V_AV_P): Likewise.
12380
+ (BU_P8V_VSX_1): Likewise.
12381
+ (BU_P8V_OVERLOAD_1): Likewise.
12382
+ (BU_P8V_OVERLOAD_2): Likewise.
12383
+ (BU_CRYPTO_1): Likewise.
12384
+ (BU_CRYPTO_2): Likewise.
12385
+ (BU_CRYPTO_3): Likewise.
12386
+ (BU_CRYPTO_OVERLOAD_1): Likewise.
12387
+ (BU_CRYPTO_OVERLOAD_2): Likewise.
12388
+ (XSCVSPDP): Fix typo, point to the correct instruction.
12389
+ (VCIPHER): Add power8 crypto builtins.
12390
+ (VCIPHERLAST): Likewise.
12391
+ (VNCIPHER): Likewise.
12392
+ (VNCIPHERLAST): Likewise.
12393
+ (VPMSUMB): Likewise.
12394
+ (VPMSUMH): Likewise.
12395
+ (VPMSUMW): Likewise.
12396
+ (VPERMXOR_V2DI): Likewise.
12397
+ (VPERMXOR_V4SI: Likewise.
12398
+ (VPERMXOR_V8HI: Likewise.
12399
+ (VPERMXOR_V16QI: Likewise.
12400
+ (VSHASIGMAW): Likewise.
12401
+ (VSHASIGMAD): Likewise.
12402
+ (VPMSUM): Likewise.
12403
+ (VPERMXOR): Likewise.
12404
+ (VSHASIGMA): Likewise.
12406
+ * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
12407
+ __CRYPTO__ if the crypto instructions are available.
12408
+ (altivec_overloaded_builtins): Add support for overloaded power8
12411
+ * config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add
12412
+ support for power8 crypto builtins.
12413
+ (builtin_function_type): Likewise.
12414
+ (altivec_init_builtins): Add support for builtins that take vector
12415
+ long long (V2DI) arguments.
12417
+ * config/rs6000/crypto.md: New file, define power8 crypto
12420
+ 2013-05-22 Michael Meissner <meissner@linux.vnet.ibm.com>
12421
+ Pat Haugen <pthaugen@us.ibm.com>
12422
+ Peter Bergner <bergner@vnet.ibm.com>
12424
+ * doc/invoke.texi (Option Summary): Add power8 options.
12425
+ (RS/6000 and PowerPC Options): Likewise.
12427
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Update to use
12428
+ constraints.md instead of rs6000.h. Reorder w* constraints. Add
12429
+ wm, wn, wr documentation.
12431
+ * gcc/config/rs6000/constraints.md (wm): New constraint for VSX
12432
+ registers if direct move instructions are enabled.
12433
+ (wn): New constraint for no registers.
12434
+ (wq): New constraint for quad word even GPR registers.
12435
+ (wr): New constraint if 64-bit instructions are enabled.
12436
+ (wv): New constraint if power8 vector instructions are enabled.
12437
+ (wQ): New constraint for quad word memory locations.
12439
+ * gcc/config/rs6000/predicates.md (const_0_to_15_operand): New
12440
+ constraint for 0..15 for crypto instructions.
12441
+ (gpc_reg_operand): If VSX allow registers in VSX registers as well
12442
+ as GPR and floating point registers.
12443
+ (int_reg_operand): New predicate to match only GPR registers.
12444
+ (base_reg_operand): New predicate to match base registers.
12445
+ (quad_int_reg_operand): New predicate to match even GPR registers
12446
+ for quad memory operations.
12447
+ (vsx_reg_or_cint_operand): New predicate to allow vector logical
12448
+ operations in both GPR and VSX registers.
12449
+ (quad_memory_operand): New predicate for quad memory operations.
12450
+ (reg_or_indexed_operand): New predicate for direct move support.
12452
+ * gcc/config/rs6000/rs6000-cpus.def (ISA_2_5_MASKS_EMBEDDED):
12453
+ Inherit from ISA_2_4_MASKS, not ISA_2_2_MASKS.
12454
+ (ISA_2_7_MASKS_SERVER): New mask for ISA 2.07 (i.e. power8).
12455
+ (POWERPC_MASKS): Add power8 options.
12456
+ (power8 cpu): Use ISA_2_7_MASKS_SERVER instead of specifying the
12459
+ * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros):
12460
+ Define _ARCH_PWR8 and __POWER8_VECTOR__ for power8.
12462
+ * gcc/config/rs6000/rs6000.opt (-mvsx-timode): Add documentation.
12463
+ (-mpower8-fusion): New power8 options.
12464
+ (-mpower8-fusion-sign): Likewise.
12465
+ (-mpower8-vector): Likewise.
12466
+ (-mcrypto): Likewise.
12467
+ (-mdirect-move): Likewise.
12468
+ (-mquad-memory): Likewise.
12470
+ * gcc/config/rs6000/rs6000.c (power8_cost): Initial definition for
12472
+ (rs6000_hard_regno_mode_ok): Make PTImode only match even GPR
12474
+ (rs6000_debug_reg_print): Print the base register class if
12476
+ (rs6000_debug_vector_unit): Add p8_vector.
12477
+ (rs6000_debug_reg_global): If -mdebug=reg, print power8 constraint
12478
+ definitions. Also print fusion state.
12479
+ (rs6000_init_hard_regno_mode_ok): Set up power8 constraints.
12480
+ (rs6000_builtin_mask_calculate): Add power8 builtin support.
12481
+ (rs6000_option_override_internal): Add support for power8.
12482
+ (rs6000_common_init_builtins): Add debugging for skipped builtins
12483
+ if -mdebug=builtin.
12484
+ (rs6000_adjust_cost): Add power8 support.
12485
+ (rs6000_issue_rate): Likewise.
12486
+ (insn_must_be_first_in_group): Likewise.
12487
+ (insn_must_be_last_in_group): Likewise.
12488
+ (force_new_group): Likewise.
12489
+ (rs6000_register_move_cost): Likewise.
12490
+ (rs6000_opt_masks): Likewise.
12492
+ * config/rs6000/rs6000.h (ASM_CPU_POWER8_SPEC): If we don't have a
12493
+ power8 capable assembler, default to power7 options.
12494
+ (TARGET_DIRECT_MOVE): Likewise.
12495
+ (TARGET_CRYPTO): Likewise.
12496
+ (TARGET_P8_VECTOR): Likewise.
12497
+ (VECTOR_UNIT_P8_VECTOR_P): Define power8 vector support.
12498
+ (VECTOR_UNIT_VSX_OR_P8_VECTOR_P): Likewise.
12499
+ (VECTOR_MEM_P8_VECTOR_P): Likewise.
12500
+ (VECTOR_MEM_VSX_OR_P8_VECTOR_P): Likewise.
12501
+ (VECTOR_MEM_ALTIVEC_OR_VSX_P): Likewise.
12502
+ (TARGET_XSCVDPSPN): Likewise.
12503
+ (TARGET_XSCVSPDPN): Likewsie.
12504
+ (TARGET_SYNC_HI_QI): Likewise.
12505
+ (TARGET_SYNC_TI): Likewise.
12506
+ (MASK_CRYPTO): Likewise.
12507
+ (MASK_DIRECT_MOVE): Likewise.
12508
+ (MASK_P8_FUSION): Likewise.
12509
+ (MASK_P8_VECTOR): Likewise.
12510
+ (REG_ALLOC_ORDER): Move fr13 to be lower in priority so that the
12511
+ TFmode temporary used by some of the direct move instructions to
12512
+ get two FP temporary registers does not force creation of a stack
12514
+ (VLOGICAL_REGNO_P): Allow vector logical operations in GPRs.
12515
+ (MODES_TIEABLE_P): Move the VSX tests above the Altivec tests so
12516
+ that any VSX registers are tieable, even if they are also an
12517
+ Altivec vector mode.
12518
+ (r6000_reg_class_enum): Add wm, wr, wv constraints.
12519
+ (RS6000_BTM_P8_VECTOR): Power8 builtin support.
12520
+ (RS6000_BTM_CRYPTO): Likewise.
12521
+ (RS6000_BTM_COMMON): Likewise.
12523
+ * config/rs6000/rs6000.md (cpu attribute): Add power8.
12524
+ * config/rs6000/rs6000-opts.h (PROCESSOR_POWER8): Likewise.
12525
+ (enum rs6000_vector): Add power8 vector support.
12527
+2013-05-06 Michael Meissner <meissner@linux.vnet.ibm.com>
12529
+ Merge up to 198656.
12530
+ * REVISION: Update subversion id.
12532
+ Backport from trunk
12533
+ 2013-05-03 Michael Meissner <meissner@linux.vnet.ibm.com>
12536
+ * config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Use DFmode
12537
+ to save TFmode registers and DImode to save TImode registers for
12538
+ caller save operations.
12539
+ (HARD_REGNO_CALL_PART_CLOBBERED): TFmode and TDmode do not need to
12540
+ mark being partially clobbered since they only use the first
12543
+ * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): TFmode
12544
+ and TDmode only use the upper 64-bits of each VSX register.
12546
+2013-04-09 Michael Meissner <meissner@linux.vnet.ibm.com>
12548
+ Merge up to 197642.
12549
+ * REVISION: Update subversion id.
12551
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
12553
+ Backport from mainline
12554
+ 2013-03-20 Pat Haugen <pthaugen@us.ibm.com>
12556
+ * config/rs6000/predicates.md (indexed_address, update_address_mem
12557
+ update_indexed_address_mem): New predicates.
12558
+ * config/rs6000/vsx.md (vsx_extract_<mode>_zero): Set correct "type"
12559
+ attribute for load/store instructions.
12560
+ * config/rs6000/dfp.md (movsd_store): Likewise.
12561
+ (movsd_load): Likewise.
12562
+ * config/rs6000/rs6000.md (zero_extend<mode>di2_internal1): Likewise.
12563
+ (unnamed HI->DI extend define_insn): Likewise.
12564
+ (unnamed SI->DI extend define_insn): Likewise.
12565
+ (unnamed QI->SI extend define_insn): Likewise.
12566
+ (unnamed QI->HI extend define_insn): Likewise.
12567
+ (unnamed HI->SI extend define_insn): Likewise.
12568
+ (unnamed HI->SI extend define_insn): Likewise.
12569
+ (extendsfdf2_fpr): Likewise.
12570
+ (movsi_internal1): Likewise.
12571
+ (movsi_internal1_single): Likewise.
12572
+ (movhi_internal): Likewise.
12573
+ (movqi_internal): Likewise.
12574
+ (movcc_internal1): Correct mnemonic for stw insn. Set correct "type"
12575
+ attribute for load/store instructions.
12576
+ (mov<mode>_hardfloat): Set correct "type" attribute for load/store
12578
+ (mov<mode>_softfloat): Likewise.
12579
+ (mov<mode>_hardfloat32): Likewise.
12580
+ (mov<mode>_hardfloat64): Likewise.
12581
+ (mov<mode>_softfloat64): Likewise.
12582
+ (movdi_internal32): Likewise.
12583
+ (movdi_internal64): Likewise.
12584
+ (probe_stack_<mode>): Likewise.
12586
+ Backport from mainline
12587
+ 2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
12589
+ * config/rs6000/vector.md (VEC_R): Add 32-bit integer, binary
12590
+ floating point, and decimal floating point to reload iterator.
12592
+ * config/rs6000/constraints.md (wl constraint): New constraints to
12593
+ return FLOAT_REGS if certain options are used to reduce the number
12594
+ of separate patterns that exist in the file.
12595
+ (wx constraint): Likewise.
12596
+ (wz constraint): Likewise.
12598
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): If
12599
+ -mdebug=reg, print wg, wl, wx, and wz constraints.
12600
+ (rs6000_init_hard_regno_mode_ok): Initialize new constraints.
12601
+ Initialize the reload functions for 64-bit binary/decimal floating
12603
+ (reg_offset_addressing_ok_p): If we are on a power7 or later, use
12604
+ LFIWZX and STFIWX to load/store 32-bit decimal types, and don't
12605
+ create the buffer on the stack to overcome not having a 32-bit
12607
+ (rs6000_emit_move): Likewise.
12608
+ (rs6000_secondary_memory_needed_rtx): Likewise.
12609
+ (rs6000_alloc_sdmode_stack_slot): Likewise.
12610
+ (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f
12611
+ via xxlxor, just like DFmode 0.0.
12613
+ * config/rs6000/rs6000.h (TARGET_NO_SDMODE_STACK): New macro,
12614
+ define as 1 if we are running on a power7 or newer.
12615
+ (enum r6000_reg_class_enum): Add new constraints.
12617
+ * config/rs6000/dfp.md (movsd): Delete, combine with binary
12618
+ floating point moves in rs6000.md. Combine power6x (mfpgpr) moves
12619
+ with other moves by using conditional constraits (wg). Use LFIWZX
12620
+ and STFIWX for loading SDmode on power7. Use xxlxor to create
12622
+ (movsd splitter): Likewise.
12623
+ (movsd_hardfloat): Likewise.
12624
+ (movsd_softfloat): Likewise.
12626
+ * config/rs6000/rs6000.md (FMOVE32): New iterators to combine
12627
+ binary and decimal floating point moves.
12628
+ (fmove_ok): New attributes to combine binary and decimal floating
12629
+ point moves, and to combine power6x (mfpgpr) moves along normal
12631
+ (real_value_to_target): Likewise.
12632
+ (f32_lr): Likewise.
12633
+ (f32_lm): Likewise.
12634
+ (f32_li): Likewise.
12635
+ (f32_sr): Likewise.
12636
+ (f32_sm): Likewise.
12637
+ (f32_si): Likewise.
12638
+ (movsf): Combine binary and decimal floating point moves. Combine
12639
+ power6x (mfpgpr) moves with other moves by using conditional
12640
+ constraits (wg). Use LFIWZX and STFIWX for loading SDmode on
12642
+ (mov<mode> for SFmode/SDmode); Likewise.
12643
+ (SFmode/SDmode splitters): Likewise.
12644
+ (movsf_hardfloat): Likewise.
12645
+ (mov<mode>_hardfloat for SFmode/SDmode): Likewise.
12646
+ (movsf_softfloat): Likewise.
12647
+ (mov<mode>_softfloat for SFmode/SDmode): Likewise.
12649
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wl,
12650
+ wx and wz constraints.
12652
+ * config/rs6000/constraints.md (wg constraint): New constraint to
12653
+ return FLOAT_REGS if -mmfpgpr (power6x) was used.
12655
+ * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wg
12658
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): If
12659
+ -mdebug=reg, print wg, wl, wx, and wz constraints.
12660
+ (rs6000_init_hard_regno_mode_ok): Initialize new constraints.
12661
+ Initialize the reload functions for 64-bit binary/decimal floating
12663
+ (reg_offset_addressing_ok_p): If we are on a power7 or later, use
12664
+ LFIWZX and STFIWX to load/store 32-bit decimal types, and don't
12665
+ create the buffer on the stack to overcome not having a 32-bit
12667
+ (rs6000_emit_move): Likewise.
12668
+ (rs6000_secondary_memory_needed_rtx): Likewise.
12669
+ (rs6000_alloc_sdmode_stack_slot): Likewise.
12670
+ (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f
12671
+ via xxlxor, just like DFmode 0.0.
12674
+ * config/rs6000/dfp.md (movdd): Delete, combine with binary
12675
+ floating point moves in rs6000.md. Combine power6x (mfpgpr) moves
12676
+ with other moves by using conditional constraits (wg). Use LFIWZX
12677
+ and STFIWX for loading SDmode on power7.
12678
+ (movdd splitters): Likewise.
12679
+ (movdd_hardfloat32): Likewise.
12680
+ (movdd_softfloat32): Likewise.
12681
+ (movdd_hardfloat64_mfpgpr): Likewise.
12682
+ (movdd_hardfloat64): Likewise.
12683
+ (movdd_softfloat64): Likewise.
12685
+ * config/rs6000/rs6000.md (FMOVE64): New iterators to combine
12686
+ 64-bit binary and decimal floating point moves.
12687
+ (FMOVE64X): Likewise.
12688
+ (movdf): Combine 64-bit binary and decimal floating point moves.
12689
+ Combine power6x (mfpgpr) moves with other moves by using
12690
+ conditional constraits (wg).
12691
+ (mov<mode> for DFmode/DDmode): Likewise.
12692
+ (DFmode/DDmode splitters): Likewise.
12693
+ (movdf_hardfloat32): Likewise.
12694
+ (mov<mode>_hardfloat32 for DFmode/DDmode): Likewise.
12695
+ (movdf_softfloat32): Likewise.
12696
+ (movdf_hardfloat64_mfpgpr): Likewise.
12697
+ (movdf_hardfloat64): Likewise.
12698
+ (mov<mode>_hardfloat64 for DFmode/DDmode): Likewise.
12699
+ (movdf_softfloat64): Likewise.
12700
+ (mov<mode>_softfloat64 for DFmode/DDmode): Likewise.
12701
+ (reload_<mode>_load): Move to later in the file so they aren't in
12702
+ the middle of the floating point move insns.
12703
+ (reload_<mode>_store): Likewise.
12705
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wg
12708
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print out wg
12709
+ constraint if -mdebug=reg.
12710
+ (rs6000_initi_hard_regno_mode_ok): Enable wg constraint if
12711
+ -mfpgpr. Enable using dd reload support if needed.
12713
+ * config/rs6000/dfp.md (movtd): Delete, combine with 128-bit
12714
+ binary and decimal floating point moves in rs6000.md.
12715
+ (movtd_internal): Likewise.
12717
+ * config/rs6000/rs6000.md (FMOVE128): Combine 128-bit binary and
12718
+ decimal floating point moves.
12719
+ (movtf): Likewise.
12720
+ (movtf_internal): Likewise.
12721
+ (mov<mode>_internal, TDmode/TFmode): Likewise.
12722
+ (movtf_softfloat): Likewise.
12723
+ (mov<mode>_softfloat, TDmode/TFmode): Likewise.
12725
+ * config/rs6000/rs6000.md (movdi_mfpgpr): Delete, combine with
12726
+ movdi_internal64, using wg constraint for move direct operations.
12727
+ (movdi_internal64): Likewise.
12729
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print
12730
+ MODES_TIEABLE_P for selected modes. Print the numerical value of
12731
+ the various virtual registers. Use GPR/FPR first/last values,
12732
+ instead of hard coding the register numbers. Print which modes
12733
+ have reload functions registered.
12734
+ (rs6000_option_override_internal): If -mdebug=reg, trace the
12735
+ options settings before/after setting cpu, target and subtarget
12737
+ (rs6000_secondary_reload_trace): Improve the RTL dump for
12738
+ -mdebug=addr and for secondary reload failures in
12739
+ rs6000_secondary_reload_inner.
12740
+ (rs6000_secondary_reload_fail): Likewise.
12741
+ (rs6000_secondary_reload_inner): Likewise.
12743
+ * config/rs6000/rs6000.md (FIRST_GPR_REGNO): Add convenience
12744
+ macros for first/last GPR and FPR registers.
12745
+ (LAST_GPR_REGNO): Likewise.
12746
+ (FIRST_FPR_REGNO): Likewise.
12747
+ (LAST_FPR_REGNO): Likewise.
12749
+ * config/rs6000/vector.md (mul<mode>3): Use the combined macro
12750
+ VECTOR_UNIT_ALTIVEC_OR_VSX_P instead of separate calls to
12751
+ VECTOR_UNIT_ALTIVEC_P and VECTOR_UNIT_VSX_P.
12752
+ (vcond<mode><mode>): Likewise.
12753
+ (vcondu<mode><mode>): Likewise.
12754
+ (vector_gtu<mode>): Likewise.
12755
+ (vector_gte<mode>): Likewise.
12756
+ (xor<mode>3): Don't allow logical operations on TImode in 32-bit
12757
+ to prevent the compiler from converting DImode operations to
12759
+ (ior<mode>3): Likewise.
12760
+ (and<mode>3): Likewise.
12761
+ (one_cmpl<mode>2): Likewise.
12762
+ (nor<mode>3): Likewise.
12763
+ (andc<mode>3): Likewise.
12765
+ * config/rs6000/constraints.md (wt constraint): New constraint
12766
+ that returns VSX_REGS if TImode is allowed in VSX registers.
12768
+ * config/rs6000/predicates.md (easy_fp_constant): 0.0f is an easy
12769
+ constant under VSX.
12771
+ * config/rs6000/rs6000-modes.def (PTImode): Define, PTImode is
12772
+ similar to TImode, but it is restricted to being in the GPRs.
12774
+ * config/rs6000/rs6000.opt (-mvsx-timode): New switch to allow
12775
+ TImode to occupy a single VSX register.
12777
+ * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Default to
12778
+ -mvsx-timode for power7/power8.
12779
+ (power7 cpu): Likewise.
12780
+ (power8 cpu): Likewise.
12782
+ * config/rs6000/rs6000.c (rs6000_hard_regno_nregs_internal): Make
12783
+ sure that TFmode/TDmode take up two registers if they are ever
12784
+ allowed in the upper VSX registers.
12785
+ (rs6000_hard_regno_mode_ok): If -mvsx-timode, allow TImode in VSX
12787
+ (rs6000_init_hard_regno_mode_ok): Likewise.
12788
+ (rs6000_debug_reg_global): Add debugging for PTImode and wt
12789
+ constraint. Print if LRA is turned on.
12790
+ (rs6000_option_override_internal): Give an error if -mvsx-timode
12791
+ and VSX is not enabled.
12792
+ (invalid_e500_subreg): Handle PTImode, restricting it to GPRs. If
12793
+ -mvsx-timode, restrict TImode to reg+reg addressing, and PTImode
12794
+ to reg+offset addressing. Use PTImode when checking offset
12795
+ addresses for validity.
12796
+ (reg_offset_addressing_ok_p): Likewise.
12797
+ (rs6000_legitimate_offset_address_p): Likewise.
12798
+ (rs6000_legitimize_address): Likewise.
12799
+ (rs6000_legitimize_reload_address): Likewise.
12800
+ (rs6000_legitimate_address_p): Likewise.
12801
+ (rs6000_eliminate_indexed_memrefs): Likewise.
12802
+ (rs6000_emit_move): Likewise.
12803
+ (rs6000_secondary_reload): Likewise.
12804
+ (rs6000_secondary_reload_inner): Handle PTImode. Allow 64-bit
12805
+ reloads to fpr registers to continue to use reg+offset addressing,
12806
+ but 64-bit reloads to altivec registers need reg+reg addressing.
12807
+ Drop test for PRE_MODIFY, since VSX loads/stores no longer support
12808
+ it. Treat LO_SUM like a PLUS operation.
12809
+ (rs6000_secondary_reload_class): If type is 64-bit, prefer to use
12810
+ FLOAT_REGS instead of VSX_RGS to allow use of reg+offset
12812
+ (rs6000_cannot_change_mode_class): Do not allow TImode in VSX
12813
+ registers to share a register with a smaller sized type, since VSX
12814
+ puts scalars in the upper 64-bits.
12815
+ (print_operand): Add support for PTImode.
12816
+ (rs6000_register_move_cost): Use VECTOR_MEM_VSX_P instead of
12817
+ VECTOR_UNIT_VSX_P to catch types that can be loaded in VSX
12818
+ registers, but don't have arithmetic support.
12819
+ (rs6000_memory_move_cost): Add test for VSX.
12820
+ (rs6000_opt_masks): Add -mvsx-timode.
12822
+ * config/rs6000/vsx.md (VSm): Change to use 64-bit aligned moves
12825
+ (VSr): Use wt constraint for TImode.
12826
+ (VSv): Drop TImode support.
12827
+ (vsx_movti): Delete, replace with versions for 32-bit and 64-bit.
12828
+ (vsx_movti_64bit): Likewise.
12829
+ (vsx_movti_32bit): Likewise.
12830
+ (vec_store_<mode>): Use VSX iterator instead of vector iterator.
12831
+ (vsx_and<mode>3): Delete use of '?' constraint on inputs, just put
12832
+ one '?' on the appropriate output constraint. Do not allow TImode
12833
+ logical operations on 32-bit systems.
12834
+ (vsx_ior<mode>3): Likewise.
12835
+ (vsx_xor<mode>3): Likewise.
12836
+ (vsx_one_cmpl<mode>2): Likewise.
12837
+ (vsx_nor<mode>3): Likewise.
12838
+ (vsx_andc<mode>3): Likewise.
12839
+ (vsx_concat_<mode>): Likewise.
12840
+ (vsx_xxpermdi_<mode>): Fix thinko for non V2DF/V2DI modes.
12842
+ * config/rs6000/rs6000.h (MASK_VSX_TIMODE): Map from
12843
+ OPTION_MASK_VSX_TIMODE.
12844
+ (enum rs6000_reg_class_enum): Add RS6000_CONSTRAINT_wt.
12845
+ (STACK_SAVEAREA_MODE): Use PTImode instead of TImode.
12847
+ * config/rs6000/rs6000.md (INT mode attribute): Add PTImode.
12848
+ (TI2 iterator): New iterator for TImode, PTImode.
12849
+ (wd mode attribute): Add values for vector types.
12850
+ (movti_string): Replace TI move operations with operations for
12851
+ TImode and PTImode. Add support for TImode being allowed in VSX
12853
+ (mov<mode>_string, TImode/PTImode): Likewise.
12854
+ (movti_ppc64): Likewise.
12855
+ (mov<mode>_ppc64, TImode/PTImode): Likewise.
12856
+ (TI mode splitters): Likewise.
12858
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wt
12861
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
12863
+ Clone branch from gcc-4_8-branch, subversion id 196835.
12864
+ * REVISION: New file, track subversion id.
12866
--- a/src/gcc/calls.c
12867
+++ b/src/gcc/calls.c
12868
@@ -983,6 +983,7 @@
12870
for (i = 0; i < num_actuals; i++)
12871
if (args[i].reg != 0 && ! args[i].pass_on_stack
12872
+ && GET_CODE (args[i].reg) != PARALLEL
12873
&& args[i].mode == BLKmode
12874
&& MEM_P (args[i].value)
12875
&& (MEM_ALIGN (args[i].value)
12876
@@ -1327,6 +1328,7 @@
12880
+ reg_parm_stack_space,
12881
args[i].pass_on_stack ? 0 : args[i].partial,
12882
fndecl, args_size, &args[i].locate);
12883
#ifdef BLOCK_REG_PADDING
12884
@@ -3171,7 +3173,9 @@
12885
group load/store machinery below. */
12886
if (!structure_value_addr
12887
&& !pcc_struct_value
12888
+ && TYPE_MODE (rettype) != VOIDmode
12889
&& TYPE_MODE (rettype) != BLKmode
12890
+ && REG_P (valreg)
12891
&& targetm.calls.return_in_msb (rettype))
12893
if (shift_return_value (TYPE_MODE (rettype), false, valreg))
12894
@@ -3734,7 +3738,8 @@
12896
argvec[count].reg != 0,
12898
- 0, NULL_TREE, &args_size, &argvec[count].locate);
12899
+ reg_parm_stack_space, 0,
12900
+ NULL_TREE, &args_size, &argvec[count].locate);
12902
if (argvec[count].reg == 0 || argvec[count].partial != 0
12903
|| reg_parm_stack_space > 0)
12904
@@ -3821,7 +3826,7 @@
12906
argvec[count].reg != 0,
12908
- argvec[count].partial,
12909
+ reg_parm_stack_space, argvec[count].partial,
12910
NULL_TREE, &args_size, &argvec[count].locate);
12911
args_size.constant += argvec[count].locate.size.constant;
12912
gcc_assert (!argvec[count].locate.size.var);
12913
--- a/src/gcc/REVISION
12914
+++ b/src/gcc/REVISION
12916
+[ibm/gcc-4_8-branch merged from gcc-4_8-branch, revision 208295]
12917
--- a/src/gcc/config.gcc
12918
+++ b/src/gcc/config.gcc
12919
@@ -420,7 +420,7 @@
12923
- extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h"
12924
+ extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h"
12925
need_64bit_hwint=yes
12927
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[345678]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|Xe6500)
12928
@@ -2081,7 +2081,7 @@
12929
tmake_file="rs6000/t-fprules rs6000/t-ppcos ${tmake_file} rs6000/t-ppccomm"
12932
- tm_file="${tm_file} rs6000/sysv4le.h" ;;
12933
+ tm_file="${tm_file} rs6000/sysv4le.h" ;;
12937
@@ -2104,6 +2104,19 @@
12939
tm_file="rs6000/biarch64.h ${tm_file} rs6000/linux64.h glibc-stdint.h"
12940
tmake_file="$tmake_file rs6000/t-linux64"
12941
+ case ${target} in
12943
+ tmake_file="$tmake_file rs6000/t-linux64le"
12944
+ case ${enable_targets} in
12945
+ all | *powerpc64-* | *powerpc-*)
12946
+ tmake_file="$tmake_file rs6000/t-linux64lebe" ;;
12949
+ case ${enable_targets} in
12950
+ all | *powerpc64le-* | *powerpcle-*)
12951
+ tmake_file="$tmake_file rs6000/t-linux64bele" ;;
12954
extra_options="${extra_options} rs6000/linux64.opt"
12957
@@ -3509,7 +3522,7 @@
12960
powerpc*-*-* | rs6000-*-*)
12961
- supported_defaults="cpu cpu_32 cpu_64 float tune tune_32 tune_64"
12962
+ supported_defaults="abi cpu cpu_32 cpu_64 float tune tune_32 tune_64"
12964
for which in cpu cpu_32 cpu_64 tune tune_32 tune_64; do
12965
eval "val=\$with_$which"
12966
@@ -3546,6 +3559,16 @@
12971
+ case "$with_abi" in
12972
+ "" | elfv1 | elfv2 )
12976
+ echo "Unknown ABI used in --with-abi=$with_abi"
12983
--- a/src/gcc/config/rs6000/power8.md
12984
+++ b/src/gcc/config/rs6000/power8.md
12986
+;; Scheduling description for IBM POWER8 processor.
12987
+;; Copyright (C) 2013 Free Software Foundation, Inc.
12989
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
12991
+;; This file is part of GCC.
12993
+;; GCC is free software; you can redistribute it and/or modify it
12994
+;; under the terms of the GNU General Public License as published
12995
+;; by the Free Software Foundation; either version 3, or (at your
12996
+;; option) any later version.
12998
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
12999
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13000
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
13001
+;; License for more details.
13003
+;; You should have received a copy of the GNU General Public License
13004
+;; along with GCC; see the file COPYING3. If not see
13005
+;; <http://www.gnu.org/licenses/>.
13007
+(define_automaton "power8fxu,power8lsu,power8vsu,power8misc")
13009
+(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu")
13010
+(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu")
13011
+(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu")
13012
+(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu")
13013
+(define_cpu_unit "bpu_power8,cru_power8" "power8misc")
13014
+(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\
13015
+ du5_power8,du6_power8" "power8misc")
13018
+; Dispatch group reservations
13019
+(define_reservation "DU_any_power8"
13020
+ "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\
13023
+; 2-way Cracked instructions go in slots 0-1
13024
+; (can also have a second in slots 3-4 if insns are adjacent)
13025
+(define_reservation "DU_cracked_power8"
13026
+ "du0_power8+du1_power8")
13028
+; Insns that are first in group
13029
+(define_reservation "DU_first_power8"
13032
+; Insns that are first and last in group
13033
+(define_reservation "DU_both_power8"
13034
+ "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\
13035
+ du5_power8+du6_power8")
13037
+; Dispatch slots are allocated in order conforming to program order.
13038
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\
13039
+ du5_power8,du6_power8")
13040
+(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\
13042
+(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8")
13043
+(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8")
13044
+(absence_set "du4_power8" "du5_power8,du6_power8")
13045
+(absence_set "du5_power8" "du6_power8")
13048
+; Execution unit reservations
13049
+(define_reservation "FXU_power8"
13050
+ "fxu0_power8|fxu1_power8")
13052
+(define_reservation "LU_power8"
13053
+ "lu0_power8|lu1_power8")
13055
+(define_reservation "LSU_power8"
13056
+ "lsu0_power8|lsu1_power8")
13058
+(define_reservation "LU_or_LSU_power8"
13059
+ "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8")
13061
+(define_reservation "VSU_power8"
13062
+ "vsu0_power8|vsu1_power8")
13066
+(define_insn_reservation "power8-load" 3
13067
+ (and (eq_attr "type" "load")
13068
+ (eq_attr "cpu" "power8"))
13069
+ "DU_any_power8,LU_or_LSU_power8")
13071
+(define_insn_reservation "power8-load-update" 3
13072
+ (and (eq_attr "type" "load_u,load_ux")
13073
+ (eq_attr "cpu" "power8"))
13074
+ "DU_cracked_power8,LU_or_LSU_power8+FXU_power8")
13076
+(define_insn_reservation "power8-load-ext" 3
13077
+ (and (eq_attr "type" "load_ext")
13078
+ (eq_attr "cpu" "power8"))
13079
+ "DU_cracked_power8,LU_or_LSU_power8,FXU_power8")
13081
+(define_insn_reservation "power8-load-ext-update" 3
13082
+ (and (eq_attr "type" "load_ext_u,load_ext_ux")
13083
+ (eq_attr "cpu" "power8"))
13084
+ "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8")
13086
+(define_insn_reservation "power8-fpload" 5
13087
+ (and (eq_attr "type" "fpload,vecload")
13088
+ (eq_attr "cpu" "power8"))
13089
+ "DU_any_power8,LU_power8")
13091
+(define_insn_reservation "power8-fpload-update" 5
13092
+ (and (eq_attr "type" "fpload_u,fpload_ux")
13093
+ (eq_attr "cpu" "power8"))
13094
+ "DU_cracked_power8,LU_power8+FXU_power8")
13096
+(define_insn_reservation "power8-store" 5 ; store-forwarding latency
13097
+ (and (eq_attr "type" "store,store_u")
13098
+ (eq_attr "cpu" "power8"))
13099
+ "DU_any_power8,LSU_power8+LU_power8")
13101
+(define_insn_reservation "power8-store-update-indexed" 5
13102
+ (and (eq_attr "type" "store_ux")
13103
+ (eq_attr "cpu" "power8"))
13104
+ "DU_cracked_power8,LSU_power8+LU_power8")
13106
+(define_insn_reservation "power8-fpstore" 5
13107
+ (and (eq_attr "type" "fpstore")
13108
+ (eq_attr "cpu" "power8"))
13109
+ "DU_any_power8,LSU_power8+VSU_power8")
13111
+(define_insn_reservation "power8-fpstore-update" 5
13112
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
13113
+ (eq_attr "cpu" "power8"))
13114
+ "DU_any_power8,LSU_power8+VSU_power8")
13116
+(define_insn_reservation "power8-vecstore" 5
13117
+ (and (eq_attr "type" "vecstore")
13118
+ (eq_attr "cpu" "power8"))
13119
+ "DU_cracked_power8,LSU_power8+VSU_power8")
13121
+(define_insn_reservation "power8-larx" 3
13122
+ (and (eq_attr "type" "load_l")
13123
+ (eq_attr "cpu" "power8"))
13124
+ "DU_both_power8,LU_or_LSU_power8")
13126
+(define_insn_reservation "power8-stcx" 10
13127
+ (and (eq_attr "type" "store_c")
13128
+ (eq_attr "cpu" "power8"))
13129
+ "DU_both_power8,LSU_power8+LU_power8")
13131
+(define_insn_reservation "power8-sync" 1
13132
+ (and (eq_attr "type" "sync,isync")
13133
+ (eq_attr "cpu" "power8"))
13134
+ "DU_both_power8,LSU_power8")
13138
+(define_insn_reservation "power8-1cyc" 1
13139
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
13140
+ var_shift_rotate,exts,isel")
13141
+ (eq_attr "cpu" "power8"))
13142
+ "DU_any_power8,FXU_power8")
13144
+; Extra cycle to LU/LSU
13145
+(define_bypass 2 "power8-1cyc"
13146
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
13147
+ power8-vecstore,power8-larx,power8-stcx")
13148
+; "power8-load,power8-load-update,power8-load-ext,\
13149
+; power8-load-ext-update,power8-fpload,power8-fpload-update,\
13150
+; power8-store,power8-store-update,power8-store-update-indexed,\
13151
+; power8-fpstore,power8-fpstore-update,power8-vecstore,\
13152
+; power8-larx,power8-stcx")
13154
+(define_insn_reservation "power8-2cyc" 2
13155
+ (and (eq_attr "type" "cntlz,popcnt")
13156
+ (eq_attr "cpu" "power8"))
13157
+ "DU_any_power8,FXU_power8")
13159
+(define_insn_reservation "power8-two" 2
13160
+ (and (eq_attr "type" "two")
13161
+ (eq_attr "cpu" "power8"))
13162
+ "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8")
13164
+(define_insn_reservation "power8-three" 3
13165
+ (and (eq_attr "type" "three")
13166
+ (eq_attr "cpu" "power8"))
13167
+ "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8")
13169
+; cmp - Normal compare insns
13170
+(define_insn_reservation "power8-cmp" 2
13171
+ (and (eq_attr "type" "cmp")
13172
+ (eq_attr "cpu" "power8"))
13173
+ "DU_any_power8,FXU_power8")
13175
+; fast_compare : add./and./nor./etc
13176
+(define_insn_reservation "power8-fast-compare" 2
13177
+ (and (eq_attr "type" "fast_compare")
13178
+ (eq_attr "cpu" "power8"))
13179
+ "DU_any_power8,FXU_power8")
13181
+; compare : rldicl./exts./etc
13182
+; delayed_compare : rlwinm./slwi./etc
13183
+; var_delayed_compare : rlwnm./slw./etc
13184
+(define_insn_reservation "power8-compare" 2
13185
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
13186
+ (eq_attr "cpu" "power8"))
13187
+ "DU_cracked_power8,FXU_power8,FXU_power8")
13189
+; Extra cycle to LU/LSU
13190
+(define_bypass 3 "power8-fast-compare,power8-compare"
13191
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
13192
+ power8-vecstore,power8-larx,power8-stcx")
13194
+; 5 cycle CR latency
13195
+(define_bypass 5 "power8-fast-compare,power8-compare"
13196
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
13198
+(define_insn_reservation "power8-mul" 4
13199
+ (and (eq_attr "type" "imul,imul2,imul3,lmul")
13200
+ (eq_attr "cpu" "power8"))
13201
+ "DU_any_power8,FXU_power8")
13203
+(define_insn_reservation "power8-mul-compare" 4
13204
+ (and (eq_attr "type" "imul_compare,lmul_compare")
13205
+ (eq_attr "cpu" "power8"))
13206
+ "DU_cracked_power8,FXU_power8")
13208
+; Extra cycle to LU/LSU
13209
+(define_bypass 5 "power8-mul,power8-mul-compare"
13210
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
13211
+ power8-vecstore,power8-larx,power8-stcx")
13213
+; 7 cycle CR latency
13214
+(define_bypass 7 "power8-mul,power8-mul-compare"
13215
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
13217
+; FXU divides are not pipelined
13218
+(define_insn_reservation "power8-idiv" 37
13219
+ (and (eq_attr "type" "idiv")
13220
+ (eq_attr "cpu" "power8"))
13221
+ "DU_any_power8,fxu0_power8*37|fxu1_power8*37")
13223
+(define_insn_reservation "power8-ldiv" 68
13224
+ (and (eq_attr "type" "ldiv")
13225
+ (eq_attr "cpu" "power8"))
13226
+ "DU_any_power8,fxu0_power8*68|fxu1_power8*68")
13228
+(define_insn_reservation "power8-mtjmpr" 5
13229
+ (and (eq_attr "type" "mtjmpr")
13230
+ (eq_attr "cpu" "power8"))
13231
+ "DU_first_power8,FXU_power8")
13233
+; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode
13234
+(define_insn_reservation "power8-mtcr" 3
13235
+ (and (eq_attr "type" "mtcr")
13236
+ (eq_attr "cpu" "power8"))
13237
+ "DU_both_power8,FXU_power8")
13241
+(define_insn_reservation "power8-mfjmpr" 5
13242
+ (and (eq_attr "type" "mfjmpr")
13243
+ (eq_attr "cpu" "power8"))
13244
+ "DU_first_power8,cru_power8+FXU_power8")
13246
+(define_insn_reservation "power8-crlogical" 3
13247
+ (and (eq_attr "type" "cr_logical,delayed_cr")
13248
+ (eq_attr "cpu" "power8"))
13249
+ "DU_first_power8,cru_power8")
13251
+(define_insn_reservation "power8-mfcr" 5
13252
+ (and (eq_attr "type" "mfcr")
13253
+ (eq_attr "cpu" "power8"))
13254
+ "DU_both_power8,cru_power8")
13256
+(define_insn_reservation "power8-mfcrf" 3
13257
+ (and (eq_attr "type" "mfcrf")
13258
+ (eq_attr "cpu" "power8"))
13259
+ "DU_first_power8,cru_power8")
13263
+; Branches take dispatch slot 7, but reserve any remaining prior slots to
13264
+; prevent other insns from grabbing them once this is assigned.
13265
+(define_insn_reservation "power8-branch" 3
13266
+ (and (eq_attr "type" "jmpreg,branch")
13267
+ (eq_attr "cpu" "power8"))
13269
+ |du5_power8+du6_power8\
13270
+ |du4_power8+du5_power8+du6_power8\
13271
+ |du3_power8+du4_power8+du5_power8+du6_power8\
13272
+ |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
13273
+ |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
13274
+ |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\
13275
+ du6_power8),bpu_power8")
13277
+; Branch updating LR/CTR feeding mf[lr|ctr]
13278
+(define_bypass 4 "power8-branch" "power8-mfjmpr")
13281
+; VS Unit (includes FP/VSX/VMX/DFP/Crypto)
13282
+(define_insn_reservation "power8-fp" 6
13283
+ (and (eq_attr "type" "fp,dmul")
13284
+ (eq_attr "cpu" "power8"))
13285
+ "DU_any_power8,VSU_power8")
13287
+; Additional 3 cycles for any CR result
13288
+(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch")
13290
+(define_insn_reservation "power8-fpcompare" 8
13291
+ (and (eq_attr "type" "fpcompare")
13292
+ (eq_attr "cpu" "power8"))
13293
+ "DU_any_power8,VSU_power8")
13295
+(define_insn_reservation "power8-sdiv" 27
13296
+ (and (eq_attr "type" "sdiv")
13297
+ (eq_attr "cpu" "power8"))
13298
+ "DU_any_power8,VSU_power8")
13300
+(define_insn_reservation "power8-ddiv" 33
13301
+ (and (eq_attr "type" "ddiv")
13302
+ (eq_attr "cpu" "power8"))
13303
+ "DU_any_power8,VSU_power8")
13305
+(define_insn_reservation "power8-sqrt" 32
13306
+ (and (eq_attr "type" "ssqrt")
13307
+ (eq_attr "cpu" "power8"))
13308
+ "DU_any_power8,VSU_power8")
13310
+(define_insn_reservation "power8-dsqrt" 44
13311
+ (and (eq_attr "type" "dsqrt")
13312
+ (eq_attr "cpu" "power8"))
13313
+ "DU_any_power8,VSU_power8")
13315
+(define_insn_reservation "power8-vecsimple" 2
13316
+ (and (eq_attr "type" "vecperm,vecsimple,veccmp")
13317
+ (eq_attr "cpu" "power8"))
13318
+ "DU_any_power8,VSU_power8")
13320
+(define_insn_reservation "power8-vecnormal" 6
13321
+ (and (eq_attr "type" "vecfloat,vecdouble")
13322
+ (eq_attr "cpu" "power8"))
13323
+ "DU_any_power8,VSU_power8")
13325
+(define_bypass 7 "power8-vecnormal"
13326
+ "power8-vecsimple,power8-veccomplex,power8-fpstore*,\
13327
+ power8-vecstore")
13329
+(define_insn_reservation "power8-veccomplex" 7
13330
+ (and (eq_attr "type" "veccomplex")
13331
+ (eq_attr "cpu" "power8"))
13332
+ "DU_any_power8,VSU_power8")
13334
+(define_insn_reservation "power8-vecfdiv" 25
13335
+ (and (eq_attr "type" "vecfdiv")
13336
+ (eq_attr "cpu" "power8"))
13337
+ "DU_any_power8,VSU_power8")
13339
+(define_insn_reservation "power8-vecdiv" 31
13340
+ (and (eq_attr "type" "vecdiv")
13341
+ (eq_attr "cpu" "power8"))
13342
+ "DU_any_power8,VSU_power8")
13344
+(define_insn_reservation "power8-mffgpr" 5
13345
+ (and (eq_attr "type" "mffgpr")
13346
+ (eq_attr "cpu" "power8"))
13347
+ "DU_any_power8,VSU_power8")
13349
+(define_insn_reservation "power8-mftgpr" 6
13350
+ (and (eq_attr "type" "mftgpr")
13351
+ (eq_attr "cpu" "power8"))
13352
+ "DU_any_power8,VSU_power8")
13354
+(define_insn_reservation "power8-crypto" 7
13355
+ (and (eq_attr "type" "crypto")
13356
+ (eq_attr "cpu" "power8"))
13357
+ "DU_any_power8,VSU_power8")
13359
--- a/src/gcc/config/rs6000/vector.md
13360
+++ b/src/gcc/config/rs6000/vector.md
13361
@@ -24,13 +24,13 @@
13364
;; Vector int modes
13365
-(define_mode_iterator VEC_I [V16QI V8HI V4SI])
13366
+(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
13368
;; Vector float modes
13369
(define_mode_iterator VEC_F [V4SF V2DF])
13371
;; Vector arithmetic modes
13372
-(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF])
13373
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF])
13375
;; Vector modes that need alginment via permutes
13376
(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
13378
(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
13380
;; Vector comparison modes
13381
-(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
13382
+(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF])
13384
;; Vector init/extract modes
13385
(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
13387
(define_mode_iterator VEC_64 [V2DI V2DF])
13389
;; Vector reload iterator
13390
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
13391
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF SF SD SI DF DD DI TI])
13393
;; Base type from vector mode
13394
(define_mode_attr VEC_base [(V16QI "QI")
13399
-;; Vector move instructions.
13400
+;; Vector move instructions. Little-endian VSX loads and stores require
13401
+;; special handling to circumvent "element endianness."
13402
(define_expand "mov<mode>"
13403
[(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
13404
(match_operand:VEC_M 1 "any_operand" ""))]
13405
@@ -104,6 +105,16 @@
13406
&& !vlogical_operand (operands[1], <MODE>mode))
13407
operands[1] = force_reg (<MODE>mode, operands[1]);
13409
+ if (!BYTES_BIG_ENDIAN
13410
+ && VECTOR_MEM_VSX_P (<MODE>mode)
13411
+ && <MODE>mode != TImode
13412
+ && !gpr_or_gpr_p (operands[0], operands[1])
13413
+ && (memory_operand (operands[0], <MODE>mode)
13414
+ ^ memory_operand (operands[1], <MODE>mode)))
13416
+ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
13421
;; Generic vector floating point load/store instructions. These will match
13422
@@ -126,7 +137,9 @@
13423
(match_operand:VEC_L 1 "input_operand" ""))]
13424
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
13425
&& reload_completed
13426
- && gpr_or_gpr_p (operands[0], operands[1])"
13427
+ && gpr_or_gpr_p (operands[0], operands[1])
13428
+ && !direct_move_p (operands[0], operands[1])
13429
+ && !quad_load_store_p (operands[0], operands[1])"
13432
rs6000_split_multireg_move (operands[0], operands[1]);
13433
@@ -249,7 +262,7 @@
13434
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
13435
(mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
13436
(match_operand:VEC_F 2 "vfloat_operand" "")))]
13437
- "VECTOR_UNIT_VSX_P (<MODE>mode) || VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
13438
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13440
if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
13442
@@ -395,7 +408,7 @@
13443
(match_operand:VEC_I 5 "vint_operand" "")])
13444
(match_operand:VEC_I 1 "vint_operand" "")
13445
(match_operand:VEC_I 2 "vint_operand" "")))]
13446
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
13447
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13450
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
13451
@@ -451,7 +464,7 @@
13452
(match_operand:VEC_I 5 "vint_operand" "")])
13453
(match_operand:VEC_I 1 "vint_operand" "")
13454
(match_operand:VEC_I 2 "vint_operand" "")))]
13455
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
13456
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13459
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
13460
@@ -505,7 +518,7 @@
13461
[(set (match_operand:VEC_I 0 "vint_operand" "")
13462
(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
13463
(match_operand:VEC_I 2 "vint_operand" "")))]
13464
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
13465
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13468
(define_expand "vector_geu<mode>"
13469
@@ -512,7 +525,7 @@
13470
[(set (match_operand:VEC_I 0 "vint_operand" "")
13471
(geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
13472
(match_operand:VEC_I 2 "vint_operand" "")))]
13473
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
13474
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13477
(define_insn_and_split "*vector_uneq<mode>"
13478
@@ -595,8 +608,8 @@
13479
(ge:VEC_F (match_dup 2)
13482
- (not:VEC_F (ior:VEC_F (match_dup 3)
13483
- (match_dup 4))))]
13484
+ (and:VEC_F (not:VEC_F (match_dup 3))
13485
+ (not:VEC_F (match_dup 4))))]
13488
operands[3] = gen_reg_rtx (<MODE>mode);
13489
@@ -708,48 +721,19 @@
13493
-;; Vector logical instructions
13494
-(define_expand "xor<mode>3"
13495
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
13496
- (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
13497
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
13498
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13500
+;; Vector count leading zeros
13501
+(define_expand "clz<mode>2"
13502
+ [(set (match_operand:VEC_I 0 "register_operand" "")
13503
+ (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
13504
+ "TARGET_P8_VECTOR")
13506
-(define_expand "ior<mode>3"
13507
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
13508
- (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
13509
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
13510
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13512
+;; Vector population count
13513
+(define_expand "popcount<mode>2"
13514
+ [(set (match_operand:VEC_I 0 "register_operand" "")
13515
+ (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
13516
+ "TARGET_P8_VECTOR")
13518
-(define_expand "and<mode>3"
13519
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
13520
- (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
13521
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
13522
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13525
-(define_expand "one_cmpl<mode>2"
13526
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
13527
- (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))]
13528
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13531
-(define_expand "nor<mode>3"
13532
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
13533
- (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
13534
- (match_operand:VEC_L 2 "vlogical_operand" ""))))]
13535
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13538
-(define_expand "andc<mode>3"
13539
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
13540
- (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))
13541
- (match_operand:VEC_L 1 "vlogical_operand" "")))]
13542
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13546
;; Same size conversions
13547
(define_expand "float<VEC_int><mode>2"
13548
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
13549
@@ -889,7 +873,7 @@
13551
rtx reg = gen_reg_rtx (V4SFmode);
13553
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
13554
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
13555
emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
13558
@@ -901,7 +885,7 @@
13560
rtx reg = gen_reg_rtx (V4SFmode);
13562
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
13563
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
13564
emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
13567
@@ -913,7 +897,7 @@
13569
rtx reg = gen_reg_rtx (V4SImode);
13571
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
13572
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
13573
emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
13576
@@ -925,7 +909,7 @@
13578
rtx reg = gen_reg_rtx (V4SImode);
13580
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
13581
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
13582
emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
13585
@@ -937,7 +921,7 @@
13587
rtx reg = gen_reg_rtx (V4SImode);
13589
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
13590
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
13591
emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
13594
@@ -949,7 +933,7 @@
13596
rtx reg = gen_reg_rtx (V4SImode);
13598
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
13599
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
13600
emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
13603
@@ -963,8 +947,19 @@
13604
(match_operand:V16QI 3 "vlogical_operand" "")]
13605
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
13607
- emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2],
13609
+ if (BYTES_BIG_ENDIAN)
13610
+ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
13611
+ operands[2], operands[3]));
13614
+ /* We have changed lvsr to lvsl, so to complete the transformation
13615
+ of vperm for LE, we must swap the inputs. */
13616
+ rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
13617
+ gen_rtvec (3, operands[2],
13618
+ operands[1], operands[3]),
13620
+ emit_move_insn (operands[0], unspec);
13625
@@ -1064,7 +1059,7 @@
13626
[(set (match_operand:VEC_I 0 "vint_operand" "")
13627
(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
13628
(match_operand:VEC_I 2 "vint_operand" "")))]
13630
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13633
;; Expanders for arithmetic shift left on each vector element
13634
@@ -1072,7 +1067,7 @@
13635
[(set (match_operand:VEC_I 0 "vint_operand" "")
13636
(ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
13637
(match_operand:VEC_I 2 "vint_operand" "")))]
13639
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13642
;; Expanders for logical shift right on each vector element
13643
@@ -1080,7 +1075,7 @@
13644
[(set (match_operand:VEC_I 0 "vint_operand" "")
13645
(lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
13646
(match_operand:VEC_I 2 "vint_operand" "")))]
13648
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13651
;; Expanders for arithmetic shift right on each vector element
13652
@@ -1088,7 +1083,7 @@
13653
[(set (match_operand:VEC_I 0 "vint_operand" "")
13654
(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
13655
(match_operand:VEC_I 2 "vint_operand" "")))]
13657
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
13660
;; Vector reduction expanders for VSX
13661
--- a/src/gcc/config/rs6000/constraints.md
13662
+++ b/src/gcc/config/rs6000/constraints.md
13663
@@ -52,22 +52,62 @@
13666
;; Use w as a prefix to add VSX modes
13667
-;; vector double (V2DF)
13668
+;; any VSX register
13669
+(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
13670
+ "Any VSX register if the -mvsx option was used or NO_REGS.")
13672
(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
13674
+ "VSX vector register to hold vector double data or NO_REGS.")
13676
-;; vector float (V4SF)
13677
(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
13679
+ "VSX vector register to hold vector float data or NO_REGS.")
13681
-;; scalar double (DF)
13682
+(define_register_constraint "wg" "rs6000_constraints[RS6000_CONSTRAINT_wg]"
13683
+ "If -mmfpgpr was used, a floating point register or NO_REGS.")
13685
+(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
13686
+ "Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
13688
+(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]"
13689
+ "VSX register if direct move instructions are enabled, or NO_REGS.")
13691
+;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use
13692
+;; direct move directly, and movsf can't to move between the register sets.
13693
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode
13694
+(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).")
13696
+(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
13697
+ "General purpose register if 64-bit instructions are enabled or NO_REGS.")
13699
(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
13701
+ "VSX vector register to hold scalar double values or NO_REGS.")
13703
-;; any VSX register
13704
-(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
13706
+(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
13707
+ "VSX vector register to hold 128 bit integer or NO_REGS.")
13709
+(define_register_constraint "wu" "rs6000_constraints[RS6000_CONSTRAINT_wu]"
13710
+ "Altivec register to use for float/32-bit int loads/stores or NO_REGS.")
13712
+(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
13713
+ "Altivec register to use for double loads/stores or NO_REGS.")
13715
+(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]"
13716
+ "FP or VSX register to perform float operations under -mvsx or NO_REGS.")
13718
+(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
13719
+ "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
13721
+(define_register_constraint "wy" "rs6000_constraints[RS6000_CONSTRAINT_wy]"
13722
+ "VSX vector register to hold scalar float values or NO_REGS.")
13724
+(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
13725
+ "Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
13727
+;; Lq/stq validates the address for load/store quad
13728
+(define_memory_constraint "wQ"
13729
+ "Memory operand suitable for the load/store quad instructions"
13730
+ (match_operand 0 "quad_memory_operand"))
13732
;; Altivec style load/store that ignores the bottom bits of the address
13733
(define_memory_constraint "wZ"
13734
"Indexed or indirect memory operand, ignoring the bottom 4 bits"
13735
--- a/src/gcc/config/rs6000/predicates.md
13736
+++ b/src/gcc/config/rs6000/predicates.md
13737
@@ -124,6 +124,11 @@
13738
(and (match_code "const_int")
13739
(match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15")))
13741
+;; Return 1 if op is a unsigned 3-bit constant integer.
13742
+(define_predicate "u3bit_cint_operand"
13743
+ (and (match_code "const_int")
13744
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7")))
13746
;; Return 1 if op is a unsigned 5-bit constant integer.
13747
(define_predicate "u5bit_cint_operand"
13748
(and (match_code "const_int")
13749
@@ -135,6 +140,11 @@
13750
(and (match_code "const_int")
13751
(match_test "INTVAL (op) >= -128 && INTVAL (op) <= 127")))
13753
+;; Return 1 if op is a unsigned 10-bit constant integer.
13754
+(define_predicate "u10bit_cint_operand"
13755
+ (and (match_code "const_int")
13756
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023")))
13758
;; Return 1 if op is a constant integer that can fit in a D field.
13759
(define_predicate "short_cint_operand"
13760
(and (match_code "const_int")
13761
@@ -166,6 +176,11 @@
13762
(and (match_code "const_int")
13763
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
13765
+;; Match op = 0..15
13766
+(define_predicate "const_0_to_15_operand"
13767
+ (and (match_code "const_int")
13768
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
13770
;; Return 1 if op is a register that is not special.
13771
(define_predicate "gpc_reg_operand"
13772
(match_operand 0 "register_operand")
13773
@@ -182,9 +197,95 @@
13774
if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
13777
+ if (TARGET_VSX && VSX_REGNO_P (REGNO (op)))
13780
return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op));
13783
+;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't
13784
+;; allow floating point or vector registers.
13785
+(define_predicate "int_reg_operand"
13786
+ (match_operand 0 "register_operand")
13788
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
13791
+ if (GET_CODE (op) == SUBREG)
13792
+ op = SUBREG_REG (op);
13797
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
13800
+ return INT_REGNO_P (REGNO (op));
13803
+;; Like int_reg_operand, but only return true for base registers
13804
+(define_predicate "base_reg_operand"
13805
+ (match_operand 0 "int_reg_operand")
13807
+ if (GET_CODE (op) == SUBREG)
13808
+ op = SUBREG_REG (op);
13813
+ return (REGNO (op) != FIRST_GPR_REGNO);
13816
+;; Return 1 if op is a HTM specific SPR register.
13817
+(define_predicate "htm_spr_reg_operand"
13818
+ (match_operand 0 "register_operand")
13823
+ if (GET_CODE (op) == SUBREG)
13824
+ op = SUBREG_REG (op);
13829
+ switch (REGNO (op))
13831
+ case TFHAR_REGNO:
13832
+ case TFIAR_REGNO:
13833
+ case TEXASR_REGNO:
13839
+ /* Unknown SPR. */
13843
+;; Return 1 if op is a general purpose register that is an even register
13844
+;; which suitable for a load/store quad operation
13845
+(define_predicate "quad_int_reg_operand"
13846
+ (match_operand 0 "register_operand")
13850
+ if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC)
13853
+ if (GET_CODE (op) == SUBREG)
13854
+ op = SUBREG_REG (op);
13860
+ if (r >= FIRST_PSEUDO_REGISTER)
13863
+ return (INT_REGNO_P (r) && ((r & 1) == 0));
13866
;; Return 1 if op is a register that is a condition register field.
13867
(define_predicate "cc_reg_operand"
13868
(match_operand 0 "register_operand")
13869
@@ -315,6 +416,11 @@
13870
&& CONST_DOUBLE_HIGH (op) == 0")
13871
(match_operand 0 "gpc_reg_operand"))))
13873
+;; Like reg_or_logical_cint_operand, but allow vsx registers
13874
+(define_predicate "vsx_reg_or_cint_operand"
13875
+ (ior (match_operand 0 "vsx_register_operand")
13876
+ (match_operand 0 "reg_or_logical_cint_operand")))
13878
;; Return 1 if operand is a CONST_DOUBLE that can be set in a register
13879
;; with no more than one instruction per word.
13880
(define_predicate "easy_fp_constant"
13881
@@ -333,6 +439,11 @@
13885
+ /* The constant 0.0 is easy under VSX. */
13886
+ if ((mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode)
13887
+ && VECTOR_UNIT_VSX_P (DFmode) && op == CONST0_RTX (mode))
13890
if (DECIMAL_FLOAT_MODE_P (mode))
13893
@@ -521,6 +632,55 @@
13894
(and (match_operand 0 "memory_operand")
13895
(match_test "offsettable_nonstrict_memref_p (op)")))
13897
+;; Return 1 if the operand is suitable for load/store quad memory.
13898
+;; This predicate only checks for non-atomic loads/stores.
13899
+(define_predicate "quad_memory_operand"
13900
+ (match_code "mem")
13902
+ rtx addr, op0, op1;
13905
+ if (!TARGET_QUAD_MEMORY)
13908
+ else if (!memory_operand (op, mode))
13911
+ else if (GET_MODE_SIZE (GET_MODE (op)) != 16)
13914
+ else if (MEM_ALIGN (op) < 128)
13919
+ addr = XEXP (op, 0);
13920
+ if (int_reg_operand (addr, Pmode))
13923
+ else if (GET_CODE (addr) != PLUS)
13928
+ op0 = XEXP (addr, 0);
13929
+ op1 = XEXP (addr, 1);
13930
+ ret = (int_reg_operand (op0, Pmode)
13931
+ && GET_CODE (op1) == CONST_INT
13932
+ && IN_RANGE (INTVAL (op1), -32768, 32767)
13933
+ && (INTVAL (op1) & 15) == 0);
13937
+ if (TARGET_DEBUG_ADDR)
13939
+ fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false");
13946
;; Return 1 if the operand is an indexed or indirect memory operand.
13947
(define_predicate "indexed_or_indirect_operand"
13949
@@ -535,6 +695,19 @@
13950
return indexed_or_indirect_address (op, mode);
13953
+;; Like indexed_or_indirect_operand, but also allow a GPR register if direct
13954
+;; moves are supported.
13955
+(define_predicate "reg_or_indexed_operand"
13956
+ (match_code "mem,reg")
13959
+ return indexed_or_indirect_operand (op, mode);
13960
+ else if (TARGET_DIRECT_MOVE)
13961
+ return register_operand (op, mode);
13966
;; Return 1 if the operand is an indexed or indirect memory operand with an
13967
;; AND -16 in it, used to recognize when we need to switch to Altivec loads
13968
;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
13969
@@ -560,6 +733,28 @@
13970
&& REG_P (XEXP (op, 1)))")
13971
(match_operand 0 "address_operand")))
13973
+;; Return 1 if the operand is an index-form address.
13974
+(define_special_predicate "indexed_address"
13975
+ (match_test "(GET_CODE (op) == PLUS
13976
+ && REG_P (XEXP (op, 0))
13977
+ && REG_P (XEXP (op, 1)))"))
13979
+;; Return 1 if the operand is a MEM with an update-form address. This may
13980
+;; also include update-indexed form.
13981
+(define_special_predicate "update_address_mem"
13982
+ (match_test "(MEM_P (op)
13983
+ && (GET_CODE (XEXP (op, 0)) == PRE_INC
13984
+ || GET_CODE (XEXP (op, 0)) == PRE_DEC
13985
+ || GET_CODE (XEXP (op, 0)) == PRE_MODIFY))"))
13987
+;; Return 1 if the operand is a MEM with an update-indexed-form address. Note
13988
+;; that PRE_INC/PRE_DEC will always be non-indexed (i.e. non X-form) since the
13989
+;; increment is based on the mode size and will therefor always be a const.
13990
+(define_special_predicate "update_indexed_address_mem"
13991
+ (match_test "(MEM_P (op)
13992
+ && GET_CODE (XEXP (op, 0)) == PRE_MODIFY
13993
+ && indexed_address (XEXP (XEXP (op, 0), 1), mode))"))
13995
;; Used for the destination of the fix_truncdfsi2 expander.
13996
;; If stfiwx will be used, the result goes to memory; otherwise,
13997
;; we're going to emit a store and a load of a subreg, so the dest is a
13998
@@ -883,7 +1078,8 @@
13999
(and (match_code "symbol_ref")
14000
(match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
14001
&& ((SYMBOL_REF_LOCAL_P (op)
14002
- && (DEFAULT_ABI != ABI_AIX
14003
+ && ((DEFAULT_ABI != ABI_AIX
14004
+ && DEFAULT_ABI != ABI_ELFv2)
14005
|| !SYMBOL_REF_EXTERNAL_P (op)))
14006
|| (op == XEXP (DECL_RTL (current_function_decl),
14008
@@ -1364,6 +1560,26 @@
14012
+;; Return 1 if OP is valid for crsave insn, known to be a PARALLEL.
14013
+(define_predicate "crsave_operation"
14014
+ (match_code "parallel")
14016
+ int count = XVECLEN (op, 0);
14019
+ for (i = 1; i < count; i++)
14021
+ rtx exp = XVECEXP (op, 0, i);
14023
+ if (GET_CODE (exp) != USE
14024
+ || GET_CODE (XEXP (exp, 0)) != REG
14025
+ || GET_MODE (XEXP (exp, 0)) != CCmode
14026
+ || ! CR_REGNO_P (REGNO (XEXP (exp, 0))))
14032
;; Return 1 if OP is valid for lmw insn, known to be a PARALLEL.
14033
(define_predicate "lmw_operation"
14034
(match_code "parallel")
14035
@@ -1534,3 +1750,99 @@
14037
return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL;
14040
+;; Match the first insn (addis) in fusing the combination of addis and loads to
14041
+;; GPR registers on power8.
14042
+(define_predicate "fusion_gpr_addis"
14043
+ (match_code "const_int,high,plus")
14045
+ HOST_WIDE_INT value;
14048
+ if (GET_CODE (op) == HIGH)
14051
+ if (CONST_INT_P (op))
14054
+ else if (GET_CODE (op) == PLUS
14055
+ && base_reg_operand (XEXP (op, 0), Pmode)
14056
+ && CONST_INT_P (XEXP (op, 1)))
14057
+ int_const = XEXP (op, 1);
14062
+ /* Power8 currently will only do the fusion if the top 11 bits of the addis
14063
+ value are all 1's or 0's. */
14064
+ value = INTVAL (int_const);
14065
+ if ((value & (HOST_WIDE_INT)0xffff) != 0)
14068
+ if ((value & (HOST_WIDE_INT)0xffff0000) == 0)
14071
+ return (IN_RANGE (value >> 16, -32, 31));
14074
+;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis
14075
+;; and loads to GPR registers on power8.
14076
+(define_predicate "fusion_gpr_mem_load"
14077
+ (match_code "mem,sign_extend,zero_extend")
14081
+ /* Handle sign/zero extend. */
14082
+ if (GET_CODE (op) == ZERO_EXTEND
14083
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND))
14085
+ op = XEXP (op, 0);
14086
+ mode = GET_MODE (op);
14100
+ if (!TARGET_POWERPC64)
14108
+ addr = XEXP (op, 0);
14109
+ if (GET_CODE (addr) == PLUS)
14111
+ rtx base = XEXP (addr, 0);
14112
+ rtx offset = XEXP (addr, 1);
14114
+ return (base_reg_operand (base, GET_MODE (base))
14115
+ && satisfies_constraint_I (offset));
14118
+ else if (GET_CODE (addr) == LO_SUM)
14120
+ rtx base = XEXP (addr, 0);
14121
+ rtx offset = XEXP (addr, 1);
14123
+ if (!base_reg_operand (base, GET_MODE (base)))
14126
+ else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
14127
+ return small_toc_ref (offset, GET_MODE (offset));
14129
+ else if (TARGET_ELF && !TARGET_POWERPC64)
14130
+ return CONSTANT_P (offset);
14135
--- a/src/gcc/config/rs6000/ppc-asm.h
14136
+++ b/src/gcc/config/rs6000/ppc-asm.h
14137
@@ -256,7 +256,30 @@
14138
* the real function with one or two leading periods respectively.
14141
-#if defined (__powerpc64__)
14142
+#if defined(__powerpc64__) && _CALL_ELF == 2
14144
+/* Defining "toc" above breaks @toc in assembler code. */
14147
+#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name)
14148
+#define JUMP_TARGET(name) FUNC_NAME(name)
14149
+#define FUNC_START(name) \
14150
+ .type FUNC_NAME(name),@function; \
14151
+ .globl FUNC_NAME(name); \
14152
+FUNC_NAME(name): \
14153
+0: addis 2,12,(.TOC.-0b)@ha; \
14154
+ addi 2,2,(.TOC.-0b)@l; \
14155
+ .localentry FUNC_NAME(name),.-FUNC_NAME(name)
14157
+#define HIDDEN_FUNC(name) \
14158
+ FUNC_START(name) \
14159
+ .hidden FUNC_NAME(name);
14161
+#define FUNC_END(name) \
14162
+ .size FUNC_NAME(name),.-FUNC_NAME(name)
14164
+#elif defined (__powerpc64__)
14166
#define FUNC_NAME(name) GLUE(.,name)
14167
#define JUMP_TARGET(name) FUNC_NAME(name)
14168
#define FUNC_START(name) \
14169
--- a/src/gcc/config/rs6000/htm.md
14170
+++ b/src/gcc/config/rs6000/htm.md
14172
+;; Hardware Transactional Memory (HTM) patterns.
14173
+;; Copyright (C) 2013 Free Software Foundation, Inc.
14174
+;; Contributed by Peter Bergner <bergner@vnet.ibm.com>.
14176
+;; This file is part of GCC.
14178
+;; GCC is free software; you can redistribute it and/or modify it
14179
+;; under the terms of the GNU General Public License as published
14180
+;; by the Free Software Foundation; either version 3, or (at your
14181
+;; option) any later version.
14183
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
14184
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14185
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14186
+;; License for more details.
14188
+;; You should have received a copy of the GNU General Public License
14189
+;; along with GCC; see the file COPYING3. If not see
14190
+;; <http://www.gnu.org/licenses/>.
14196
+ (TEXASRU_SPR 131)
14197
+ (MAX_HTM_OPERANDS 4)
14201
+;; UNSPEC_VOLATILE usage
14204
+(define_c_enum "unspecv"
14205
+ [UNSPECV_HTM_TABORT
14206
+ UNSPECV_HTM_TABORTDC
14207
+ UNSPECV_HTM_TABORTDCI
14208
+ UNSPECV_HTM_TABORTWC
14209
+ UNSPECV_HTM_TABORTWCI
14210
+ UNSPECV_HTM_TBEGIN
14211
+ UNSPECV_HTM_TCHECK
14213
+ UNSPECV_HTM_TRECHKPT
14214
+ UNSPECV_HTM_TRECLAIM
14216
+ UNSPECV_HTM_MFSPR
14217
+ UNSPECV_HTM_MTSPR
14221
+(define_expand "tabort"
14222
+ [(set (match_dup 2)
14223
+ (unspec_volatile:CC [(match_operand:SI 1 "int_reg_operand" "")]
14224
+ UNSPECV_HTM_TABORT))
14225
+ (set (match_dup 3)
14226
+ (eq:SI (match_dup 2)
14228
+ (set (match_operand:SI 0 "int_reg_operand" "")
14229
+ (minus:SI (const_int 1) (match_dup 3)))]
14232
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
14233
+ operands[3] = gen_reg_rtx (SImode);
14236
+(define_insn "*tabort_internal"
14237
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
14238
+ (unspec_volatile:CC [(match_operand:SI 0 "int_reg_operand" "r")]
14239
+ UNSPECV_HTM_TABORT))]
14242
+ [(set_attr "type" "htm")
14243
+ (set_attr "length" "4")])
14245
+(define_expand "tabortdc"
14246
+ [(set (match_dup 4)
14247
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
14248
+ (match_operand:SI 2 "gpc_reg_operand" "r")
14249
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
14250
+ UNSPECV_HTM_TABORTDC))
14251
+ (set (match_dup 5)
14252
+ (eq:SI (match_dup 4)
14254
+ (set (match_operand:SI 0 "int_reg_operand" "")
14255
+ (minus:SI (const_int 1) (match_dup 5)))]
14258
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
14259
+ operands[5] = gen_reg_rtx (SImode);
14262
+(define_insn "*tabortdc_internal"
14263
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
14264
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
14265
+ (match_operand:SI 1 "gpc_reg_operand" "r")
14266
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
14267
+ UNSPECV_HTM_TABORTDC))]
14269
+ "tabortdc. %0,%1,%2"
14270
+ [(set_attr "type" "htm")
14271
+ (set_attr "length" "4")])
14273
+(define_expand "tabortdci"
14274
+ [(set (match_dup 4)
14275
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
14276
+ (match_operand:SI 2 "gpc_reg_operand" "r")
14277
+ (match_operand 3 "s5bit_cint_operand" "n")]
14278
+ UNSPECV_HTM_TABORTDCI))
14279
+ (set (match_dup 5)
14280
+ (eq:SI (match_dup 4)
14282
+ (set (match_operand:SI 0 "int_reg_operand" "")
14283
+ (minus:SI (const_int 1) (match_dup 5)))]
14286
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
14287
+ operands[5] = gen_reg_rtx (SImode);
14290
+(define_insn "*tabortdci_internal"
14291
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
14292
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
14293
+ (match_operand:SI 1 "gpc_reg_operand" "r")
14294
+ (match_operand 2 "s5bit_cint_operand" "n")]
14295
+ UNSPECV_HTM_TABORTDCI))]
14297
+ "tabortdci. %0,%1,%2"
14298
+ [(set_attr "type" "htm")
14299
+ (set_attr "length" "4")])
14301
+(define_expand "tabortwc"
14302
+ [(set (match_dup 4)
14303
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
14304
+ (match_operand:SI 2 "gpc_reg_operand" "r")
14305
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
14306
+ UNSPECV_HTM_TABORTWC))
14307
+ (set (match_dup 5)
14308
+ (eq:SI (match_dup 4)
14310
+ (set (match_operand:SI 0 "int_reg_operand" "")
14311
+ (minus:SI (const_int 1) (match_dup 5)))]
14314
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
14315
+ operands[5] = gen_reg_rtx (SImode);
14318
+(define_insn "*tabortwc_internal"
14319
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
14320
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
14321
+ (match_operand:SI 1 "gpc_reg_operand" "r")
14322
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
14323
+ UNSPECV_HTM_TABORTWC))]
14325
+ "tabortwc. %0,%1,%2"
14326
+ [(set_attr "type" "htm")
14327
+ (set_attr "length" "4")])
14329
+(define_expand "tabortwci"
14330
+ [(set (match_dup 4)
14331
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
14332
+ (match_operand:SI 2 "gpc_reg_operand" "r")
14333
+ (match_operand 3 "s5bit_cint_operand" "n")]
14334
+ UNSPECV_HTM_TABORTWCI))
14335
+ (set (match_dup 5)
14336
+ (eq:SI (match_dup 4)
14338
+ (set (match_operand:SI 0 "int_reg_operand" "")
14339
+ (minus:SI (const_int 1) (match_dup 5)))]
14342
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
14343
+ operands[5] = gen_reg_rtx (SImode);
14346
+(define_expand "ttest"
14347
+ [(set (match_dup 1)
14348
+ (unspec_volatile:CC [(const_int 0)
14351
+ UNSPECV_HTM_TABORTWCI))
14352
+ (set (subreg:CC (match_dup 2) 0) (match_dup 1))
14353
+ (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 24)))
14354
+ (parallel [(set (match_operand:SI 0 "int_reg_operand" "")
14355
+ (and:SI (match_dup 3) (const_int 15)))
14356
+ (clobber (scratch:CC))])]
14359
+ operands[1] = gen_rtx_REG (CCmode, CR0_REGNO);
14360
+ operands[2] = gen_reg_rtx (SImode);
14361
+ operands[3] = gen_reg_rtx (SImode);
14364
+(define_insn "*tabortwci_internal"
14365
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
14366
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
14367
+ (match_operand:SI 1 "gpc_reg_operand" "r")
14368
+ (match_operand 2 "s5bit_cint_operand" "n")]
14369
+ UNSPECV_HTM_TABORTWCI))]
14371
+ "tabortwci. %0,%1,%2"
14372
+ [(set_attr "type" "htm")
14373
+ (set_attr "length" "4")])
14375
+(define_expand "tbegin"
14376
+ [(set (match_dup 2)
14377
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
14378
+ UNSPECV_HTM_TBEGIN))
14379
+ (set (match_dup 3)
14380
+ (eq:SI (match_dup 2)
14382
+ (set (match_operand:SI 0 "int_reg_operand" "")
14383
+ (minus:SI (const_int 1) (match_dup 3)))]
14386
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
14387
+ operands[3] = gen_reg_rtx (SImode);
14390
+(define_insn "*tbegin_internal"
14391
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
14392
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
14393
+ UNSPECV_HTM_TBEGIN))]
14396
+ [(set_attr "type" "htm")
14397
+ (set_attr "length" "4")])
14399
+(define_expand "tcheck"
14400
+ [(set (match_dup 2)
14401
+ (unspec_volatile:CC [(match_operand 1 "u3bit_cint_operand" "n")]
14402
+ UNSPECV_HTM_TCHECK))
14403
+ (set (match_dup 3)
14404
+ (eq:SI (match_dup 2)
14406
+ (set (match_operand:SI 0 "int_reg_operand" "")
14407
+ (minus:SI (const_int 1) (match_dup 3)))]
14410
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
14411
+ operands[3] = gen_reg_rtx (SImode);
14414
+(define_insn "*tcheck_internal"
14415
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
14416
+ (unspec_volatile:CC [(match_operand 0 "u3bit_cint_operand" "n")]
14417
+ UNSPECV_HTM_TCHECK))]
14420
+ [(set_attr "type" "htm")
14421
+ (set_attr "length" "4")])
14423
+(define_expand "tend"
14424
+ [(set (match_dup 2)
14425
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
14426
+ UNSPECV_HTM_TEND))
14427
+ (set (match_dup 3)
14428
+ (eq:SI (match_dup 2)
14430
+ (set (match_operand:SI 0 "int_reg_operand" "")
14431
+ (minus:SI (const_int 1) (match_dup 3)))]
14434
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
14435
+ operands[3] = gen_reg_rtx (SImode);
14438
+(define_insn "*tend_internal"
14439
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
14440
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
14441
+ UNSPECV_HTM_TEND))]
14444
+ [(set_attr "type" "htm")
14445
+ (set_attr "length" "4")])
14447
+(define_expand "trechkpt"
14448
+ [(set (match_dup 1)
14449
+ (unspec_volatile:CC [(const_int 0)]
14450
+ UNSPECV_HTM_TRECHKPT))
14451
+ (set (match_dup 2)
14452
+ (eq:SI (match_dup 1)
14454
+ (set (match_operand:SI 0 "int_reg_operand" "")
14455
+ (minus:SI (const_int 1) (match_dup 2)))]
14458
+ operands[1] = gen_rtx_REG (CCmode, CR0_REGNO);
14459
+ operands[2] = gen_reg_rtx (SImode);
14462
+(define_insn "*trechkpt_internal"
14463
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
14464
+ (unspec_volatile:CC [(const_int 0)]
14465
+ UNSPECV_HTM_TRECHKPT))]
14468
+ [(set_attr "type" "htm")
14469
+ (set_attr "length" "4")])
14471
+(define_expand "treclaim"
14472
+ [(set (match_dup 2)
14473
+ (unspec_volatile:CC [(match_operand:SI 1 "gpc_reg_operand" "r")]
14474
+ UNSPECV_HTM_TRECLAIM))
14475
+ (set (match_dup 3)
14476
+ (eq:SI (match_dup 2)
14478
+ (set (match_operand:SI 0 "int_reg_operand" "")
14479
+ (minus:SI (const_int 1) (match_dup 3)))]
14482
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
14483
+ operands[3] = gen_reg_rtx (SImode);
14486
+(define_insn "*treclaim_internal"
14487
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
14488
+ (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")]
14489
+ UNSPECV_HTM_TRECLAIM))]
14492
+ [(set_attr "type" "htm")
14493
+ (set_attr "length" "4")])
14495
+(define_expand "tsr"
14496
+ [(set (match_dup 2)
14497
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
14498
+ UNSPECV_HTM_TSR))
14499
+ (set (match_dup 3)
14500
+ (eq:SI (match_dup 2)
14502
+ (set (match_operand:SI 0 "int_reg_operand" "")
14503
+ (minus:SI (const_int 1) (match_dup 3)))]
14506
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
14507
+ operands[3] = gen_reg_rtx (SImode);
14510
+(define_insn "*tsr_internal"
14511
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
14512
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
14513
+ UNSPECV_HTM_TSR))]
14516
+ [(set_attr "type" "htm")
14517
+ (set_attr "length" "4")])
14519
+(define_insn "htm_mfspr_<mode>"
14520
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
14521
+ (unspec_volatile:P [(match_operand 1 "u10bit_cint_operand" "n")
14522
+ (match_operand:P 2 "htm_spr_reg_operand" "")]
14523
+ UNSPECV_HTM_MFSPR))]
14526
+ [(set_attr "type" "htm")
14527
+ (set_attr "length" "4")])
14529
+(define_insn "htm_mtspr_<mode>"
14530
+ [(set (match_operand:P 2 "htm_spr_reg_operand" "")
14531
+ (unspec_volatile:P [(match_operand:P 0 "gpc_reg_operand" "r")
14532
+ (match_operand 1 "u10bit_cint_operand" "n")]
14533
+ UNSPECV_HTM_MTSPR))]
14536
+ [(set_attr "type" "htm")
14537
+ (set_attr "length" "4")])
14538
--- a/src/gcc/config/rs6000/rs6000-modes.def
14539
+++ b/src/gcc/config/rs6000/rs6000-modes.def
14541
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
14542
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
14543
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
14545
+/* Replacement for TImode that only is allowed in GPRs. We also use PTImode
14546
+ for quad memory atomic operations to force getting an even/odd register
14548
+PARTIAL_INT_MODE (TI);
14549
--- a/src/gcc/config/rs6000/rs6000-cpus.def
14550
+++ b/src/gcc/config/rs6000/rs6000-cpus.def
14552
ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel,
14553
fre, fsqrt, etc. were no longer documented as optional. Group masks by
14554
server and embedded. */
14555
-#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \
14556
+#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \
14557
| OPTION_MASK_CMPB \
14558
| OPTION_MASK_RECIP_PRECISION \
14559
| OPTION_MASK_PPC_GFXOPT \
14562
/* For ISA 2.06, don't add ISEL, since in general it isn't a win, but
14563
altivec is a win so enable it. */
14564
+ /* OPTION_MASK_VSX_TIMODE should be set, but disable it for now until
14565
+ PR 58587 is fixed. */
14566
#define ISA_2_6_MASKS_EMBEDDED (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_POPCNTD)
14567
#define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \
14568
| OPTION_MASK_POPCNTD \
14570
| OPTION_MASK_ALTIVEC \
14573
+/* For now, don't provide an embedded version of ISA 2.07. */
14574
+#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \
14575
+ | OPTION_MASK_P8_FUSION \
14576
+ | OPTION_MASK_P8_VECTOR \
14577
+ | OPTION_MASK_CRYPTO \
14578
+ | OPTION_MASK_DIRECT_MOVE \
14579
+ | OPTION_MASK_HTM \
14580
+ | OPTION_MASK_QUAD_MEMORY \
14581
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC)
14583
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
14585
/* Deal with ports that do not have -mstrict-align. */
14586
@@ -60,23 +72,30 @@
14587
/* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */
14588
#define POWERPC_MASKS (OPTION_MASK_ALTIVEC \
14589
| OPTION_MASK_CMPB \
14590
+ | OPTION_MASK_CRYPTO \
14591
| OPTION_MASK_DFP \
14592
+ | OPTION_MASK_DIRECT_MOVE \
14593
| OPTION_MASK_DLMZB \
14594
| OPTION_MASK_FPRND \
14595
+ | OPTION_MASK_HTM \
14596
| OPTION_MASK_ISEL \
14597
| OPTION_MASK_MFCRF \
14598
| OPTION_MASK_MFPGPR \
14599
| OPTION_MASK_MULHW \
14600
| OPTION_MASK_NO_UPDATE \
14601
+ | OPTION_MASK_P8_FUSION \
14602
+ | OPTION_MASK_P8_VECTOR \
14603
| OPTION_MASK_POPCNTB \
14604
| OPTION_MASK_POPCNTD \
14605
| OPTION_MASK_POWERPC64 \
14606
| OPTION_MASK_PPC_GFXOPT \
14607
| OPTION_MASK_PPC_GPOPT \
14608
+ | OPTION_MASK_QUAD_MEMORY \
14609
| OPTION_MASK_RECIP_PRECISION \
14610
| OPTION_MASK_SOFT_FLOAT \
14611
| OPTION_MASK_STRICT_ALIGN_OPTIONAL \
14612
- | OPTION_MASK_VSX)
14613
+ | OPTION_MASK_VSX \
14614
+ | OPTION_MASK_VSX_TIMODE)
14618
@@ -166,10 +185,7 @@
14619
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
14620
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
14621
| MASK_VSX | MASK_RECIP_PRECISION)
14622
-RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
14623
- POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
14624
- | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
14625
- | MASK_VSX | MASK_RECIP_PRECISION)
14626
+RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
14627
RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
14628
RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64)
14629
RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64)
14630
--- a/src/gcc/config/rs6000/t-linux64bele
14631
+++ b/src/gcc/config/rs6000/t-linux64bele
14633
+#rs6000/t-linux64end
14635
+MULTILIB_OPTIONS += mlittle
14636
+MULTILIB_DIRNAMES += le
14637
+MULTILIB_OSDIRNAMES += $(subst =,.mlittle=,$(subst lible32,lib32le,$(subst lible64,lib64le,$(subst lib,lible,$(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES))))))
14638
+MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mlittle%,$(MULTILIB_OSDIRNAMES)))
14639
+MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN}
14640
--- a/src/gcc/config/rs6000/htmintrin.h
14641
+++ b/src/gcc/config/rs6000/htmintrin.h
14643
+/* Hardware Transactional Memory (HTM) intrinsics.
14644
+ Copyright (C) 2013 Free Software Foundation, Inc.
14645
+ Contributed by Peter Bergner <bergner@vnet.ibm.com>.
14647
+ This file is free software; you can redistribute it and/or modify it under
14648
+ the terms of the GNU General Public License as published by the Free
14649
+ Software Foundation; either version 3 of the License, or (at your option)
14650
+ any later version.
14652
+ This file is distributed in the hope that it will be useful, but WITHOUT
14653
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14654
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14655
+ for more details.
14657
+ Under Section 7 of GPL version 3, you are granted additional
14658
+ permissions described in the GCC Runtime Library Exception, version
14659
+ 3.1, as published by the Free Software Foundation.
14661
+ You should have received a copy of the GNU General Public License and
14662
+ a copy of the GCC Runtime Library Exception along with this program;
14663
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
14664
+ <http://www.gnu.org/licenses/>. */
14667
+# error "HTM instruction set not enabled"
14668
+#endif /* __HTM__ */
14670
+#ifndef _HTMINTRIN_H
14671
+#define _HTMINTRIN_H
14673
+#include <stdint.h>
14675
+typedef uint64_t texasr_t;
14676
+typedef uint32_t texasru_t;
14677
+typedef uint32_t texasrl_t;
14678
+typedef uintptr_t tfiar_t;
14679
+typedef uintptr_t tfhar_t;
14681
+#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)
14682
+#define _HTM_NONTRANSACTIONAL 0x0
14683
+#define _HTM_SUSPENDED 0x1
14684
+#define _HTM_TRANSACTIONAL 0x2
14686
+/* The following macros use the IBM bit numbering for BITNUM
14687
+ as used in the ISA documentation. */
14689
+#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
14690
+ (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))
14691
+#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
14692
+ (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))
14694
+#define _TEXASR_FAILURE_CODE(TEXASR) \
14695
+ _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)
14696
+#define _TEXASRU_FAILURE_CODE(TEXASRU) \
14697
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)
14699
+#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \
14700
+ _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)
14701
+#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \
14702
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)
14704
+#define _TEXASR_DISALLOWED(TEXASR) \
14705
+ _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)
14706
+#define _TEXASRU_DISALLOWED(TEXASRU) \
14707
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)
14709
+#define _TEXASR_NESTING_OVERFLOW(TEXASR) \
14710
+ _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)
14711
+#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \
14712
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)
14714
+#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \
14715
+ _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)
14716
+#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \
14717
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)
14719
+#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \
14720
+ _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)
14721
+#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \
14722
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)
14724
+#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \
14725
+ _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)
14726
+#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \
14727
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)
14729
+#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \
14730
+ _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)
14731
+#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \
14732
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)
14734
+#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \
14735
+ _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)
14736
+#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \
14737
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)
14739
+#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \
14740
+ _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)
14741
+#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \
14742
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)
14744
+#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \
14745
+ _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)
14746
+#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \
14747
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)
14749
+#define _TEXASR_ABORT(TEXASR) \
14750
+ _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)
14751
+#define _TEXASRU_ABORT(TEXASRU) \
14752
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)
14755
+#define _TEXASR_SUSPENDED(TEXASR) \
14756
+ _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)
14758
+#define _TEXASR_PRIVILEGE(TEXASR) \
14759
+ _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)
14761
+#define _TEXASR_FAILURE_SUMMARY(TEXASR) \
14762
+ _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)
14764
+#define _TEXASR_TFIAR_EXACT(TEXASR) \
14765
+ _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)
14767
+#define _TEXASR_ROT(TEXASR) \
14768
+ _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)
14770
+#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \
14771
+ _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)
14773
+#endif /* _HTMINTRIN_H */
14774
--- a/src/gcc/config/rs6000/rs6000-protos.h
14775
+++ b/src/gcc/config/rs6000/rs6000-protos.h
14776
@@ -50,12 +50,17 @@
14777
extern rtx find_addr_reg (rtx);
14778
extern rtx gen_easy_altivec_constant (rtx);
14779
extern const char *output_vec_const_move (rtx *);
14780
+extern const char *rs6000_output_move_128bit (rtx *);
14781
extern void rs6000_expand_vector_init (rtx, rtx);
14782
extern void paired_expand_vector_init (rtx, rtx);
14783
extern void rs6000_expand_vector_set (rtx, rtx, int);
14784
extern void rs6000_expand_vector_extract (rtx, rtx, int);
14785
extern bool altivec_expand_vec_perm_const (rtx op[4]);
14786
+extern void altivec_expand_vec_perm_le (rtx op[4]);
14787
extern bool rs6000_expand_vec_perm_const (rtx op[4]);
14788
+extern void altivec_expand_lvx_be (rtx, rtx, enum machine_mode, unsigned);
14789
+extern void altivec_expand_stvx_be (rtx, rtx, enum machine_mode, unsigned);
14790
+extern void altivec_expand_stvex_be (rtx, rtx, enum machine_mode, unsigned);
14791
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
14792
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
14793
extern void build_mask64_2_operands (rtx, rtx *);
14795
extern int registers_ok_for_quad_peep (rtx, rtx);
14796
extern int mems_ok_for_quad_peep (rtx, rtx);
14797
extern bool gpr_or_gpr_p (rtx, rtx);
14798
+extern bool direct_move_p (rtx, rtx);
14799
+extern bool quad_load_store_p (rtx, rtx);
14800
+extern bool fusion_gpr_load_p (rtx *, bool);
14801
+extern void expand_fusion_gpr_load (rtx *);
14802
+extern const char *emit_fusion_gpr_load (rtx *);
14803
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
14805
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
14806
@@ -116,6 +126,7 @@
14807
extern void rs6000_fatal_bad_address (rtx);
14808
extern rtx create_TOC_reference (rtx, rtx);
14809
extern void rs6000_split_multireg_move (rtx, rtx);
14810
+extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode);
14811
extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
14812
extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
14813
extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
14814
@@ -135,6 +146,7 @@
14815
extern rtx rs6000_address_for_altivec (rtx);
14816
extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
14817
extern int rs6000_loop_align (rtx);
14818
+extern void rs6000_split_logical (rtx [], enum rtx_code, bool, bool, bool, rtx);
14819
#endif /* RTX_CODE */
14822
@@ -146,6 +158,7 @@
14823
extern rtx rs6000_libcall_value (enum machine_mode);
14824
extern rtx rs6000_va_arg (tree, tree);
14825
extern int function_ok_for_sibcall (tree);
14826
+extern int rs6000_reg_parm_stack_space (tree);
14827
extern void rs6000_elf_declare_function_name (FILE *, const char *, tree);
14828
extern bool rs6000_elf_in_small_data_p (const_tree);
14829
#ifdef ARGS_SIZE_RTX
14830
@@ -170,7 +183,8 @@
14831
extern void rs6000_emit_epilogue (int);
14832
extern void rs6000_emit_eh_reg_restore (rtx, rtx);
14833
extern const char * output_isel (rtx *);
14834
-extern void rs6000_call_indirect_aix (rtx, rtx, rtx);
14835
+extern void rs6000_call_aix (rtx, rtx, rtx, rtx);
14836
+extern void rs6000_sibcall_aix (rtx, rtx, rtx, rtx);
14837
extern void rs6000_aix_asm_output_dwarf_table_ref (char *);
14838
extern void get_ppc476_thunk_name (char name[32]);
14839
extern bool rs6000_overloaded_builtin_p (enum rs6000_builtins);
14840
--- a/src/gcc/config/rs6000/t-rs6000
14841
+++ b/src/gcc/config/rs6000/t-rs6000
14843
$(srcdir)/config/rs6000/power5.md \
14844
$(srcdir)/config/rs6000/power6.md \
14845
$(srcdir)/config/rs6000/power7.md \
14846
+ $(srcdir)/config/rs6000/power8.md \
14847
$(srcdir)/config/rs6000/cell.md \
14848
$(srcdir)/config/rs6000/xfpu.md \
14849
$(srcdir)/config/rs6000/a2.md \
14851
$(srcdir)/config/rs6000/vector.md \
14852
$(srcdir)/config/rs6000/vsx.md \
14853
$(srcdir)/config/rs6000/altivec.md \
14854
+ $(srcdir)/config/rs6000/crypto.md \
14855
+ $(srcdir)/config/rs6000/htm.md \
14856
$(srcdir)/config/rs6000/spe.md \
14857
$(srcdir)/config/rs6000/dfp.md \
14858
$(srcdir)/config/rs6000/paired.md
14859
--- a/src/gcc/config/rs6000/htmxlintrin.h
14860
+++ b/src/gcc/config/rs6000/htmxlintrin.h
14862
+/* XL compiler Hardware Transactional Memory (HTM) execution intrinsics.
14863
+ Copyright (C) 2013 Free Software Foundation, Inc.
14864
+ Contributed by Peter Bergner <bergner@vnet.ibm.com>.
14866
+ This file is free software; you can redistribute it and/or modify it under
14867
+ the terms of the GNU General Public License as published by the Free
14868
+ Software Foundation; either version 3 of the License, or (at your option)
14869
+ any later version.
14871
+ This file is distributed in the hope that it will be useful, but WITHOUT
14872
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14873
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14874
+ for more details.
14876
+ Under Section 7 of GPL version 3, you are granted additional
14877
+ permissions described in the GCC Runtime Library Exception, version
14878
+ 3.1, as published by the Free Software Foundation.
14880
+ You should have received a copy of the GNU General Public License and
14881
+ a copy of the GCC Runtime Library Exception along with this program;
14882
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
14883
+ <http://www.gnu.org/licenses/>. */
14886
+# error "HTM instruction set not enabled"
14887
+#endif /* __HTM__ */
14889
+#ifndef _HTMXLINTRIN_H
14890
+#define _HTMXLINTRIN_H
14892
+#include <stdint.h>
14893
+#include <htmintrin.h>
14895
+#ifdef __cplusplus
14899
+#define _TEXASR_PTR(TM_BUF) \
14900
+ ((texasr_t *)((TM_BUF)+0))
14901
+#define _TEXASRU_PTR(TM_BUF) \
14902
+ ((texasru_t *)((TM_BUF)+0))
14903
+#define _TEXASRL_PTR(TM_BUF) \
14904
+ ((texasrl_t *)((TM_BUF)+4))
14905
+#define _TFIAR_PTR(TM_BUF) \
14906
+ ((tfiar_t *)((TM_BUF)+8))
14908
+typedef char TM_buff_type[16];
14910
+extern __inline long
14911
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14912
+__TM_simple_begin (void)
14914
+ if (__builtin_expect (__builtin_tbegin (0), 1))
14919
+extern __inline long
14920
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14921
+__TM_begin (void* const TM_buff)
14923
+ *_TEXASRL_PTR (TM_buff) = 0;
14924
+ if (__builtin_expect (__builtin_tbegin (0), 1))
14926
+#ifdef __powerpc64__
14927
+ *_TEXASR_PTR (TM_buff) = __builtin_get_texasr ();
14929
+ *_TEXASRU_PTR (TM_buff) = __builtin_get_texasru ();
14930
+ *_TEXASRL_PTR (TM_buff) = __builtin_get_texasr ();
14932
+ *_TFIAR_PTR (TM_buff) = __builtin_get_tfiar ();
14936
+extern __inline long
14937
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14940
+ if (__builtin_expect (__builtin_tend (0), 1))
14945
+extern __inline void
14946
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14949
+ __builtin_tabort (0);
14952
+extern __inline void
14953
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14954
+__TM_named_abort (unsigned char const code)
14956
+ __builtin_tabort (code);
14959
+extern __inline void
14960
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14961
+__TM_resume (void)
14963
+ __builtin_tresume ();
14966
+extern __inline void
14967
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14968
+__TM_suspend (void)
14970
+ __builtin_tsuspend ();
14973
+extern __inline long
14974
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14975
+__TM_is_user_abort (void* const TM_buff)
14977
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
14978
+ return _TEXASRU_ABORT (texasru);
14981
+extern __inline long
14982
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14983
+__TM_is_named_user_abort (void* const TM_buff, unsigned char *code)
14985
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
14987
+ *code = _TEXASRU_FAILURE_CODE (texasru);
14988
+ return _TEXASRU_ABORT (texasru);
14991
+extern __inline long
14992
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14993
+__TM_is_illegal (void* const TM_buff)
14995
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
14996
+ return _TEXASRU_DISALLOWED (texasru);
14999
+extern __inline long
15000
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15001
+__TM_is_footprint_exceeded (void* const TM_buff)
15003
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
15004
+ return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);
15007
+extern __inline long
15008
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15009
+__TM_nesting_depth (void* const TM_buff)
15011
+ texasrl_t texasrl;
15013
+ if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)
15015
+ texasrl = *_TEXASRL_PTR (TM_buff);
15016
+ if (!_TEXASR_FAILURE_SUMMARY (texasrl))
15020
+ texasrl = (texasrl_t) __builtin_get_texasr ();
15022
+ return _TEXASR_TRANSACTION_LEVEL (texasrl);
15025
+extern __inline long
15026
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15027
+__TM_is_nested_too_deep(void* const TM_buff)
15029
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
15030
+ return _TEXASRU_NESTING_OVERFLOW (texasru);
15033
+extern __inline long
15034
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15035
+__TM_is_conflict(void* const TM_buff)
15037
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
15038
+ /* Return TEXASR bits 11 (Self-Induced Conflict) through
15039
+ 14 (Translation Invalidation Conflict). */
15040
+ return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;
15043
+extern __inline long
15044
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15045
+__TM_is_failure_persistent(void* const TM_buff)
15047
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
15048
+ return _TEXASRU_FAILURE_PERSISTENT (texasru);
15051
+extern __inline long
15052
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15053
+__TM_failure_address(void* const TM_buff)
15055
+ return *_TFIAR_PTR (TM_buff);
15058
+extern __inline long long
15059
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15060
+__TM_failure_code(void* const TM_buff)
15062
+ return *_TEXASR_PTR (TM_buff);
15065
+#ifdef __cplusplus
15069
+#endif /* _HTMXLINTRIN_H */
15070
--- a/src/gcc/config/rs6000/rs6000-builtin.def
15071
+++ b/src/gcc/config/rs6000/rs6000-builtin.def
15073
RS6000_BUILTIN_A -- ABS builtins
15074
RS6000_BUILTIN_D -- DST builtins
15075
RS6000_BUILTIN_E -- SPE EVSEL builtins.
15076
- RS6000_BUILTIN_P -- Altivec and VSX predicate builtins
15077
+ RS6000_BUILTIN_H -- HTM builtins
15078
+ RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins
15079
RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins
15080
RS6000_BUILTIN_S -- SPE predicate builtins
15081
RS6000_BUILTIN_X -- special builtins
15083
#error "RS6000_BUILTIN_E is not defined."
15086
+#ifndef RS6000_BUILTIN_H
15087
+ #error "RS6000_BUILTIN_H is not defined."
15090
#ifndef RS6000_BUILTIN_P
15091
#error "RS6000_BUILTIN_P is not defined."
15093
@@ -301,6 +306,158 @@
15094
| RS6000_BTC_SPECIAL), \
15095
CODE_FOR_nothing) /* ICODE */
15097
+/* ISA 2.07 (power8) vector convenience macros. */
15098
+/* For the instructions that are encoded as altivec instructions use
15099
+ __builtin_altivec_ as the builtin name. */
15100
+#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \
15101
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
15102
+ "__builtin_altivec_" NAME, /* NAME */ \
15103
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
15104
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15105
+ | RS6000_BTC_UNARY), \
15106
+ CODE_FOR_ ## ICODE) /* ICODE */
15108
+#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \
15109
+ RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
15110
+ "__builtin_altivec_" NAME, /* NAME */ \
15111
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
15112
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15113
+ | RS6000_BTC_BINARY), \
15114
+ CODE_FOR_ ## ICODE) /* ICODE */
15116
+#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \
15117
+ RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
15118
+ "__builtin_altivec_" NAME, /* NAME */ \
15119
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
15120
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15121
+ | RS6000_BTC_PREDICATE), \
15122
+ CODE_FOR_ ## ICODE) /* ICODE */
15124
+/* For the instructions encoded as VSX instructions use __builtin_vsx as the
15126
+#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \
15127
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
15128
+ "__builtin_vsx_" NAME, /* NAME */ \
15129
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
15130
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15131
+ | RS6000_BTC_UNARY), \
15132
+ CODE_FOR_ ## ICODE) /* ICODE */
15134
+#define BU_P8V_OVERLOAD_1(ENUM, NAME) \
15135
+ RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
15136
+ "__builtin_vec_" NAME, /* NAME */ \
15137
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
15138
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
15139
+ | RS6000_BTC_UNARY), \
15140
+ CODE_FOR_nothing) /* ICODE */
15142
+#define BU_P8V_OVERLOAD_2(ENUM, NAME) \
15143
+ RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
15144
+ "__builtin_vec_" NAME, /* NAME */ \
15145
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
15146
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
15147
+ | RS6000_BTC_BINARY), \
15148
+ CODE_FOR_nothing) /* ICODE */
15150
+/* Crypto convenience macros. */
15151
+#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \
15152
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
15153
+ "__builtin_crypto_" NAME, /* NAME */ \
15154
+ RS6000_BTM_CRYPTO, /* MASK */ \
15155
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15156
+ | RS6000_BTC_UNARY), \
15157
+ CODE_FOR_ ## ICODE) /* ICODE */
15159
+#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \
15160
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
15161
+ "__builtin_crypto_" NAME, /* NAME */ \
15162
+ RS6000_BTM_CRYPTO, /* MASK */ \
15163
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15164
+ | RS6000_BTC_BINARY), \
15165
+ CODE_FOR_ ## ICODE) /* ICODE */
15167
+#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \
15168
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
15169
+ "__builtin_crypto_" NAME, /* NAME */ \
15170
+ RS6000_BTM_CRYPTO, /* MASK */ \
15171
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15172
+ | RS6000_BTC_TERNARY), \
15173
+ CODE_FOR_ ## ICODE) /* ICODE */
15175
+#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \
15176
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
15177
+ "__builtin_crypto_" NAME, /* NAME */ \
15178
+ RS6000_BTM_CRYPTO, /* MASK */ \
15179
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
15180
+ | RS6000_BTC_UNARY), \
15181
+ CODE_FOR_nothing) /* ICODE */
15183
+#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \
15184
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
15185
+ "__builtin_crypto_" NAME, /* NAME */ \
15186
+ RS6000_BTM_CRYPTO, /* MASK */ \
15187
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
15188
+ | RS6000_BTC_BINARY), \
15189
+ CODE_FOR_nothing) /* ICODE */
15191
+#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \
15192
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
15193
+ "__builtin_crypto_" NAME, /* NAME */ \
15194
+ RS6000_BTM_CRYPTO, /* MASK */ \
15195
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
15196
+ | RS6000_BTC_TERNARY), \
15197
+ CODE_FOR_nothing) /* ICODE */
15199
+/* HTM convenience macros. */
15200
+#define BU_HTM_0(ENUM, NAME, ATTR, ICODE) \
15201
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
15202
+ "__builtin_" NAME, /* NAME */ \
15203
+ RS6000_BTM_HTM, /* MASK */ \
15204
+ RS6000_BTC_ ## ATTR, /* ATTR */ \
15205
+ CODE_FOR_ ## ICODE) /* ICODE */
15207
+#define BU_HTM_1(ENUM, NAME, ATTR, ICODE) \
15208
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
15209
+ "__builtin_" NAME, /* NAME */ \
15210
+ RS6000_BTM_HTM, /* MASK */ \
15211
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15212
+ | RS6000_BTC_UNARY), \
15213
+ CODE_FOR_ ## ICODE) /* ICODE */
15215
+#define BU_HTM_2(ENUM, NAME, ATTR, ICODE) \
15216
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
15217
+ "__builtin_" NAME, /* NAME */ \
15218
+ RS6000_BTM_HTM, /* MASK */ \
15219
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15220
+ | RS6000_BTC_BINARY), \
15221
+ CODE_FOR_ ## ICODE) /* ICODE */
15223
+#define BU_HTM_3(ENUM, NAME, ATTR, ICODE) \
15224
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
15225
+ "__builtin_" NAME, /* NAME */ \
15226
+ RS6000_BTM_HTM, /* MASK */ \
15227
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15228
+ | RS6000_BTC_TERNARY), \
15229
+ CODE_FOR_ ## ICODE) /* ICODE */
15231
+#define BU_HTM_SPR0(ENUM, NAME, ATTR, ICODE) \
15232
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
15233
+ "__builtin_" NAME, /* NAME */ \
15234
+ RS6000_BTM_HTM, /* MASK */ \
15235
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15236
+ | RS6000_BTC_SPR), \
15237
+ CODE_FOR_ ## ICODE) /* ICODE */
15239
+#define BU_HTM_SPR1(ENUM, NAME, ATTR, ICODE) \
15240
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
15241
+ "__builtin_" NAME, /* NAME */ \
15242
+ RS6000_BTM_HTM, /* MASK */ \
15243
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
15244
+ | RS6000_BTC_UNARY \
15245
+ | RS6000_BTC_SPR \
15246
+ | RS6000_BTC_VOID), \
15247
+ CODE_FOR_ ## ICODE) /* ICODE */
15249
/* SPE convenience macros. */
15250
#define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \
15251
RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \
15252
@@ -636,8 +793,26 @@
15253
BU_ALTIVEC_X (LVEHX, "lvehx", MEM)
15254
BU_ALTIVEC_X (LVEWX, "lvewx", MEM)
15255
BU_ALTIVEC_X (LVXL, "lvxl", MEM)
15256
+BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", MEM)
15257
+BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", MEM)
15258
+BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf", MEM)
15259
+BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si", MEM)
15260
+BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi", MEM)
15261
+BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", MEM)
15262
BU_ALTIVEC_X (LVX, "lvx", MEM)
15263
+BU_ALTIVEC_X (LVX_V2DF, "lvx_v2df", MEM)
15264
+BU_ALTIVEC_X (LVX_V2DI, "lvx_v2di", MEM)
15265
+BU_ALTIVEC_X (LVX_V4SF, "lvx_v4sf", MEM)
15266
+BU_ALTIVEC_X (LVX_V4SI, "lvx_v4si", MEM)
15267
+BU_ALTIVEC_X (LVX_V8HI, "lvx_v8hi", MEM)
15268
+BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi", MEM)
15269
BU_ALTIVEC_X (STVX, "stvx", MEM)
15270
+BU_ALTIVEC_X (STVX_V2DF, "stvx_v2df", MEM)
15271
+BU_ALTIVEC_X (STVX_V2DI, "stvx_v2di", MEM)
15272
+BU_ALTIVEC_X (STVX_V4SF, "stvx_v4sf", MEM)
15273
+BU_ALTIVEC_X (STVX_V4SI, "stvx_v4si", MEM)
15274
+BU_ALTIVEC_X (STVX_V8HI, "stvx_v8hi", MEM)
15275
+BU_ALTIVEC_X (STVX_V16QI, "stvx_v16qi", MEM)
15276
BU_ALTIVEC_C (LVLX, "lvlx", MEM)
15277
BU_ALTIVEC_C (LVLXL, "lvlxl", MEM)
15278
BU_ALTIVEC_C (LVRX, "lvrx", MEM)
15279
@@ -646,6 +821,12 @@
15280
BU_ALTIVEC_X (STVEHX, "stvehx", MEM)
15281
BU_ALTIVEC_X (STVEWX, "stvewx", MEM)
15282
BU_ALTIVEC_X (STVXL, "stvxl", MEM)
15283
+BU_ALTIVEC_X (STVXL_V2DF, "stvxl_v2df", MEM)
15284
+BU_ALTIVEC_X (STVXL_V2DI, "stvxl_v2di", MEM)
15285
+BU_ALTIVEC_X (STVXL_V4SF, "stvxl_v4sf", MEM)
15286
+BU_ALTIVEC_X (STVXL_V4SI, "stvxl_v4si", MEM)
15287
+BU_ALTIVEC_X (STVXL_V8HI, "stvxl_v8hi", MEM)
15288
+BU_ALTIVEC_X (STVXL_V16QI, "stvxl_v16qi", MEM)
15289
BU_ALTIVEC_C (STVLX, "stvlx", MEM)
15290
BU_ALTIVEC_C (STVLXL, "stvlxl", MEM)
15291
BU_ALTIVEC_C (STVRX, "stvrx", MEM)
15292
@@ -1012,7 +1193,7 @@
15293
BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2)
15295
BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp)
15296
-BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp)
15297
+BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp)
15298
BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp)
15299
BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp)
15300
BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe)
15301
@@ -1052,9 +1233,9 @@
15303
BU_VSX_1 (XSRDPI, "xsrdpi", CONST, vsx_xsrdpi)
15304
BU_VSX_1 (XSRDPIC, "xsrdpic", CONST, vsx_xsrdpic)
15305
-BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, vsx_floordf2)
15306
-BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, vsx_ceildf2)
15307
-BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, vsx_btruncdf2)
15308
+BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, floordf2)
15309
+BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, ceildf2)
15310
+BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, btruncdf2)
15312
/* VSX predicate functions. */
15313
BU_VSX_P (XVCMPEQSP_P, "xvcmpeqsp_p", CONST, vector_eq_v4sf_p)
15314
@@ -1132,6 +1313,166 @@
15315
BU_VSX_OVERLOAD_X (LD, "ld")
15316
BU_VSX_OVERLOAD_X (ST, "st")
15318
+/* 1 argument VSX instructions added in ISA 2.07. */
15319
+BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
15320
+BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
15322
+/* 1 argument altivec instructions added in ISA 2.07. */
15323
+BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
15324
+BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw)
15325
+BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw)
15326
+BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2)
15327
+BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2)
15328
+BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2)
15329
+BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2)
15330
+BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2)
15331
+BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2)
15332
+BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2)
15333
+BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2)
15334
+BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
15336
+/* 2 argument altivec instructions added in ISA 2.07. */
15337
+BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
15338
+BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
15339
+BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
15340
+BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
15341
+BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
15342
+BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
15343
+BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
15344
+BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
15345
+BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
15346
+BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
15347
+BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus)
15348
+BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
15349
+BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
15350
+BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
15351
+BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
15352
+BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
15354
+BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
15355
+BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
15356
+BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
15357
+BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
15358
+BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
15359
+BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
15361
+BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3)
15362
+BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
15363
+BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
15364
+BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
15365
+BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
15366
+BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
15368
+BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3)
15369
+BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
15370
+BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
15371
+BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
15372
+BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
15373
+BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
15375
+/* Vector comparison instructions added in ISA 2.07. */
15376
+BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
15377
+BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
15378
+BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di)
15380
+/* Vector comparison predicate instructions added in ISA 2.07. */
15381
+BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p)
15382
+BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p)
15383
+BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p)
15385
+/* ISA 2.07 vector overloaded 1 argument functions. */
15386
+BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw")
15387
+BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw")
15388
+BU_P8V_OVERLOAD_1 (VCLZ, "vclz")
15389
+BU_P8V_OVERLOAD_1 (VCLZB, "vclzb")
15390
+BU_P8V_OVERLOAD_1 (VCLZH, "vclzh")
15391
+BU_P8V_OVERLOAD_1 (VCLZW, "vclzw")
15392
+BU_P8V_OVERLOAD_1 (VCLZD, "vclzd")
15393
+BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt")
15394
+BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb")
15395
+BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth")
15396
+BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw")
15397
+BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd")
15398
+BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
15400
+/* ISA 2.07 vector overloaded 2 argument functions. */
15401
+BU_P8V_OVERLOAD_2 (EQV, "eqv")
15402
+BU_P8V_OVERLOAD_2 (NAND, "nand")
15403
+BU_P8V_OVERLOAD_2 (ORC, "orc")
15404
+BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
15405
+BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
15406
+BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
15407
+BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
15408
+BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
15409
+BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
15410
+BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
15411
+BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
15412
+BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
15413
+BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
15414
+BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
15415
+BU_P8V_OVERLOAD_2 (VRLD, "vrld")
15416
+BU_P8V_OVERLOAD_2 (VSLD, "vsld")
15417
+BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
15418
+BU_P8V_OVERLOAD_2 (VSRD, "vsrd")
15419
+BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm")
15422
+/* 1 argument crypto functions. */
15423
+BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox)
15425
+/* 2 argument crypto functions. */
15426
+BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher)
15427
+BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast)
15428
+BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher)
15429
+BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast)
15430
+BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
15431
+BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
15432
+BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
15433
+BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
15435
+/* 3 argument crypto functions. */
15436
+BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di)
15437
+BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si)
15438
+BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi)
15439
+BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi)
15440
+BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw)
15441
+BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad)
15443
+/* 2 argument crypto overloaded functions. */
15444
+BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum")
15446
+/* 3 argument crypto overloaded functions. */
15447
+BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor")
15448
+BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma")
15451
+/* HTM functions. */
15452
+BU_HTM_1 (TABORT, "tabort", MISC, tabort)
15453
+BU_HTM_3 (TABORTDC, "tabortdc", MISC, tabortdc)
15454
+BU_HTM_3 (TABORTDCI, "tabortdci", MISC, tabortdci)
15455
+BU_HTM_3 (TABORTWC, "tabortwc", MISC, tabortwc)
15456
+BU_HTM_3 (TABORTWCI, "tabortwci", MISC, tabortwci)
15457
+BU_HTM_1 (TBEGIN, "tbegin", MISC, tbegin)
15458
+BU_HTM_1 (TCHECK, "tcheck", MISC, tcheck)
15459
+BU_HTM_1 (TEND, "tend", MISC, tend)
15460
+BU_HTM_0 (TENDALL, "tendall", MISC, tend)
15461
+BU_HTM_0 (TRECHKPT, "trechkpt", MISC, trechkpt)
15462
+BU_HTM_1 (TRECLAIM, "treclaim", MISC, treclaim)
15463
+BU_HTM_0 (TRESUME, "tresume", MISC, tsr)
15464
+BU_HTM_0 (TSUSPEND, "tsuspend", MISC, tsr)
15465
+BU_HTM_1 (TSR, "tsr", MISC, tsr)
15466
+BU_HTM_0 (TTEST, "ttest", MISC, ttest)
15468
+BU_HTM_SPR0 (GET_TFHAR, "get_tfhar", MISC, nothing)
15469
+BU_HTM_SPR1 (SET_TFHAR, "set_tfhar", MISC, nothing)
15470
+BU_HTM_SPR0 (GET_TFIAR, "get_tfiar", MISC, nothing)
15471
+BU_HTM_SPR1 (SET_TFIAR, "set_tfiar", MISC, nothing)
15472
+BU_HTM_SPR0 (GET_TEXASR, "get_texasr", MISC, nothing)
15473
+BU_HTM_SPR1 (SET_TEXASR, "set_texasr", MISC, nothing)
15474
+BU_HTM_SPR0 (GET_TEXASRU, "get_texasru", MISC, nothing)
15475
+BU_HTM_SPR1 (SET_TEXASRU, "set_texasru", MISC, nothing)
15478
/* 3 argument paired floating point builtins. */
15479
BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4)
15480
BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4)
15481
@@ -1430,10 +1771,10 @@
15484
BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase",
15485
- RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
15486
+ RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
15488
BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
15489
- RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
15490
+ RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
15492
/* Darwin CfString builtin. */
15493
BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS,
15494
--- a/src/gcc/config/rs6000/rs6000-c.c
15495
+++ b/src/gcc/config/rs6000/rs6000-c.c
15496
@@ -315,6 +315,8 @@
15497
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X");
15498
if ((flags & OPTION_MASK_POPCNTD) != 0)
15499
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
15500
+ if ((flags & OPTION_MASK_DIRECT_MOVE) != 0)
15501
+ rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8");
15502
if ((flags & OPTION_MASK_SOFT_FLOAT) != 0)
15503
rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT");
15504
if ((flags & OPTION_MASK_RECIP_PRECISION) != 0)
15505
@@ -331,6 +333,16 @@
15507
if ((flags & OPTION_MASK_VSX) != 0)
15508
rs6000_define_or_undefine_macro (define_p, "__VSX__");
15509
+ if ((flags & OPTION_MASK_HTM) != 0)
15510
+ rs6000_define_or_undefine_macro (define_p, "__HTM__");
15511
+ if ((flags & OPTION_MASK_P8_VECTOR) != 0)
15512
+ rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
15513
+ if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
15514
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__");
15515
+ if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
15516
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
15517
+ if ((flags & OPTION_MASK_CRYPTO) != 0)
15518
+ rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
15520
/* options from the builtin masks. */
15521
if ((bu_mask & RS6000_BTM_SPE) != 0)
15522
@@ -453,7 +465,11 @@
15524
builtin_define ("_CALL_AIXDESC");
15525
builtin_define ("_CALL_AIX");
15526
+ builtin_define ("_CALL_ELF=1");
15529
+ builtin_define ("_CALL_ELF=2");
15532
builtin_define ("_CALL_DARWIN");
15534
@@ -465,6 +481,13 @@
15535
if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
15536
builtin_define ("__NO_FPRS__");
15538
+ /* Whether aggregates passed by value are aligned to a 16 byte boundary
15539
+ if their alignment is 16 bytes or larger. */
15540
+ if ((TARGET_MACHO && rs6000_darwin64_abi)
15541
+ || DEFAULT_ABI == ABI_ELFv2
15542
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
15543
+ builtin_define ("__STRUCT_PARM_ALIGN__=16");
15545
/* Generate defines for Xilinx FPU. */
15546
if (rs6000_xilinx_fpu)
15548
@@ -505,6 +528,8 @@
15549
RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
15550
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI,
15551
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
15552
+ { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI,
15553
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
15554
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF,
15555
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
15556
{ ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP,
15557
@@ -577,6 +602,10 @@
15558
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
15559
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH,
15560
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
15561
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
15562
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
15563
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
15564
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
15565
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
15566
RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
15567
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
15568
@@ -583,6 +612,10 @@
15569
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
15570
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
15571
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
15572
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
15573
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
15574
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
15575
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
15576
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
15577
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
15578
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
15579
@@ -601,6 +634,10 @@
15580
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
15581
{ ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH,
15582
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
15583
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
15584
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
15585
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
15586
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
15587
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
15588
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
15589
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
15590
@@ -651,6 +688,18 @@
15591
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
15592
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
15593
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15594
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
15595
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
15596
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
15597
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
15598
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
15599
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15600
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
15601
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15602
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
15603
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
15604
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
15605
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15606
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP,
15607
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15608
{ ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
15609
@@ -937,6 +986,10 @@
15610
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15611
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW,
15612
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15613
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
15614
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15615
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
15616
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15617
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
15618
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15619
{ ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
15620
@@ -975,6 +1028,10 @@
15621
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15622
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW,
15623
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15624
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD,
15625
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15626
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD,
15627
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15628
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
15629
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15630
{ ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
15631
@@ -1021,6 +1078,10 @@
15632
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15633
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW,
15634
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15635
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD,
15636
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15637
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD,
15638
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15639
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
15640
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15641
{ ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
15642
@@ -1045,54 +1106,54 @@
15643
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15644
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
15645
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
15646
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15647
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF,
15648
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
15649
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15650
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
15651
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
15652
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15653
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
15654
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
15655
~RS6000_BTI_unsigned_V2DI, 0 },
15656
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15657
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
15658
RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
15659
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15660
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SF,
15661
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
15662
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15663
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SF,
15664
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
15665
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15666
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15667
RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
15668
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15669
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15670
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
15671
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15672
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15673
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
15674
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15675
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15676
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
15677
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15678
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15679
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
15680
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15681
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15682
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
15683
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15684
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
15685
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
15686
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15687
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
15688
RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
15689
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15690
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
15691
RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
15692
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15693
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
15694
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
15695
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15696
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
15697
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
15698
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15699
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
15700
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
15701
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15702
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
15703
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
15704
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15705
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
15706
RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
15707
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15708
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
15709
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
15710
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15711
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
15712
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
15713
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15714
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
15715
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
15716
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
15717
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
15718
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
15719
{ ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEBX,
15720
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
15721
@@ -1130,55 +1191,55 @@
15722
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
15723
{ ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
15724
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
15725
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15726
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF,
15727
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
15728
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15729
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF,
15730
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
15731
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15732
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15733
RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
15734
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15735
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15736
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
15737
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15738
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15739
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
15740
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15741
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15742
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
15743
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15744
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15745
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
15746
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15747
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15748
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
15749
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15750
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
15751
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
15752
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15753
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
15754
RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
15755
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15756
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
15757
RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
15758
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15759
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
15760
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
15761
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15762
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
15763
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
15764
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15765
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
15766
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
15767
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15768
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
15769
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
15770
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15771
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
15772
RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
15773
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15774
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
15775
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
15776
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15777
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
15778
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
15779
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15780
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
15781
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
15782
~RS6000_BTI_unsigned_V16QI, 0 },
15783
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15784
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
15785
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
15786
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15787
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DF,
15788
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
15789
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15790
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI,
15791
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
15792
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15793
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI,
15794
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
15795
~RS6000_BTI_unsigned_V2DI, 0 },
15796
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
15797
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI,
15798
RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
15799
{ ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
15800
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
15801
@@ -1418,6 +1479,18 @@
15802
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
15803
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
15804
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15805
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
15806
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15807
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
15808
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
15809
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
15810
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15811
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
15812
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
15813
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
15814
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
15815
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
15816
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15817
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP,
15818
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15819
{ ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP,
15820
@@ -1604,6 +1677,18 @@
15821
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
15822
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
15823
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15824
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
15825
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15826
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
15827
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
15828
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
15829
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15830
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
15831
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
15832
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
15833
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
15834
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
15835
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15836
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP,
15837
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15838
{ ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP,
15839
@@ -1786,6 +1871,12 @@
15840
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15841
{ ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
15842
RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
15843
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
15844
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15845
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
15846
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15847
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
15848
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
15849
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
15850
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15851
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
15852
@@ -1812,6 +1903,10 @@
15853
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15854
{ ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS,
15855
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15856
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS,
15857
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15858
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS,
15859
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15860
{ ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS,
15861
RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
15862
{ ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS,
15863
@@ -1824,6 +1919,8 @@
15864
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15865
{ ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS,
15866
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15867
+ { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS,
15868
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15869
{ ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS,
15870
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
15871
{ ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
15872
@@ -1844,6 +1941,10 @@
15873
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15874
{ ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW,
15875
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15876
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
15877
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15878
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
15879
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15880
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
15881
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15882
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
15883
@@ -1868,6 +1969,10 @@
15884
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15885
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
15886
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15887
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
15888
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15889
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
15890
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15891
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
15892
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
15893
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
15894
@@ -2032,6 +2137,10 @@
15895
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15896
{ ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW,
15897
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15898
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
15899
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15900
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
15901
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15902
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
15903
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15904
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
15905
@@ -2056,6 +2165,10 @@
15906
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15907
{ ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW,
15908
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15909
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD,
15910
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15911
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRD,
15912
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15913
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
15914
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15915
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
15916
@@ -2196,6 +2309,18 @@
15917
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
15918
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
15919
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
15920
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
15921
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
15922
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
15923
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
15924
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
15925
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
15926
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
15927
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15928
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
15929
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
15930
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
15931
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
15932
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP,
15933
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
15934
{ ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
15935
@@ -2730,63 +2855,63 @@
15936
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_NOT_OPAQUE },
15937
{ ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
15938
RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE },
15939
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15940
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DF,
15941
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
15942
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15943
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI,
15944
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
15945
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15946
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI,
15947
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
15948
~RS6000_BTI_unsigned_V2DI },
15949
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15950
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI,
15951
RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
15952
~RS6000_BTI_bool_V2DI },
15953
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15954
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SF,
15955
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
15956
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15957
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SF,
15958
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
15959
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15960
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15961
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
15962
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15963
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15964
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
15965
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15966
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15967
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
15968
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15969
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15970
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
15971
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15972
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15973
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
15974
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15975
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15976
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
15977
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15978
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
15979
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
15980
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15981
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
15982
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
15983
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15984
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
15985
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
15986
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15987
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
15988
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
15989
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15990
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
15991
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
15992
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15993
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
15994
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
15995
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15996
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
15997
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
15998
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
15999
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
16000
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
16001
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16002
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16003
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
16004
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16005
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16006
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
16007
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16008
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16009
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
16010
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16011
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16012
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
16013
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16014
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16015
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
16016
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16017
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16018
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
16019
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16020
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
16021
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
16022
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
16023
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
16024
RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
16025
{ ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX,
16026
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
16027
@@ -2858,64 +2983,64 @@
16028
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
16029
{ ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
16030
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
16031
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16032
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SF,
16033
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
16034
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16035
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SF,
16036
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
16037
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16038
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16039
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
16040
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16041
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16042
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
16043
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16044
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16045
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
16046
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16047
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16048
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
16049
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16050
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16051
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
16052
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16053
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16054
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
16055
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16056
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
16057
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
16058
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16059
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16060
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
16061
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16062
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16063
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
16064
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16065
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16066
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
16067
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16068
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16069
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
16070
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16071
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16072
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
16073
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16074
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16075
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
16076
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16077
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16078
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
16079
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16080
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16081
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
16082
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16083
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16084
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
16085
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16086
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16087
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
16088
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16089
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16090
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
16091
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16092
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16093
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
16094
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16095
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16096
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
16097
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16098
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
16099
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
16100
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16101
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
16102
RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
16103
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16104
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DF,
16105
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
16106
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16107
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DF,
16108
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
16109
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16110
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI,
16111
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
16112
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16113
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI,
16114
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
16115
~RS6000_BTI_unsigned_V2DI },
16116
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
16117
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI,
16118
RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
16119
~RS6000_BTI_bool_V2DI },
16120
{ ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
16121
@@ -3327,6 +3452,20 @@
16122
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
16123
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
16124
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
16125
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16126
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
16127
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16128
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
16129
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16130
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
16131
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16132
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
16133
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16134
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
16135
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16136
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
16137
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
16138
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI },
16139
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
16140
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
16141
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
16142
@@ -3372,11 +3511,455 @@
16143
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
16144
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
16145
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
16146
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
16147
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
16148
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
16149
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
16150
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
16151
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
16152
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
16153
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
16154
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
16155
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
16156
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
16157
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
16158
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
16159
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
16160
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
16161
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
16163
+ /* Power8 vector overloaded functions. */
16164
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
16165
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
16166
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
16167
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
16168
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
16169
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
16170
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
16171
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
16172
+ RS6000_BTI_unsigned_V16QI, 0 },
16173
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
16174
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16175
+ RS6000_BTI_bool_V16QI, 0 },
16176
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
16177
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16178
+ RS6000_BTI_unsigned_V16QI, 0 },
16179
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
16180
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
16181
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
16182
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
16183
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
16184
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
16185
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
16186
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
16187
+ RS6000_BTI_unsigned_V8HI, 0 },
16188
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
16189
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16190
+ RS6000_BTI_bool_V8HI, 0 },
16191
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
16192
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16193
+ RS6000_BTI_unsigned_V8HI, 0 },
16194
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
16195
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
16196
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
16197
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
16198
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
16199
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
16200
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
16201
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
16202
+ RS6000_BTI_unsigned_V4SI, 0 },
16203
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
16204
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16205
+ RS6000_BTI_bool_V4SI, 0 },
16206
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
16207
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16208
+ RS6000_BTI_unsigned_V4SI, 0 },
16209
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
16210
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16211
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
16212
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16213
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
16214
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16215
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
16216
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
16217
+ RS6000_BTI_unsigned_V2DI, 0 },
16218
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
16219
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16220
+ RS6000_BTI_bool_V2DI, 0 },
16221
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
16222
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16223
+ RS6000_BTI_unsigned_V2DI, 0 },
16224
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
16225
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
16226
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
16227
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
16229
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
16230
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
16231
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
16232
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
16233
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
16234
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
16235
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
16236
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
16237
+ RS6000_BTI_unsigned_V16QI, 0 },
16238
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
16239
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16240
+ RS6000_BTI_bool_V16QI, 0 },
16241
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
16242
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16243
+ RS6000_BTI_unsigned_V16QI, 0 },
16244
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
16245
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
16246
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
16247
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
16248
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
16249
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
16250
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
16251
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
16252
+ RS6000_BTI_unsigned_V8HI, 0 },
16253
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
16254
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16255
+ RS6000_BTI_bool_V8HI, 0 },
16256
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
16257
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16258
+ RS6000_BTI_unsigned_V8HI, 0 },
16259
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
16260
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
16261
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
16262
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
16263
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
16264
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
16265
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
16266
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
16267
+ RS6000_BTI_unsigned_V4SI, 0 },
16268
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
16269
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16270
+ RS6000_BTI_bool_V4SI, 0 },
16271
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
16272
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16273
+ RS6000_BTI_unsigned_V4SI, 0 },
16274
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
16275
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16276
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
16277
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16278
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
16279
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16280
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
16281
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
16282
+ RS6000_BTI_unsigned_V2DI, 0 },
16283
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
16284
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16285
+ RS6000_BTI_bool_V2DI, 0 },
16286
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
16287
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16288
+ RS6000_BTI_unsigned_V2DI, 0 },
16289
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
16290
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
16291
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
16292
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
16294
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
16295
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
16296
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
16297
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
16298
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
16299
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
16300
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
16301
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
16302
+ RS6000_BTI_unsigned_V16QI, 0 },
16303
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
16304
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16305
+ RS6000_BTI_bool_V16QI, 0 },
16306
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
16307
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16308
+ RS6000_BTI_unsigned_V16QI, 0 },
16309
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
16310
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
16311
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
16312
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
16313
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
16314
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
16315
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
16316
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
16317
+ RS6000_BTI_unsigned_V8HI, 0 },
16318
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
16319
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16320
+ RS6000_BTI_bool_V8HI, 0 },
16321
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
16322
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16323
+ RS6000_BTI_unsigned_V8HI, 0 },
16324
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
16325
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
16326
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
16327
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
16328
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
16329
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
16330
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
16331
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
16332
+ RS6000_BTI_unsigned_V4SI, 0 },
16333
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
16334
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16335
+ RS6000_BTI_bool_V4SI, 0 },
16336
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
16337
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16338
+ RS6000_BTI_unsigned_V4SI, 0 },
16339
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
16340
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16341
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
16342
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16343
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
16344
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16345
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
16346
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
16347
+ RS6000_BTI_unsigned_V2DI, 0 },
16348
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
16349
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16350
+ RS6000_BTI_bool_V2DI, 0 },
16351
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
16352
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16353
+ RS6000_BTI_unsigned_V2DI, 0 },
16354
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
16355
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
16356
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
16357
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
16359
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
16360
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16361
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
16362
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16363
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
16364
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16365
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
16366
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16367
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
16368
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
16369
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
16370
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16372
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
16373
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
16374
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
16375
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
16376
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
16377
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
16378
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
16379
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
16380
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
16381
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
16382
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
16383
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
16384
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
16385
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
16386
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
16387
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
16389
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
16390
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
16391
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
16392
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
16394
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
16395
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
16396
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
16397
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
16399
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
16400
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
16401
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
16402
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
16404
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
16405
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
16406
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
16407
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
16409
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
16410
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
16411
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
16412
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
16414
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
16415
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16416
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
16417
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16418
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
16419
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16421
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
16422
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16423
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
16424
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16425
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
16426
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16428
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
16429
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
16430
+ RS6000_BTI_unsigned_V2DI, 0 },
16431
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
16432
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16433
+ RS6000_BTI_bool_V2DI, 0 },
16434
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
16435
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16436
+ RS6000_BTI_unsigned_V2DI, 0 },
16438
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
16439
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
16440
+ RS6000_BTI_unsigned_V2DI, 0 },
16441
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
16442
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16443
+ RS6000_BTI_bool_V2DI, 0 },
16444
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
16445
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16446
+ RS6000_BTI_unsigned_V2DI, 0 },
16448
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
16449
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
16450
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
16451
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16452
+ RS6000_BTI_unsigned_V4SI, 0 },
16454
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
16455
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
16456
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
16457
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16458
+ RS6000_BTI_unsigned_V4SI, 0 },
16460
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
16461
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
16462
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
16463
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
16464
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
16465
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
16466
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
16467
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
16468
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
16469
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
16470
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
16471
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
16472
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
16473
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
16474
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
16475
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
16477
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
16478
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
16479
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
16480
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
16482
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
16483
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
16484
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
16485
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
16487
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
16488
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
16489
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
16490
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
16492
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
16493
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
16494
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
16495
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
16497
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
16498
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16499
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
16500
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16501
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
16502
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
16504
+ { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
16505
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16507
+ { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
16508
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16510
+ { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
16511
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16513
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
16514
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16515
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
16516
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16518
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
16519
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16520
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
16521
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16523
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
16524
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16525
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
16526
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16528
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
16529
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16530
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD,
16531
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16533
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
16534
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
16535
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
16536
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
16537
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
16538
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
16539
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
16540
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16541
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
16542
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
16543
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
16544
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
16546
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
16547
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
16548
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
16549
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
16551
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
16552
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
16553
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
16554
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
16556
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
16557
+ RS6000_BTI_V16QI, 0, 0, 0 },
16558
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
16559
+ RS6000_BTI_unsigned_V16QI, 0, 0, 0 },
16561
+ /* Crypto builtins. */
16562
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
16563
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16564
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
16565
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI,
16566
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16567
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
16568
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI,
16569
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16570
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
16571
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI,
16572
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16573
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
16575
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB,
16576
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
16577
+ RS6000_BTI_unsigned_V16QI, 0 },
16578
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH,
16579
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
16580
+ RS6000_BTI_unsigned_V8HI, 0 },
16581
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW,
16582
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16583
+ RS6000_BTI_unsigned_V4SI, 0 },
16584
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD,
16585
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16586
+ RS6000_BTI_unsigned_V2DI, 0 },
16588
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW,
16589
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
16590
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
16591
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD,
16592
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
16593
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
16595
{ (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 }
16598
@@ -3591,7 +4174,7 @@
16599
return build_constructor (type, vec);
16602
- /* For now use pointer tricks to do the extaction, unless we are on VSX
16603
+ /* For now use pointer tricks to do the extraction, unless we are on VSX
16604
extracting a double from a constant offset. */
16605
if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
16607
@@ -3619,6 +4202,17 @@
16608
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
16611
+ /* If we are targeting little-endian, but -maltivec=be has been
16612
+ specified to override the element order, adjust the element
16613
+ number accordingly. */
16614
+ if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2)
16616
+ unsigned int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1;
16617
+ arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2),
16618
+ build_int_cstu (TREE_TYPE (arg2), last_elem),
16622
/* If we can use the VSX xxpermdi instruction, use that for extract. */
16623
mode = TYPE_MODE (arg1_type);
16624
if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
16625
@@ -3666,7 +4260,7 @@
16629
- /* For now use pointer tricks to do the insertation, unless we are on VSX
16630
+ /* For now use pointer tricks to do the insertion, unless we are on VSX
16631
inserting a double to a constant offset.. */
16632
if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
16634
@@ -3696,6 +4290,17 @@
16635
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
16638
+ /* If we are targeting little-endian, but -maltivec=be has been
16639
+ specified to override the element order, adjust the element
16640
+ number accordingly. */
16641
+ if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2)
16643
+ unsigned int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1;
16644
+ arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2),
16645
+ build_int_cstu (TREE_TYPE (arg2), last_elem),
16649
/* If we can use the VSX xxpermdi instruction, use that for insert. */
16650
mode = TYPE_MODE (arg1_type);
16651
if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
16652
@@ -3824,7 +4429,8 @@
16653
&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
16654
|| rs6000_builtin_type_compatible (types[1], desc->op2))
16655
&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
16656
- || rs6000_builtin_type_compatible (types[2], desc->op3)))
16657
+ || rs6000_builtin_type_compatible (types[2], desc->op3))
16658
+ && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
16659
return altivec_build_resolved_builtin (args, n, desc);
16662
--- a/src/gcc/config/rs6000/rs6000.opt
16663
+++ b/src/gcc/config/rs6000/rs6000.opt
16665
; Options for the rs6000 port of the compiler
16667
-; Copyright (C) 2005-2013 Free Software Foundation, Inc.
16668
+; Copyright (C) 2005-2014 Free Software Foundation, Inc.
16669
; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
16671
; This file is part of GCC.
16672
@@ -137,6 +137,14 @@
16673
Target Report Mask(ALTIVEC) Var(rs6000_isa_flags)
16674
Use AltiVec instructions
16677
+Target Report RejectNegative Var(rs6000_altivec_element_order, 1) Save
16678
+Generate Altivec instructions using little-endian element order
16681
+Target Report RejectNegative Var(rs6000_altivec_element_order, 2)
16682
+Generate Altivec instructions using big-endian element order
16685
Target Report Mask(DFP) Var(rs6000_isa_flags)
16686
Use decimal floating point instructions
16687
@@ -181,13 +189,16 @@
16688
Target Report Mask(VSX) Var(rs6000_isa_flags)
16689
Use vector/scalar (VSX) instructions
16692
+Target Undocumented Report Var(TARGET_VSX_SCALAR_FLOAT) Init(1)
16693
+; If -mpower8-vector, use VSX arithmetic instructions for SFmode (on by default)
16696
-Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(-1)
16697
-; If -mvsx, use VSX arithmetic instructions for scalar double (on by default)
16698
+Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1)
16699
+; If -mvsx, use VSX arithmetic instructions for DFmode (on by default)
16702
-Target Undocumented Report Var(TARGET_VSX_SCALAR_MEMORY)
16703
-; If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
16704
+Target Undocumented Report Alias(mupper-regs-df)
16707
Target Undocumented Report Var(TARGET_VSX_ALIGN_128)
16708
@@ -363,6 +374,14 @@
16709
Target RejectNegative Var(rs6000_spe_abi, 0)
16710
Do not use the SPE ABI extensions
16713
+Target RejectNegative Var(rs6000_elf_abi, 1) Save
16717
+Target RejectNegative Var(rs6000_elf_abi, 2)
16720
; These are here for testing during development only, do not document
16721
; in the manual please.
16723
@@ -443,6 +462,10 @@
16724
Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
16725
-mlong-double-<n> Specify size of long double (64 or 128 bits)
16728
+Target Report Var(rs6000_lra_flag) Init(0) Save
16729
+Use LRA instead of reload
16732
Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
16733
Determine which dependences between insns are considered costly
16734
@@ -514,3 +537,51 @@
16736
Target Report Var(TARGET_SAVE_TOC_INDIRECT) Save
16737
Control whether we save the TOC in the prologue for indirect calls or generate the save inline
16740
+Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags)
16741
+Allow 128-bit integers in VSX registers
16744
+Target Report Mask(P8_FUSION) Var(rs6000_isa_flags)
16745
+Fuse certain integer operations together for better performance on power8
16747
+mpower8-fusion-sign
16748
+Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
16749
+Allow sign extension in fusion operations
16752
+Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
16753
+Use/do not use vector and scalar instructions added in ISA 2.07.
16756
+Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
16757
+Use ISA 2.07 crypto instructions
16760
+Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags)
16761
+Use ISA 2.07 direct move between GPR & VSX register instructions
16764
+Target Report Mask(HTM) Var(rs6000_isa_flags)
16765
+Use ISA 2.07 transactional memory (HTM) instructions
16768
+Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
16769
+Generate the quad word memory instructions (lq/stq).
16771
+mquad-memory-atomic
16772
+Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags)
16773
+Generate the quad word memory atomic instructions (lqarx/stqcx).
16775
+mcompat-align-parm
16776
+Target Report Var(rs6000_compat_align_parm) Init(1) Save
16777
+Generate aggregate parameter passing code with at most 64-bit alignment.
16780
+Target Undocumented Mask(UPPER_REGS_DF) Var(rs6000_isa_flags)
16781
+Allow double variables in upper registers with -mcpu=power7 or -mvsx
16784
+Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
16785
+Allow float variables in upper registers with -mcpu=power8 or -mp8-vector
16786
--- a/src/gcc/config/rs6000/linux64.h
16787
+++ b/src/gcc/config/rs6000/linux64.h
16790
#ifndef RS6000_BI_ARCH
16792
-#undef DEFAULT_ABI
16793
-#define DEFAULT_ABI ABI_AIX
16795
#undef TARGET_64BIT
16796
#define TARGET_64BIT 1
16799
#undef PROCESSOR_DEFAULT
16800
#define PROCESSOR_DEFAULT PROCESSOR_POWER7
16801
#undef PROCESSOR_DEFAULT64
16802
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
16803
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8
16805
#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
16808
/* We don't need to generate entries in .fixup, except when
16809
-mrelocatable or -mrelocatable-lib is given. */
16811
#define INVALID_64BIT "-m%s not supported in this configuration"
16812
#define INVALID_32BIT INVALID_64BIT
16814
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
16815
+#define ELFv2_ABI_CHECK (rs6000_elf_abi != 1)
16817
+#define ELFv2_ABI_CHECK (rs6000_elf_abi == 2)
16820
#undef SUBSUBTARGET_OVERRIDE_OPTIONS
16821
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
16823
@@ -102,6 +109,12 @@
16824
error (INVALID_64BIT, "call"); \
16826
dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \
16827
+ if (ELFv2_ABI_CHECK) \
16829
+ rs6000_current_abi = ABI_ELFv2; \
16830
+ if (dot_symbols) \
16831
+ error ("-mcall-aixdesc incompatible with -mabi=elfv2"); \
16833
if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \
16835
rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
16836
@@ -351,7 +364,11 @@
16837
#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)"
16839
#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
16840
-#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld64.so.1"
16841
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
16842
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}"
16844
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}"
16846
#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
16847
#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
16848
#if DEFAULT_LIBC == LIBC_UCLIBC
16849
--- a/src/gcc/config/rs6000/darwin.h
16850
+++ b/src/gcc/config/rs6000/darwin.h
16851
@@ -205,7 +205,8 @@
16852
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \
16853
"vrsave", "vscr", \
16854
"spe_acc", "spefscr", \
16857
+ "tfhar", "tfiar", "texasr" \
16860
/* This outputs NAME to FILE. */
16861
--- a/src/gcc/config/rs6000/rs6000.c
16862
+++ b/src/gcc/config/rs6000/rs6000.c
16864
/* Subroutines used for code generation on IBM RS/6000.
16865
- Copyright (C) 1991-2013 Free Software Foundation, Inc.
16866
+ Copyright (C) 1991-2014 Free Software Foundation, Inc.
16867
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
16869
This file is part of GCC.
16872
#include "params.h"
16873
#include "tm-constrs.h"
16876
#include "tree-vectorizer.h"
16877
#include "dumpfile.h"
16879
int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
16880
int varargs_save_offset; /* offset to save the varargs registers */
16881
int ehrd_offset; /* offset to EH return data */
16882
+ int ehcr_offset; /* offset to EH CR field data */
16883
int reg_size; /* register size (4 or 8) */
16884
HOST_WIDE_INT vars_size; /* variable save area size */
16885
int parm_size; /* outgoing parameter size */
16886
@@ -139,6 +141,8 @@
16887
64-bits wide and is allocated early enough so that the offset
16888
does not overflow the 16-bit load/store offset field. */
16889
rtx sdmode_stack_slot;
16890
+ /* Flag if r2 setup is needed with ELFv2 ABI. */
16891
+ bool r2_setup_needed;
16892
} machine_function;
16894
/* Support targetm.vectorize.builtin_mask_for_load. */
16895
@@ -189,9 +193,6 @@
16896
/* Map register number to register class. */
16897
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
16899
-/* Reload functions based on the type and the vector unit. */
16900
-static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2];
16902
static int dbg_cost_ctrl;
16904
/* Built in types. */
16905
@@ -289,6 +290,105 @@
16906
don't link in rs6000-c.c, so we can't call it directly. */
16907
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
16909
+/* Simplfy register classes into simpler classifications. We assume
16910
+ GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
16911
+ check for standard register classes (gpr/floating/altivec/vsx) and
16912
+ floating/vector classes (float/altivec/vsx). */
16914
+enum rs6000_reg_type {
16919
+ ALTIVEC_REG_TYPE,
16927
+/* Map register class to register type. */
16928
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
16930
+/* First/last register type for the 'normal' register types (i.e. general
16931
+ purpose, floating point, altivec, and VSX registers). */
16932
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
16934
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
16937
+/* Register classes we care about in secondary reload or go if legitimate
16938
+ address. We only need to worry about GPR, FPR, and Altivec registers here,
16939
+ along an ANY field that is the OR of the 3 register classes. */
16941
+enum rs6000_reload_reg_type {
16942
+ RELOAD_REG_GPR, /* General purpose registers. */
16943
+ RELOAD_REG_FPR, /* Traditional floating point regs. */
16944
+ RELOAD_REG_VMX, /* Altivec (VMX) registers. */
16945
+ RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
16949
+/* For setting up register classes, loop through the 3 register classes mapping
16950
+ into real registers, and skip the ANY class, which is just an OR of the
16952
+#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
16953
+#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
16955
+/* Map reload register type to a register in the register class. */
16956
+struct reload_reg_map_type {
16957
+ const char *name; /* Register class name. */
16958
+ int reg; /* Register in the register class. */
16961
+static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
16962
+ { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
16963
+ { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
16964
+ { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
16965
+ { "Any", -1 }, /* RELOAD_REG_ANY. */
16968
+/* Mask bits for each register class, indexed per mode. Historically the
16969
+ compiler has been more restrictive which types can do PRE_MODIFY instead of
16970
+ PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */
16971
+typedef unsigned char addr_mask_type;
16973
+#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
16974
+#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
16975
+#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
16976
+#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
16977
+#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
16978
+#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
16980
+/* Register type masks based on the type, of valid addressing modes. */
16981
+struct rs6000_reg_addr {
16982
+ enum insn_code reload_load; /* INSN to reload for loading. */
16983
+ enum insn_code reload_store; /* INSN to reload for storing. */
16984
+ enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
16985
+ enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
16986
+ enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
16987
+ addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
16990
+static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
16992
+/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
16993
+static inline bool
16994
+mode_supports_pre_incdec_p (enum machine_mode mode)
16996
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
17000
+/* Helper function to say whether a mode supports PRE_MODIFY. */
17001
+static inline bool
17002
+mode_supports_pre_modify_p (enum machine_mode mode)
17004
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
17009
/* Target cpu costs. */
17011
@@ -828,6 +928,25 @@
17012
12, /* prefetch streams */
17015
+/* Instruction costs on POWER8 processors. */
17017
+struct processor_costs power8_cost = {
17018
+ COSTS_N_INSNS (3), /* mulsi */
17019
+ COSTS_N_INSNS (3), /* mulsi_const */
17020
+ COSTS_N_INSNS (3), /* mulsi_const9 */
17021
+ COSTS_N_INSNS (3), /* muldi */
17022
+ COSTS_N_INSNS (19), /* divsi */
17023
+ COSTS_N_INSNS (35), /* divdi */
17024
+ COSTS_N_INSNS (3), /* fp */
17025
+ COSTS_N_INSNS (3), /* dmul */
17026
+ COSTS_N_INSNS (14), /* sdiv */
17027
+ COSTS_N_INSNS (17), /* ddiv */
17028
+ 128, /* cache line size */
17029
+ 32, /* l1 cache */
17030
+ 256, /* l2 cache */
17031
+ 12, /* prefetch streams */
17034
/* Instruction costs on POWER A2 processors. */
17036
struct processor_costs ppca2_cost = {
17037
@@ -855,6 +974,7 @@
17038
#undef RS6000_BUILTIN_A
17039
#undef RS6000_BUILTIN_D
17040
#undef RS6000_BUILTIN_E
17041
+#undef RS6000_BUILTIN_H
17042
#undef RS6000_BUILTIN_P
17043
#undef RS6000_BUILTIN_Q
17044
#undef RS6000_BUILTIN_S
17045
@@ -878,6 +998,9 @@
17046
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
17047
{ NAME, ICODE, MASK, ATTR },
17049
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
17050
+ { NAME, ICODE, MASK, ATTR },
17052
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
17053
{ NAME, ICODE, MASK, ATTR },
17055
@@ -908,6 +1031,7 @@
17056
#undef RS6000_BUILTIN_A
17057
#undef RS6000_BUILTIN_D
17058
#undef RS6000_BUILTIN_E
17059
+#undef RS6000_BUILTIN_H
17060
#undef RS6000_BUILTIN_P
17061
#undef RS6000_BUILTIN_Q
17062
#undef RS6000_BUILTIN_S
17063
@@ -948,6 +1072,7 @@
17064
static void paired_init_builtins (void);
17065
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
17066
static void spe_init_builtins (void);
17067
+static void htm_init_builtins (void);
17068
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
17069
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
17070
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
17071
@@ -1020,6 +1145,13 @@
17072
static void rs6000_print_builtin_options (FILE *, int, const char *,
17075
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
17076
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
17077
+ enum rs6000_reg_type,
17078
+ enum machine_mode,
17079
+ secondary_reload_info *,
17082
/* Hash table stuff for keeping track of TOC entries. */
17084
struct GTY(()) toc_hash_struct
17085
@@ -1068,7 +1200,9 @@
17086
/* SPE registers. */
17087
"spe_acc", "spefscr",
17088
/* Soft frame pointer. */
17091
+ /* HTM SPR registers. */
17092
+ "tfhar", "tfiar", "texasr"
17095
#ifdef TARGET_REGNAMES
17096
@@ -1094,7 +1228,9 @@
17097
/* SPE registers. */
17098
"spe_acc", "spefscr",
17099
/* Soft frame pointer. */
17102
+ /* HTM SPR registers. */
17103
+ "tfhar", "tfiar", "texasr"
17107
@@ -1316,6 +1452,9 @@
17108
#undef TARGET_RETURN_IN_MEMORY
17109
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
17111
+#undef TARGET_RETURN_IN_MSB
17112
+#define TARGET_RETURN_IN_MSB rs6000_return_in_msb
17114
#undef TARGET_SETUP_INCOMING_VARARGS
17115
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
17117
@@ -1425,6 +1564,9 @@
17118
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
17119
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
17121
+#undef TARGET_LRA_P
17122
+#define TARGET_LRA_P rs6000_lra_p
17124
#undef TARGET_CAN_ELIMINATE
17125
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate
17127
@@ -1513,8 +1655,9 @@
17129
unsigned HOST_WIDE_INT reg_size;
17131
+ /* TF/TD modes are special in that they always take 2 registers. */
17132
if (FP_REGNO_P (regno))
17133
- reg_size = (VECTOR_MEM_VSX_P (mode)
17134
+ reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
17135
? UNITS_PER_VSX_WORD
17136
: UNITS_PER_FP_WORD);
17138
@@ -1546,16 +1689,38 @@
17140
int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
17142
+ /* PTImode can only go in GPRs. Quad word memory operations require even/odd
17143
+ register combinations, and use PTImode where we need to deal with quad
17144
+ word memory operations. Don't allow quad words in the argument or frame
17145
+ pointer registers, just registers 0..31. */
17146
+ if (mode == PTImode)
17147
+ return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
17148
+ && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
17149
+ && ((regno & 1) == 0));
17151
/* VSX registers that overlap the FPR registers are larger than for non-VSX
17152
implementations. Don't allow an item to be split between a FP register
17153
- and an Altivec register. */
17154
- if (VECTOR_MEM_VSX_P (mode))
17155
+ and an Altivec register. Allow TImode in all VSX registers if the user
17157
+ if (TARGET_VSX && VSX_REGNO_P (regno)
17158
+ && (VECTOR_MEM_VSX_P (mode)
17159
+ || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
17160
+ || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
17161
+ || (TARGET_VSX_TIMODE && mode == TImode)))
17163
if (FP_REGNO_P (regno))
17164
return FP_REGNO_P (last_regno);
17166
if (ALTIVEC_REGNO_P (regno))
17167
- return ALTIVEC_REGNO_P (last_regno);
17169
+ if (mode == SFmode && !TARGET_UPPER_REGS_SF)
17172
+ if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
17175
+ return ALTIVEC_REGNO_P (last_regno);
17179
/* The GPRs can hold any mode, but values bigger than one register
17180
@@ -1564,8 +1729,7 @@
17181
return INT_REGNO_P (last_regno);
17183
/* The float registers (except for VSX vector modes) can only hold floating
17184
- modes and DImode. This excludes the 32-bit decimal float mode for
17186
+ modes and DImode. */
17187
if (FP_REGNO_P (regno))
17189
if (SCALAR_FLOAT_MODE_P (mode)
17190
@@ -1599,9 +1763,8 @@
17191
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
17194
- /* We cannot put TImode anywhere except general register and it must be able
17195
- to fit within the register set. In the future, allow TImode in the
17196
- Altivec or VSX registers. */
17197
+ /* We cannot put non-VSX TImode or PTImode anywhere except general register
17198
+ and it must be able to fit within the register set. */
17200
return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
17202
@@ -1674,10 +1837,77 @@
17206
+ len += fprintf (stderr, "%sreg-class = %s", comma,
17207
+ reg_class_names[(int)rs6000_regno_regclass[r]]);
17212
+ fprintf (stderr, ",\n\t");
17216
fprintf (stderr, "%sregno = %d\n", comma, r);
17220
+static const char *
17221
+rs6000_debug_vector_unit (enum rs6000_vector v)
17227
+ case VECTOR_NONE: ret = "none"; break;
17228
+ case VECTOR_ALTIVEC: ret = "altivec"; break;
17229
+ case VECTOR_VSX: ret = "vsx"; break;
17230
+ case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
17231
+ case VECTOR_PAIRED: ret = "paired"; break;
17232
+ case VECTOR_SPE: ret = "spe"; break;
17233
+ case VECTOR_OTHER: ret = "other"; break;
17234
+ default: ret = "unknown"; break;
17240
+/* Print the address masks in a human readble fashion. */
17241
+DEBUG_FUNCTION void
17242
+rs6000_debug_print_mode (ssize_t m)
17246
+ fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
17247
+ for (rc = 0; rc < N_RELOAD_REG; rc++)
17249
+ addr_mask_type mask = reg_addr[m].addr_mask[rc];
17251
+ " %s: %c%c%c%c%c%c",
17252
+ reload_reg_map[rc].name,
17253
+ (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
17254
+ (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
17255
+ (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
17256
+ (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
17257
+ (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
17258
+ (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
17261
+ if (rs6000_vector_unit[m] != VECTOR_NONE
17262
+ || rs6000_vector_mem[m] != VECTOR_NONE
17263
+ || (reg_addr[m].reload_store != CODE_FOR_nothing)
17264
+ || (reg_addr[m].reload_load != CODE_FOR_nothing))
17267
+ " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c",
17268
+ rs6000_debug_vector_unit (rs6000_vector_unit[m]),
17269
+ rs6000_debug_vector_unit (rs6000_vector_mem[m]),
17270
+ (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
17271
+ (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
17274
+ fputs ("\n", stderr);
17277
#define DEBUG_FMT_ID "%-32s= "
17278
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
17279
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
17280
@@ -1690,6 +1920,7 @@
17281
static const char *const tf[2] = { "false", "true" };
17282
const char *nl = (const char *)0;
17284
+ size_t m1, m2, v;
17285
char costly_num[20];
17287
char flags_buffer[40];
17288
@@ -1700,20 +1931,67 @@
17289
const char *cmodel_str;
17290
struct cl_target_option cl_opts;
17292
- /* Map enum rs6000_vector to string. */
17293
- static const char *rs6000_debug_vector_unit[] = {
17300
+ /* Modes we want tieable information on. */
17301
+ static const enum machine_mode print_tieable_modes[] = {
17335
- fprintf (stderr, "Register information: (last virtual reg = %d)\n",
17336
- LAST_VIRTUAL_REGISTER);
17337
- rs6000_debug_reg_print (0, 31, "gr");
17338
- rs6000_debug_reg_print (32, 63, "fp");
17339
+ /* Virtual regs we are interested in. */
17340
+ const static struct {
17341
+ int regno; /* register number. */
17342
+ const char *name; /* register name. */
17343
+ } virtual_regs[] = {
17344
+ { STACK_POINTER_REGNUM, "stack pointer:" },
17345
+ { TOC_REGNUM, "toc: " },
17346
+ { STATIC_CHAIN_REGNUM, "static chain: " },
17347
+ { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
17348
+ { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
17349
+ { ARG_POINTER_REGNUM, "arg pointer: " },
17350
+ { FRAME_POINTER_REGNUM, "frame pointer:" },
17351
+ { FIRST_PSEUDO_REGISTER, "first pseudo: " },
17352
+ { FIRST_VIRTUAL_REGISTER, "first virtual:" },
17353
+ { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
17354
+ { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
17355
+ { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
17356
+ { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
17357
+ { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
17358
+ { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
17359
+ { LAST_VIRTUAL_REGISTER, "last virtual: " },
17362
+ fputs ("\nHard register information:\n", stderr);
17363
+ rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
17364
+ rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
17365
rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
17366
LAST_ALTIVEC_REGNO,
17368
@@ -1726,6 +2004,10 @@
17369
rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
17370
rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
17372
+ fputs ("\nVirtual/stack/frame registers:\n", stderr);
17373
+ for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
17374
+ fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
17378
"d reg_class = %s\n"
17379
@@ -1734,7 +2016,19 @@
17380
"wa reg_class = %s\n"
17381
"wd reg_class = %s\n"
17382
"wf reg_class = %s\n"
17383
- "ws reg_class = %s\n\n",
17384
+ "wg reg_class = %s\n"
17385
+ "wl reg_class = %s\n"
17386
+ "wm reg_class = %s\n"
17387
+ "wr reg_class = %s\n"
17388
+ "ws reg_class = %s\n"
17389
+ "wt reg_class = %s\n"
17390
+ "wu reg_class = %s\n"
17391
+ "wv reg_class = %s\n"
17392
+ "ww reg_class = %s\n"
17393
+ "wx reg_class = %s\n"
17394
+ "wy reg_class = %s\n"
17395
+ "wz reg_class = %s\n"
17397
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
17398
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
17399
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
17400
@@ -1741,18 +2035,51 @@
17401
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
17402
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
17403
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
17404
- reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
17405
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
17406
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
17407
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
17408
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
17409
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
17410
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
17411
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
17412
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
17413
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
17414
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
17415
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
17416
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
17419
for (m = 0; m < NUM_MACHINE_MODES; ++m)
17420
- if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
17423
- fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n",
17424
- GET_MODE_NAME (m),
17425
- rs6000_debug_vector_unit[ rs6000_vector_unit[m] ],
17426
- rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]);
17428
+ rs6000_debug_print_mode (m);
17430
+ fputs ("\n", stderr);
17432
+ for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
17434
+ enum machine_mode mode1 = print_tieable_modes[m1];
17435
+ bool first_time = true;
17437
+ nl = (const char *)0;
17438
+ for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
17440
+ enum machine_mode mode2 = print_tieable_modes[m2];
17441
+ if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
17445
+ fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
17447
+ first_time = false;
17450
+ fprintf (stderr, " %s", GET_MODE_NAME (mode2));
17455
+ fputs ("\n", stderr);
17459
fputs (nl, stderr);
17461
@@ -1913,6 +2240,7 @@
17463
case ABI_NONE: abi_str = "none"; break;
17464
case ABI_AIX: abi_str = "aix"; break;
17465
+ case ABI_ELFv2: abi_str = "ELFv2"; break;
17466
case ABI_V4: abi_str = "V4"; break;
17467
case ABI_DARWIN: abi_str = "darwin"; break;
17468
default: abi_str = "unknown"; break;
17469
@@ -1935,6 +2263,13 @@
17470
if (TARGET_LINK_STACK)
17471
fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
17473
+ if (targetm.lra_p ())
17474
+ fprintf (stderr, DEBUG_FMT_S, "lra", "true");
17476
+ if (TARGET_P8_FUSION)
17477
+ fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
17478
+ (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
17480
fprintf (stderr, DEBUG_FMT_S, "plt-format",
17481
TARGET_SECURE_PLT ? "secure" : "bss");
17482
fprintf (stderr, DEBUG_FMT_S, "struct-return",
17483
@@ -1954,11 +2289,106 @@
17484
(int)RS6000_BUILTIN_COUNT);
17488
+/* Update the addr mask bits in reg_addr to help secondary reload and go if
17489
+ legitimate address support to figure out the appropriate addressing to
17493
+rs6000_setup_reg_addr_masks (void)
17495
+ ssize_t rc, reg, m, nregs;
17496
+ addr_mask_type any_addr_mask, addr_mask;
17498
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
17500
+ /* SDmode is special in that we want to access it only via REG+REG
17501
+ addressing on power7 and above, since we want to use the LFIWZX and
17502
+ STFIWZX instructions to load it. */
17503
+ bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
17505
+ any_addr_mask = 0;
17506
+ for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
17509
+ reg = reload_reg_map[rc].reg;
17511
+ /* Can mode values go in the GPR/FPR/Altivec registers? */
17512
+ if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
17514
+ nregs = rs6000_hard_regno_nregs[m][reg];
17515
+ addr_mask |= RELOAD_REG_VALID;
17517
+ /* Indicate if the mode takes more than 1 physical register. If
17518
+ it takes a single register, indicate it can do REG+REG
17520
+ if (nregs > 1 || m == BLKmode)
17521
+ addr_mask |= RELOAD_REG_MULTIPLE;
17523
+ addr_mask |= RELOAD_REG_INDEXED;
17525
+ /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
17526
+ addressing. Restrict addressing on SPE for 64-bit types
17527
+ because of the SUBREG hackery used to address 64-bit floats in
17528
+ '32-bit' GPRs. To simplify secondary reload, don't allow
17529
+ update forms on scalar floating point types that can go in the
17530
+ upper registers. */
17532
+ if (TARGET_UPDATE
17533
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
17534
+ && GET_MODE_SIZE (m) <= 8
17535
+ && !VECTOR_MODE_P (m)
17536
+ && !COMPLEX_MODE_P (m)
17537
+ && !indexed_only_p
17538
+ && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8)
17539
+ && !(m == DFmode && TARGET_UPPER_REGS_DF)
17540
+ && !(m == SFmode && TARGET_UPPER_REGS_SF))
17542
+ addr_mask |= RELOAD_REG_PRE_INCDEC;
17544
+ /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
17545
+ we don't allow PRE_MODIFY for some multi-register
17550
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
17554
+ if (TARGET_POWERPC64)
17555
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
17560
+ if (TARGET_DF_INSN)
17561
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
17567
+ /* GPR and FPR registers can do REG+OFFSET addressing, except
17568
+ possibly for SDmode. */
17569
+ if ((addr_mask != 0) && !indexed_only_p
17570
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
17571
+ addr_mask |= RELOAD_REG_OFFSET;
17573
+ reg_addr[m].addr_mask[rc] = addr_mask;
17574
+ any_addr_mask |= addr_mask;
17577
+ reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
17582
/* Initialize the various global tables that are based on register size. */
17584
rs6000_init_hard_regno_mode_ok (bool global_init_p)
17591
@@ -1987,21 +2417,55 @@
17592
rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
17593
rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
17594
rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
17595
+ rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
17596
+ rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
17597
+ rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
17598
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
17599
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
17601
- /* Precalculate vector information, this must be set up before the
17602
- rs6000_hard_regno_nregs_internal below. */
17603
- for (m = 0; m < NUM_MACHINE_MODES; ++m)
17604
+ /* Precalculate register class to simpler reload register class. We don't
17605
+ need all of the register classes that are combinations of different
17606
+ classes, just the simple ones that have constraint letters. */
17607
+ for (c = 0; c < N_REG_CLASSES; c++)
17608
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
17610
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
17611
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
17612
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
17613
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
17614
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
17615
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
17616
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
17617
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
17618
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
17619
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
17620
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
17621
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
17625
- rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE;
17626
- rs6000_vector_reload[m][0] = CODE_FOR_nothing;
17627
- rs6000_vector_reload[m][1] = CODE_FOR_nothing;
17628
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
17629
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
17633
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
17634
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
17637
- for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++)
17638
- rs6000_constraints[c] = NO_REGS;
17639
+ /* Precalculate the valid memory formats as well as the vector information,
17640
+ this must be set up before the rs6000_hard_regno_nregs_internal calls
17642
+ gcc_assert ((int)VECTOR_NONE == 0);
17643
+ memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
17644
+ memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
17646
+ gcc_assert ((int)CODE_FOR_nothing == 0);
17647
+ memset ((void *) ®_addr[0], '\0', sizeof (reg_addr));
17649
+ gcc_assert ((int)NO_REGS == 0);
17650
+ memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
17652
/* The VSX hardware allows native alignment for vectors, but control whether the compiler
17653
believes it can use native alignment or still uses 128-bit alignment. */
17654
if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
17655
@@ -2062,12 +2526,13 @@
17659
- /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
17660
- Altivec doesn't have 64-bit support. */
17661
+ /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
17662
+ do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
17665
rs6000_vector_mem[V2DImode] = VECTOR_VSX;
17666
- rs6000_vector_unit[V2DImode] = VECTOR_NONE;
17667
+ rs6000_vector_unit[V2DImode]
17668
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
17669
rs6000_vector_align[V2DImode] = align64;
17672
@@ -2076,14 +2541,48 @@
17674
rs6000_vector_unit[DFmode] = VECTOR_VSX;
17675
rs6000_vector_mem[DFmode]
17676
- = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE);
17677
+ = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
17678
rs6000_vector_align[DFmode] = align64;
17681
+ /* Allow TImode in VSX register and set the VSX memory macros. */
17682
+ if (TARGET_VSX && TARGET_VSX_TIMODE)
17684
+ rs6000_vector_mem[TImode] = VECTOR_VSX;
17685
+ rs6000_vector_align[TImode] = align64;
17688
/* TODO add SPE and paired floating point vector support. */
17690
/* Register class constraints for the constraints that depend on compile
17692
+ switches. When the VSX code was added, different constraints were added
17693
+ based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
17694
+ of the VSX registers are used. The register classes for scalar floating
17695
+ point types is set, based on whether we allow that type into the upper
17696
+ (Altivec) registers. GCC has register classes to target the Altivec
17697
+ registers for load/store operations, to select using a VSX memory
17698
+ operation instead of the traditional floating point operation. The
17701
+ d - Register class to use with traditional DFmode instructions.
17702
+ f - Register class to use with traditional SFmode instructions.
17703
+ v - Altivec register.
17704
+ wa - Any VSX register.
17705
+ wd - Preferred register class for V2DFmode.
17706
+ wf - Preferred register class for V4SFmode.
17707
+ wg - Float register for power6x move insns.
17708
+ wl - Float register if we can do 32-bit signed int loads.
17709
+ wm - VSX register for ISA 2.07 direct move operations.
17710
+ wr - GPR if 64-bit mode is permitted.
17711
+ ws - Register class to do ISA 2.06 DF operations.
17712
+ wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
17713
+ wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
17714
+ wt - VSX register for TImode in VSX registers.
17715
+ ww - Register class to do SF conversions in with VSX operations.
17716
+ wx - Float register if we can do 32-bit int stores.
17717
+ wy - Register class to do ISA 2.07 SF operations.
17718
+ wz - Float register if we can do 32-bit unsigned int loads. */
17720
if (TARGET_HARD_FLOAT && TARGET_FPRS)
17721
rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
17723
@@ -2092,64 +2591,158 @@
17727
- /* At present, we just use VSX_REGS, but we have different constraints
17728
- based on the use, in case we want to fine tune the default register
17729
- class used. wa = any VSX register, wf = register class to use for
17730
- V4SF, wd = register class to use for V2DF, and ws = register classs to
17731
- use for DF scalars. */
17732
rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
17733
+ rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
17734
rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
17735
- rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
17736
- rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY
17740
+ if (TARGET_VSX_TIMODE)
17741
+ rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;
17743
+ if (TARGET_UPPER_REGS_DF)
17745
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
17746
+ rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
17749
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
17752
+ /* Add conditional constraints based on various options, to allow us to
17753
+ collapse multiple insn patterns. */
17754
if (TARGET_ALTIVEC)
17755
rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
17757
- /* Set up the reload helper functions. */
17758
+ if (TARGET_MFPGPR)
17759
+ rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
17761
+ if (TARGET_LFIWAX)
17762
+ rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
17764
+ if (TARGET_DIRECT_MOVE)
17765
+ rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
17767
+ if (TARGET_POWERPC64)
17768
+ rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
17770
+ if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)
17772
+ rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
17773
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
17774
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
17776
+ else if (TARGET_P8_VECTOR)
17778
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
17779
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
17781
+ else if (TARGET_VSX)
17782
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
17784
+ if (TARGET_STFIWX)
17785
+ rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
17787
+ if (TARGET_LFIWZX)
17788
+ rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
17790
+ /* Set up the reload helper and direct move functions. */
17791
if (TARGET_VSX || TARGET_ALTIVEC)
17795
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store;
17796
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load;
17797
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store;
17798
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load;
17799
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store;
17800
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load;
17801
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store;
17802
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load;
17803
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store;
17804
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load;
17805
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store;
17806
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load;
17807
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
17808
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
17809
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
17810
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
17811
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
17812
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
17813
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
17814
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
17815
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
17816
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
17817
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
17818
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
17819
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
17820
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
17822
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store;
17823
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load;
17824
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
17825
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
17826
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
17827
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
17829
+ if (TARGET_P8_VECTOR)
17831
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
17832
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
17833
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
17834
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
17836
+ if (TARGET_VSX_TIMODE)
17838
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
17839
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
17841
+ if (TARGET_DIRECT_MOVE)
17843
+ if (TARGET_POWERPC64)
17845
+ reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
17846
+ reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
17847
+ reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
17848
+ reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
17849
+ reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
17850
+ reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
17851
+ reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
17852
+ reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
17854
+ reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
17855
+ reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
17856
+ reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
17857
+ reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
17858
+ reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
17859
+ reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
17860
+ reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
17861
+ reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
17865
+ reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
17866
+ reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
17867
+ reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
17873
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store;
17874
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load;
17875
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store;
17876
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load;
17877
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store;
17878
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load;
17879
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store;
17880
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load;
17881
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store;
17882
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load;
17883
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store;
17884
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load;
17885
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
17886
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
17887
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
17888
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
17889
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
17890
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
17891
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
17892
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
17893
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
17894
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
17895
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
17896
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
17897
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
17898
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
17900
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store;
17901
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load;
17902
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
17903
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
17904
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
17905
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
17907
+ if (TARGET_P8_VECTOR)
17909
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
17910
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
17911
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
17912
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
17914
+ if (TARGET_VSX_TIMODE)
17916
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
17917
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
17922
@@ -2267,6 +2860,11 @@
17926
+ /* Update the addr mask bits in reg_addr to help secondary reload and go if
17927
+ legitimate address support to figure out the appropriate addressing to
17929
+ rs6000_setup_reg_addr_masks ();
17931
if (global_init_p || TARGET_DEBUG_TARGET)
17933
if (TARGET_DEBUG_REG)
17934
@@ -2369,16 +2967,19 @@
17936
rs6000_builtin_mask_calculate (void)
17938
- return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
17939
- | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
17940
- | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
17941
- | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
17942
- | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
17943
- | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
17944
- | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
17945
- | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
17946
- | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
17947
- | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0));
17948
+ return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
17949
+ | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
17950
+ | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
17951
+ | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
17952
+ | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
17953
+ | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
17954
+ | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
17955
+ | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
17956
+ | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
17957
+ | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
17958
+ | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
17959
+ | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
17960
+ | ((TARGET_HTM) ? RS6000_BTM_HTM : 0));
17963
/* Override command line options. Mostly we process the processor type and
17964
@@ -2609,6 +3210,24 @@
17968
+ /* If little-endian, default to -mstrict-align on older processors.
17969
+ Testing for htm matches power8 and later. */
17970
+ if (!BYTES_BIG_ENDIAN
17971
+ && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
17972
+ rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
17974
+ /* -maltivec={le,be} implies -maltivec. */
17975
+ if (rs6000_altivec_element_order != 0)
17976
+ rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
17978
+ /* Disallow -maltivec=le in big endian mode for now. This is not
17979
+ known to be useful for anyone. */
17980
+ if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
17982
+ warning (0, N_("-maltivec=le not allowed for big-endian targets"));
17983
+ rs6000_altivec_element_order = 0;
17986
/* Add some warnings for VSX. */
17989
@@ -2619,15 +3238,13 @@
17990
if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
17991
msg = N_("-mvsx requires hardware floating point");
17993
- rs6000_isa_flags &= ~ OPTION_MASK_VSX;
17995
+ rs6000_isa_flags &= ~ OPTION_MASK_VSX;
17996
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
17999
else if (TARGET_PAIRED_FLOAT)
18000
msg = N_("-mvsx and -mpaired are incompatible");
18001
- /* The hardware will allow VSX and little endian, but until we make sure
18002
- things like vector select, etc. work don't allow VSX on little endian
18003
- systems at this point. */
18004
- else if (!BYTES_BIG_ENDIAN)
18005
- msg = N_("-mvsx used with little endian code");
18006
else if (TARGET_AVOID_XFORM > 0)
18007
msg = N_("-mvsx needs indexed addressing");
18008
else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
18009
@@ -2647,9 +3264,24 @@
18013
+ /* If hard-float/altivec/vsx were explicitly turned off then don't allow
18014
+ the -mcpu setting to enable options that conflict. */
18015
+ if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
18016
+ && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
18017
+ | OPTION_MASK_ALTIVEC
18018
+ | OPTION_MASK_VSX)) != 0)
18019
+ rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
18020
+ | OPTION_MASK_DIRECT_MOVE)
18021
+ & ~rs6000_isa_flags_explicit);
18023
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
18024
+ rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
18026
/* For the newer switches (vsx, dfp, etc.) set some of the older options,
18027
unless the user explicitly used the -mno-<option> to disable the code. */
18029
+ if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
18030
+ rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
18031
+ else if (TARGET_VSX)
18032
rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
18033
else if (TARGET_POPCNTD)
18034
rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
18035
@@ -2664,6 +3296,92 @@
18036
else if (TARGET_ALTIVEC)
18037
rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
18039
+ if (TARGET_CRYPTO && !TARGET_ALTIVEC)
18041
+ if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
18042
+ error ("-mcrypto requires -maltivec");
18043
+ rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
18046
+ if (TARGET_DIRECT_MOVE && !TARGET_VSX)
18048
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
18049
+ error ("-mdirect-move requires -mvsx");
18050
+ rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
18053
+ if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
18055
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
18056
+ error ("-mpower8-vector requires -maltivec");
18057
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
18060
+ if (TARGET_P8_VECTOR && !TARGET_VSX)
18062
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
18063
+ error ("-mpower8-vector requires -mvsx");
18064
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
18067
+ if (TARGET_VSX_TIMODE && !TARGET_VSX)
18069
+ if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
18070
+ error ("-mvsx-timode requires -mvsx");
18071
+ rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
18074
+ /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
18075
+ silently turn off quad memory mode. */
18076
+ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
18078
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
18079
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
18081
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
18082
+ warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
18084
+ rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
18085
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC);
18088
+ /* Non-atomic quad memory load/store are disabled for little endian, since
18089
+ the words are reversed, but atomic operations can still be done by
18090
+ swapping the words. */
18091
+ if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
18093
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
18094
+ warning (0, N_("-mquad-memory is not available in little endian mode"));
18096
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
18099
+ /* Assume if the user asked for normal quad memory instructions, they want
18100
+ the atomic versions as well, unless they explicity told us not to use quad
18101
+ word atomic instructions. */
18102
+ if (TARGET_QUAD_MEMORY
18103
+ && !TARGET_QUAD_MEMORY_ATOMIC
18104
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
18105
+ rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
18107
+ /* Enable power8 fusion if we are tuning for power8, even if we aren't
18108
+ generating power8 instructions. */
18109
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
18110
+ rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
18111
+ & OPTION_MASK_P8_FUSION);
18113
+ /* Power8 does not fuse sign extended loads with the addis. If we are
18114
+ optimizing at high levels for speed, convert a sign extended load into a
18115
+ zero extending load, and an explicit sign extension. */
18116
+ if (TARGET_P8_FUSION
18117
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
18118
+ && optimize_function_for_speed_p (cfun)
18119
+ && optimize >= 3)
18120
+ rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
18122
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
18123
+ rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
18125
/* E500mc does "better" if we inline more aggressively. Respect the
18126
user's opinion, though. */
18127
if (rs6000_block_move_inline_limit == 0
18128
@@ -2790,6 +3508,9 @@
18129
if (flag_section_anchors)
18130
TARGET_NO_FP_IN_TOC = 1;
18132
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
18133
+ rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
18135
#ifdef SUBTARGET_OVERRIDE_OPTIONS
18136
SUBTARGET_OVERRIDE_OPTIONS;
18138
@@ -2800,6 +3521,9 @@
18139
SUB3TARGET_OVERRIDE_OPTIONS;
18142
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
18143
+ rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
18145
/* For the E500 family of cores, reset the single/double FP flags to let us
18146
check that they remain constant across attributes or pragmas. Also,
18147
clear a possible request for string instructions, not supported and which
18148
@@ -2849,16 +3573,19 @@
18149
&& rs6000_cpu != PROCESSOR_POWER5
18150
&& rs6000_cpu != PROCESSOR_POWER6
18151
&& rs6000_cpu != PROCESSOR_POWER7
18152
+ && rs6000_cpu != PROCESSOR_POWER8
18153
&& rs6000_cpu != PROCESSOR_PPCA2
18154
&& rs6000_cpu != PROCESSOR_CELL
18155
&& rs6000_cpu != PROCESSOR_PPC476);
18156
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
18157
|| rs6000_cpu == PROCESSOR_POWER5
18158
- || rs6000_cpu == PROCESSOR_POWER7);
18159
+ || rs6000_cpu == PROCESSOR_POWER7
18160
+ || rs6000_cpu == PROCESSOR_POWER8);
18161
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
18162
|| rs6000_cpu == PROCESSOR_POWER5
18163
|| rs6000_cpu == PROCESSOR_POWER6
18164
|| rs6000_cpu == PROCESSOR_POWER7
18165
+ || rs6000_cpu == PROCESSOR_POWER8
18166
|| rs6000_cpu == PROCESSOR_PPCE500MC
18167
|| rs6000_cpu == PROCESSOR_PPCE500MC64
18168
|| rs6000_cpu == PROCESSOR_PPCE5500
18169
@@ -2988,7 +3715,7 @@
18171
/* We should always be splitting complex arguments, but we can't break
18172
Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
18173
- if (DEFAULT_ABI != ABI_AIX)
18174
+ if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
18175
targetm.calls.split_complex_arg = NULL;
18178
@@ -3102,6 +3829,10 @@
18179
rs6000_cost = &power7_cost;
18182
+ case PROCESSOR_POWER8:
18183
+ rs6000_cost = &power8_cost;
18186
case PROCESSOR_PPCA2:
18187
rs6000_cost = &ppca2_cost;
18189
@@ -3274,7 +4005,8 @@
18190
&& (rs6000_cpu == PROCESSOR_POWER4
18191
|| rs6000_cpu == PROCESSOR_POWER5
18192
|| rs6000_cpu == PROCESSOR_POWER6
18193
- || rs6000_cpu == PROCESSOR_POWER7))
18194
+ || rs6000_cpu == PROCESSOR_POWER7
18195
+ || rs6000_cpu == PROCESSOR_POWER8))
18198
return align_loops_log;
18199
@@ -3813,6 +4545,22 @@
18200
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
18203
+ case BUILT_IN_CLZIMAX:
18204
+ case BUILT_IN_CLZLL:
18205
+ case BUILT_IN_CLZL:
18206
+ case BUILT_IN_CLZ:
18207
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
18209
+ if (out_mode == QImode && out_n == 16)
18210
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
18211
+ else if (out_mode == HImode && out_n == 8)
18212
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
18213
+ else if (out_mode == SImode && out_n == 4)
18214
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
18215
+ else if (out_mode == DImode && out_n == 2)
18216
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
18219
case BUILT_IN_COPYSIGN:
18220
if (VECTOR_UNIT_VSX_P (V2DFmode)
18221
&& out_mode == DFmode && out_n == 2
18222
@@ -3828,6 +4576,22 @@
18223
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
18224
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
18226
+ case BUILT_IN_POPCOUNTIMAX:
18227
+ case BUILT_IN_POPCOUNTLL:
18228
+ case BUILT_IN_POPCOUNTL:
18229
+ case BUILT_IN_POPCOUNT:
18230
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
18232
+ if (out_mode == QImode && out_n == 16)
18233
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
18234
+ else if (out_mode == HImode && out_n == 8)
18235
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
18236
+ else if (out_mode == SImode && out_n == 4)
18237
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
18238
+ else if (out_mode == DImode && out_n == 2)
18239
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
18242
case BUILT_IN_SQRT:
18243
if (VECTOR_UNIT_VSX_P (V2DFmode)
18244
&& out_mode == DFmode && out_n == 2
18245
@@ -4043,7 +4807,11 @@
18249
- if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2))
18250
+ if (DEFAULT_ABI == ABI_ELFv2)
18251
+ fprintf (file, "\t.abiversion 2\n");
18253
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
18254
+ || (TARGET_ELF && flag_pic == 2))
18256
switch_to_section (toc_section);
18257
switch_to_section (text_section);
18258
@@ -4239,7 +5007,7 @@
18260
val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
18262
- msb_val = val > 0 ? 0 : -1;
18263
+ msb_val = val >= 0 ? 0 : -1;
18265
/* Construct the value to be splatted, if possible. If not, return 0. */
18266
for (i = 2; i <= copies; i *= 2)
18267
@@ -4274,15 +5042,16 @@
18269
/* Check if VAL is present in every STEP-th element, and the
18270
other elements are filled with its most significant bit. */
18271
- for (i = 0; i < nunits - 1; ++i)
18272
+ for (i = 1; i < nunits; ++i)
18274
HOST_WIDE_INT desired_val;
18275
- if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0)
18276
+ unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
18277
+ if ((i & (step - 1)) == 0)
18280
desired_val = msb_val;
18282
- if (desired_val != const_vector_elt_as_int (op, i))
18283
+ if (desired_val != const_vector_elt_as_int (op, elt))
18287
@@ -4698,9 +5467,12 @@
18289
rtx freg = gen_reg_rtx (V4SFmode);
18290
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
18291
+ rtx cvt = ((TARGET_XSCVDPSPN)
18292
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
18293
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
18295
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
18296
- emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
18298
+ emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
18302
@@ -4811,10 +5583,27 @@
18303
XVECEXP (mask, 0, elt*width + i)
18304
= GEN_INT (i + 0x10);
18305
x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
18306
- x = gen_rtx_UNSPEC (mode,
18307
- gen_rtvec (3, target, reg,
18308
- force_reg (V16QImode, x)),
18311
+ if (BYTES_BIG_ENDIAN)
18312
+ x = gen_rtx_UNSPEC (mode,
18313
+ gen_rtvec (3, target, reg,
18314
+ force_reg (V16QImode, x)),
18318
+ /* Invert selector. */
18319
+ rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
18320
+ gen_rtx_CONST_INT (QImode, -1));
18321
+ rtx tmp = gen_reg_rtx (V16QImode);
18322
+ emit_move_insn (tmp, splat);
18323
+ x = gen_rtx_MINUS (V16QImode, tmp, force_reg (V16QImode, x));
18324
+ emit_move_insn (tmp, x);
18326
+ /* Permute with operands reversed and adjusted selector. */
18327
+ x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
18331
emit_insn (gen_rtx_SET (VOIDmode, target, x));
18334
@@ -4938,7 +5727,7 @@
18336
if (GET_CODE (op) == SUBREG
18337
&& (mode == SImode || mode == DImode || mode == TImode
18338
- || mode == DDmode || mode == TDmode)
18339
+ || mode == DDmode || mode == TDmode || mode == PTImode)
18340
&& REG_P (SUBREG_REG (op))
18341
&& (GET_MODE (SUBREG_REG (op)) == DFmode
18342
|| GET_MODE (SUBREG_REG (op)) == TFmode))
18343
@@ -4951,6 +5740,7 @@
18344
&& REG_P (SUBREG_REG (op))
18345
&& (GET_MODE (SUBREG_REG (op)) == DImode
18346
|| GET_MODE (SUBREG_REG (op)) == TImode
18347
+ || GET_MODE (SUBREG_REG (op)) == PTImode
18348
|| GET_MODE (SUBREG_REG (op)) == DDmode
18349
|| GET_MODE (SUBREG_REG (op)) == TDmode))
18351
@@ -5087,6 +5877,73 @@
18352
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
18355
+/* Return true if this is a move direct operation between GPR registers and
18356
+ floating point/VSX registers. */
18359
+direct_move_p (rtx op0, rtx op1)
18361
+ int regno0, regno1;
18363
+ if (!REG_P (op0) || !REG_P (op1))
18366
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
18369
+ regno0 = REGNO (op0);
18370
+ regno1 = REGNO (op1);
18371
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
18374
+ if (INT_REGNO_P (regno0))
18375
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
18377
+ else if (INT_REGNO_P (regno1))
18379
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
18382
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
18389
+/* Return true if this is a load or store quad operation. This function does
18390
+ not handle the atomic quad memory instructions. */
18393
+quad_load_store_p (rtx op0, rtx op1)
18397
+ if (!TARGET_QUAD_MEMORY)
18400
+ else if (REG_P (op0) && MEM_P (op1))
18401
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
18402
+ && quad_memory_operand (op1, GET_MODE (op1))
18403
+ && !reg_overlap_mentioned_p (op0, op1));
18405
+ else if (MEM_P (op0) && REG_P (op1))
18406
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
18407
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
18412
+ if (TARGET_DEBUG_ADDR)
18414
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
18415
+ ret ? "true" : "false");
18416
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
18422
/* Given an address, return a constant offset term if one exists. */
18425
@@ -5170,7 +6027,11 @@
18429
- /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. */
18431
+ /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
18432
+ TImode is not a vector mode, if we want to use the VSX registers to
18433
+ move it around, we need to restrict ourselves to reg+reg
18435
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
18438
@@ -5184,6 +6045,13 @@
18443
+ /* If we can do direct load/stores of SDmode, restrict it to reg+reg
18444
+ addressing for the LFIWZX and STFIWX instructions. */
18445
+ if (TARGET_NO_SDMODE_STACK)
18452
@@ -5387,7 +6255,7 @@
18454
if (!reg_offset_addressing_ok_p (mode))
18455
return virtual_stack_registers_memory_p (x);
18456
- if (legitimate_constant_pool_address_p (x, mode, strict))
18457
+ if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
18459
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
18461
@@ -5416,7 +6284,7 @@
18463
/* If we are using VSX scalar loads, restrict ourselves to reg+reg
18465
- if (mode == DFmode && VECTOR_MEM_VSX_P (DFmode))
18466
+ if (VECTOR_MEM_VSX_P (mode))
18470
@@ -5435,6 +6303,7 @@
18478
@@ -5527,9 +6396,21 @@
18480
if (TARGET_ELF || TARGET_MACHO)
18482
- if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
18483
+ bool large_toc_ok;
18485
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
18488
+ /* LRA don't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
18489
+ push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
18490
+ recognizes some LO_SUM addresses as valid although this
18491
+ function says opposite. In most cases, LRA through different
18492
+ transformations can generate correct code for address reloads.
18493
+ It can not manage only some LO_SUM cases. So we need to add
18494
+ code analogous to one in rs6000_legitimize_reload_address for
18495
+ LOW_SUM here saying that some addresses are still valid. */
18496
+ large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
18497
+ && small_toc_ref (x, VOIDmode));
18498
+ if (TARGET_TOC && ! large_toc_ok)
18500
if (GET_MODE_NUNITS (mode) != 1)
18502
@@ -5539,7 +6420,7 @@
18503
&& (mode == DFmode || mode == DDmode)))
18506
- return CONSTANT_P (x);
18507
+ return CONSTANT_P (x) || large_toc_ok;
18511
@@ -5583,8 +6464,11 @@
18512
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
18513
return force_reg (Pmode, XEXP (x, 0));
18515
+ /* For TImode with load/store quad, restrict addresses to just a single
18516
+ pointer, so it works with both GPRs and VSX registers. */
18517
/* Make sure both operands are registers. */
18518
- else if (GET_CODE (x) == PLUS)
18519
+ else if (GET_CODE (x) == PLUS
18520
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
18521
return gen_rtx_PLUS (Pmode,
18522
force_reg (Pmode, XEXP (x, 0)),
18523
force_reg (Pmode, XEXP (x, 1)));
18524
@@ -5604,11 +6488,12 @@
18529
/* As in legitimate_offset_address_p we do not assume
18530
worst-case. The mode here is just a hint as to the registers
18531
used. A TImode is usually in gprs, but may actually be in
18532
fprs. Leave worst-case scenario for reload to handle via
18533
- insn constraints. */
18534
+ insn constraints. PTImode is only GPRs. */
18538
@@ -6100,10 +6985,13 @@
18539
1, const0_rtx, Pmode);
18541
r3 = gen_rtx_REG (Pmode, 3);
18542
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
18543
- insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
18544
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
18545
- insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
18546
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18548
+ if (TARGET_64BIT)
18549
+ insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
18551
+ insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
18553
else if (DEFAULT_ABI == ABI_V4)
18554
insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
18556
@@ -6122,10 +7010,13 @@
18557
1, const0_rtx, Pmode);
18559
r3 = gen_rtx_REG (Pmode, 3);
18560
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
18561
- insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
18562
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
18563
- insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
18564
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18566
+ if (TARGET_64BIT)
18567
+ insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
18569
+ insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
18571
else if (DEFAULT_ABI == ABI_V4)
18572
insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
18574
@@ -6240,7 +7131,6 @@
18575
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
18576
get_pool_mode (sym)))
18577
|| (TARGET_CMODEL == CMODEL_MEDIUM
18578
- && !CONSTANT_POOL_ADDRESS_P (sym)
18579
&& SYMBOL_REF_LOCAL_P (sym)));
18582
@@ -6339,7 +7229,7 @@
18583
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
18584
|| mode == DDmode || mode == TDmode
18585
|| mode == DImode))
18586
- && VECTOR_MEM_NONE_P (mode))
18587
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
18589
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
18590
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
18591
@@ -6370,7 +7260,7 @@
18593
if (GET_CODE (x) == SYMBOL_REF
18595
- && VECTOR_MEM_NONE_P (mode)
18596
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
18597
&& !SPE_VECTOR_MODE (mode)
18599
&& DEFAULT_ABI == ABI_DARWIN
18600
@@ -6396,6 +7286,8 @@
18601
mem is sufficiently aligned. */
18604
+ && (mode != TImode || !TARGET_VSX_TIMODE)
18605
+ && mode != PTImode
18606
&& (mode != DImode || TARGET_POWERPC64)
18607
&& ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
18608
|| (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
18609
@@ -6516,15 +7408,9 @@
18611
if (legitimate_indirect_address_p (x, reg_ok_strict))
18613
- if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
18614
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
18615
- && !SPE_VECTOR_MODE (mode)
18616
- && mode != TFmode
18617
- && mode != TDmode
18618
- /* Restrict addressing for DI because of our SUBREG hackery. */
18619
- && !(TARGET_E500_DOUBLE
18620
- && (mode == DFmode || mode == DDmode || mode == DImode))
18622
+ if (TARGET_UPDATE
18623
+ && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
18624
+ && mode_supports_pre_incdec_p (mode)
18625
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
18627
if (virtual_stack_registers_memory_p (x))
18628
@@ -6532,8 +7418,16 @@
18629
if (reg_offset_p && legitimate_small_data_p (mode, x))
18632
- && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
18633
+ && legitimate_constant_pool_address_p (x, mode,
18634
+ reg_ok_strict || lra_in_progress))
18636
+ /* For TImode, if we have load/store quad and TImode in VSX registers, only
18637
+ allow register indirect addresses. This will allow the values to go in
18638
+ either GPRs or VSX registers without reloading. The vector types would
18639
+ tend to go into VSX registers, so we allow REG+REG, while TImode seems
18640
+ somewhat split, in that some uses are GPR based, and some VSX based. */
18641
+ if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
18643
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
18644
if (! reg_ok_strict
18646
@@ -6545,8 +7439,7 @@
18648
if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
18650
- if (mode != TImode
18651
- && mode != TFmode
18652
+ if (mode != TFmode
18654
&& ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
18655
|| TARGET_POWERPC64
18656
@@ -6553,23 +7446,13 @@
18657
|| (mode != DFmode && mode != DDmode)
18658
|| (TARGET_E500_DOUBLE && mode != DDmode))
18659
&& (TARGET_POWERPC64 || mode != DImode)
18660
+ && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
18661
+ && mode != PTImode
18662
&& !avoiding_indexed_address_p (mode)
18663
&& legitimate_indexed_address_p (x, reg_ok_strict))
18665
- if (GET_CODE (x) == PRE_MODIFY
18666
- && mode != TImode
18667
- && mode != TFmode
18668
- && mode != TDmode
18669
- && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
18670
- || TARGET_POWERPC64
18671
- || ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE))
18672
- && (TARGET_POWERPC64 || mode != DImode)
18673
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
18674
- && !SPE_VECTOR_MODE (mode)
18675
- /* Restrict addressing for DI because of our SUBREG hackery. */
18676
- && !(TARGET_E500_DOUBLE
18677
- && (mode == DFmode || mode == DDmode || mode == DImode))
18679
+ if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
18680
+ && mode_supports_pre_modify_p (mode)
18681
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
18682
&& (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
18683
reg_ok_strict, false)
18684
@@ -6590,10 +7473,13 @@
18685
bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
18687
"\nrs6000_legitimate_address_p: return = %s, mode = %s, "
18688
- "strict = %d, code = %s\n",
18689
+ "strict = %d, reload = %s, code = %s\n",
18690
ret ? "true" : "false",
18691
GET_MODE_NAME (mode),
18693
+ (reload_completed
18695
+ : (reload_in_progress ? "progress" : "before")),
18696
GET_RTX_NAME (GET_CODE (x)));
18699
@@ -6759,7 +7645,7 @@
18701
/* The TOC register is not killed across calls in a way that is
18702
visible to the compiler. */
18703
- if (DEFAULT_ABI == ABI_AIX)
18704
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18705
call_really_used_regs[2] = 0;
18707
if (DEFAULT_ABI == ABI_V4
18708
@@ -6819,6 +7705,7 @@
18709
fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
18714
/* Try to output insns to set TARGET equal to the constant C if it can
18715
be done in less than N insns. Do all computations in MODE.
18716
@@ -7007,7 +7894,7 @@
18719
/* Helper for the following. Get rid of [r+r] memory refs
18720
- in cases where it won't work (TImode, TFmode, TDmode). */
18721
+ in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
18724
rs6000_eliminate_indexed_memrefs (rtx operands[2])
18725
@@ -7032,6 +7919,107 @@
18726
copy_addr_to_reg (XEXP (operands[1], 0)));
18729
+/* Generate a vector of constants to permute MODE for a little-endian
18730
+ storage operation by swapping the two halves of a vector. */
18732
+rs6000_const_vec (enum machine_mode mode)
18754
+ gcc_unreachable();
18757
+ v = rtvec_alloc (subparts);
18759
+ for (i = 0; i < subparts / 2; ++i)
18760
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
18761
+ for (i = subparts / 2; i < subparts; ++i)
18762
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
18767
+/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
18768
+ for a VSX load or store operation. */
18770
+rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
18772
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
18773
+ return gen_rtx_VEC_SELECT (mode, source, par);
18776
+/* Emit a little-endian load from vector memory location SOURCE to VSX
18777
+ register DEST in mode MODE. The load is done with two permuting
18778
+ insn's that represent an lxvd2x and xxpermdi. */
18780
+rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
18782
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
18783
+ rtx permute_mem = rs6000_gen_le_vsx_permute (source, mode);
18784
+ rtx permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
18785
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
18786
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
18789
+/* Emit a little-endian store to vector memory location DEST from VSX
18790
+ register SOURCE in mode MODE. The store is done with two permuting
18791
+ insn's that represent an xxpermdi and an stxvd2x. */
18793
+rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
18795
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
18796
+ rtx permute_src = rs6000_gen_le_vsx_permute (source, mode);
18797
+ rtx permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
18798
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
18799
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
18802
+/* Emit a sequence representing a little-endian VSX load or store,
18803
+ moving data from SOURCE to DEST in mode MODE. This is done
18804
+ separately from rs6000_emit_move to ensure it is called only
18805
+ during expand. LE VSX loads and stores introduced later are
18806
+ handled with a split. The expand-time RTL generation allows
18807
+ us to optimize away redundant pairs of register-permutes. */
18809
+rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
18811
+ gcc_assert (!BYTES_BIG_ENDIAN
18812
+ && VECTOR_MEM_VSX_P (mode)
18813
+ && mode != TImode
18814
+ && !gpr_or_gpr_p (dest, source)
18815
+ && (MEM_P (source) ^ MEM_P (dest)));
18817
+ if (MEM_P (source))
18819
+ gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
18820
+ rs6000_emit_le_vsx_load (dest, source, mode);
18824
+ if (!REG_P (source))
18825
+ source = force_reg (mode, source);
18826
+ rs6000_emit_le_vsx_store (dest, source, mode);
18830
/* Emit a move from SOURCE to DEST in mode MODE. */
18832
rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
18833
@@ -7150,8 +8138,71 @@
18834
cfun->machine->sdmode_stack_slot =
18835
eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
18838
+ if (lra_in_progress
18839
+ && mode == SDmode
18840
+ && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
18841
+ && reg_preferred_class (REGNO (operands[0])) == NO_REGS
18842
+ && (REG_P (operands[1])
18843
+ || (GET_CODE (operands[1]) == SUBREG
18844
+ && REG_P (SUBREG_REG (operands[1])))))
18846
+ int regno = REGNO (GET_CODE (operands[1]) == SUBREG
18847
+ ? SUBREG_REG (operands[1]) : operands[1]);
18848
+ enum reg_class cl;
18850
+ if (regno >= FIRST_PSEUDO_REGISTER)
18852
+ cl = reg_preferred_class (regno);
18853
+ gcc_assert (cl != NO_REGS);
18854
+ regno = ira_class_hard_regs[cl][0];
18856
+ if (FP_REGNO_P (regno))
18858
+ if (GET_MODE (operands[0]) != DDmode)
18859
+ operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
18860
+ emit_insn (gen_movsd_store (operands[0], operands[1]));
18862
+ else if (INT_REGNO_P (regno))
18863
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
18865
+ gcc_unreachable();
18868
+ if (lra_in_progress
18869
+ && mode == SDmode
18870
+ && (REG_P (operands[0])
18871
+ || (GET_CODE (operands[0]) == SUBREG
18872
+ && REG_P (SUBREG_REG (operands[0]))))
18873
+ && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
18874
+ && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
18876
+ int regno = REGNO (GET_CODE (operands[0]) == SUBREG
18877
+ ? SUBREG_REG (operands[0]) : operands[0]);
18878
+ enum reg_class cl;
18880
+ if (regno >= FIRST_PSEUDO_REGISTER)
18882
+ cl = reg_preferred_class (regno);
18883
+ gcc_assert (cl != NO_REGS);
18884
+ regno = ira_class_hard_regs[cl][0];
18886
+ if (FP_REGNO_P (regno))
18888
+ if (GET_MODE (operands[1]) != DDmode)
18889
+ operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
18890
+ emit_insn (gen_movsd_load (operands[0], operands[1]));
18892
+ else if (INT_REGNO_P (regno))
18893
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
18895
+ gcc_unreachable();
18899
if (reload_in_progress
18901
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
18902
&& MEM_P (operands[0])
18903
&& rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
18904
&& REG_P (operands[1]))
18905
@@ -7164,7 +8215,9 @@
18907
else if (INT_REGNO_P (REGNO (operands[1])))
18909
- rtx mem = adjust_address_nv (operands[0], mode, 4);
18910
+ rtx mem = operands[0];
18911
+ if (BYTES_BIG_ENDIAN)
18912
+ mem = adjust_address_nv (mem, mode, 4);
18913
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
18914
emit_insn (gen_movsd_hardfloat (mem, operands[1]));
18916
@@ -7176,6 +8229,7 @@
18918
&& REG_P (operands[0])
18919
&& MEM_P (operands[1])
18920
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
18921
&& rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
18923
if (FP_REGNO_P (REGNO (operands[0])))
18924
@@ -7186,7 +8240,9 @@
18926
else if (INT_REGNO_P (REGNO (operands[0])))
18928
- rtx mem = adjust_address_nv (operands[1], mode, 4);
18929
+ rtx mem = operands[1];
18930
+ if (BYTES_BIG_ENDIAN)
18931
+ mem = adjust_address_nv (mem, mode, 4);
18932
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
18933
emit_insn (gen_movsd_hardfloat (operands[0], mem));
18935
@@ -7389,6 +8445,11 @@
18939
+ if (!VECTOR_MEM_VSX_P (TImode))
18940
+ rs6000_eliminate_indexed_memrefs (operands);
18944
rs6000_eliminate_indexed_memrefs (operands);
18947
@@ -7427,18 +8488,231 @@
18950
/* Nonzero if we can use a floating-point register to pass this arg. */
18951
-#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
18952
+#define USE_FP_FOR_ARG_P(CUM,MODE) \
18953
(SCALAR_FLOAT_MODE_P (MODE) \
18954
&& (CUM)->fregno <= FP_ARG_MAX_REG \
18955
&& TARGET_HARD_FLOAT && TARGET_FPRS)
18957
/* Nonzero if we can use an AltiVec register to pass this arg. */
18958
-#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
18959
+#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
18960
(ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
18961
&& (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
18962
&& TARGET_ALTIVEC_ABI \
18965
+/* Walk down the type tree of TYPE counting consecutive base elements.
18966
+ If *MODEP is VOIDmode, then set it to the first valid floating point
18967
+ or vector type. If a non-floating point or vector type is found, or
18968
+ if a floating point or vector type that doesn't match a non-VOIDmode
18969
+ *MODEP is found, then return -1, otherwise return the count in the
18973
+rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
18975
+ enum machine_mode mode;
18976
+ HOST_WIDE_INT size;
18978
+ switch (TREE_CODE (type))
18981
+ mode = TYPE_MODE (type);
18982
+ if (!SCALAR_FLOAT_MODE_P (mode))
18985
+ if (*modep == VOIDmode)
18988
+ if (*modep == mode)
18993
+ case COMPLEX_TYPE:
18994
+ mode = TYPE_MODE (TREE_TYPE (type));
18995
+ if (!SCALAR_FLOAT_MODE_P (mode))
18998
+ if (*modep == VOIDmode)
19001
+ if (*modep == mode)
19006
+ case VECTOR_TYPE:
19007
+ if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
19010
+ /* Use V4SImode as representative of all 128-bit vector types. */
19011
+ size = int_size_in_bytes (type);
19021
+ if (*modep == VOIDmode)
19024
+ /* Vector modes are considered to be opaque: two vectors are
19025
+ equivalent for the purposes of being homogeneous aggregates
19026
+ if they are the same size. */
19027
+ if (*modep == mode)
19035
+ tree index = TYPE_DOMAIN (type);
19037
+ /* Can't handle incomplete types. */
19038
+ if (!COMPLETE_TYPE_P (type))
19041
+ count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
19044
+ || !TYPE_MAX_VALUE (index)
19045
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
19046
+ || !TYPE_MIN_VALUE (index)
19047
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
19051
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
19052
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
19054
+ /* There must be no padding. */
19055
+ if (!host_integerp (TYPE_SIZE (type), 1)
19056
+ || (tree_low_cst (TYPE_SIZE (type), 1)
19057
+ != count * GET_MODE_BITSIZE (*modep)))
19063
+ case RECORD_TYPE:
19069
+ /* Can't handle incomplete types. */
19070
+ if (!COMPLETE_TYPE_P (type))
19073
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
19075
+ if (TREE_CODE (field) != FIELD_DECL)
19078
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
19079
+ if (sub_count < 0)
19081
+ count += sub_count;
19084
+ /* There must be no padding. */
19085
+ if (!host_integerp (TYPE_SIZE (type), 1)
19086
+ || (tree_low_cst (TYPE_SIZE (type), 1)
19087
+ != count * GET_MODE_BITSIZE (*modep)))
19094
+ case QUAL_UNION_TYPE:
19096
+ /* These aren't very interesting except in a degenerate case. */
19101
+ /* Can't handle incomplete types. */
19102
+ if (!COMPLETE_TYPE_P (type))
19105
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
19107
+ if (TREE_CODE (field) != FIELD_DECL)
19110
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
19111
+ if (sub_count < 0)
19113
+ count = count > sub_count ? count : sub_count;
19116
+ /* There must be no padding. */
19117
+ if (!host_integerp (TYPE_SIZE (type), 1)
19118
+ || (tree_low_cst (TYPE_SIZE (type), 1)
19119
+ != count * GET_MODE_BITSIZE (*modep)))
19132
+/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
19133
+ float or vector aggregate that shall be passed in FP/vector registers
19134
+ according to the ELFv2 ABI, return the homogeneous element mode in
19135
+ *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
19137
+ Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
19140
+rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
19141
+ enum machine_mode *elt_mode,
19144
+ /* Note that we do not accept complex types at the top level as
19145
+ homogeneous aggregates; these types are handled via the
19146
+ targetm.calls.split_complex_arg mechanism. Complex types
19147
+ can be elements of homogeneous aggregates, however. */
19148
+ if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
19150
+ enum machine_mode field_mode = VOIDmode;
19151
+ int field_count = rs6000_aggregate_candidate (type, &field_mode);
19153
+ if (field_count > 0)
19155
+ int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
19156
+ (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
19158
+ /* The ELFv2 ABI allows homogeneous aggregates to occupy
19159
+ up to AGGR_ARG_NUM_REG registers. */
19160
+ if (field_count * n_regs <= AGGR_ARG_NUM_REG)
19163
+ *elt_mode = field_mode;
19165
+ *n_elts = field_count;
19172
+ *elt_mode = mode;
19178
/* Return a nonzero value to say to return the function value in
19179
memory, just as large structures are always returned. TYPE will be
19180
the data type of the value, and FNTYPE will be the type of the
19181
@@ -7491,6 +8765,16 @@
19182
/* Otherwise fall through to more conventional ABI rules. */
19185
+ /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
19186
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
19190
+ /* The ELFv2 ABI returns aggregates up to 16B in registers */
19191
+ if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
19192
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
19195
if (AGGREGATE_TYPE_P (type)
19196
&& (aix_struct_return
19197
|| (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
19198
@@ -7522,6 +8806,19 @@
19202
+/* Specify whether values returned in registers should be at the most
19203
+ significant end of a register. We want aggregates returned by
19204
+ value to match the way aggregates are passed to functions. */
19207
+rs6000_return_in_msb (const_tree valtype)
19209
+ return (DEFAULT_ABI == ABI_ELFv2
19210
+ && BYTES_BIG_ENDIAN
19211
+ && AGGREGATE_TYPE_P (valtype)
19212
+ && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
19215
#ifdef HAVE_AS_GNU_ATTRIBUTE
19216
/* Return TRUE if a call to function FNDECL may be one that
19217
potentially affects the function calling ABI of the object file. */
19218
@@ -7658,7 +8955,7 @@
19220
rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
19222
- if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT)
19223
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
19224
return must_pass_in_stack_var_size (mode, type);
19226
return must_pass_in_stack_var_size_or_pad (mode, type);
19227
@@ -7739,6 +9036,11 @@
19228
static unsigned int
19229
rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
19231
+ enum machine_mode elt_mode;
19234
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
19236
if (DEFAULT_ABI == ABI_V4
19237
&& (GET_MODE_SIZE (mode) == 8
19238
|| (TARGET_HARD_FLOAT
19239
@@ -7750,12 +9052,13 @@
19240
&& int_size_in_bytes (type) >= 8
19241
&& int_size_in_bytes (type) < 16))
19243
- else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
19244
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
19245
|| (type && TREE_CODE (type) == VECTOR_TYPE
19246
&& int_size_in_bytes (type) >= 16))
19248
- else if (TARGET_MACHO
19249
- && rs6000_darwin64_abi
19250
+ else if (((TARGET_MACHO && rs6000_darwin64_abi)
19251
+ || DEFAULT_ABI == ABI_ELFv2
19252
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
19254
&& type && TYPE_ALIGN (type) > 64)
19256
@@ -7763,6 +9066,16 @@
19257
return PARM_BOUNDARY;
19260
+/* The offset in words to the start of the parameter save area. */
19262
+static unsigned int
19263
+rs6000_parm_offset (void)
19265
+ return (DEFAULT_ABI == ABI_V4 ? 2
19266
+ : DEFAULT_ABI == ABI_ELFv2 ? 4
19270
/* For a function parm of MODE and TYPE, return the starting word in
19271
the parameter area. NWORDS of the parameter area are already used. */
19273
@@ -7771,11 +9084,9 @@
19274
unsigned int nwords)
19276
unsigned int align;
19277
- unsigned int parm_offset;
19279
align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
19280
- parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6;
19281
- return nwords + (-(parm_offset + nwords) & align);
19282
+ return nwords + (-(rs6000_parm_offset () + nwords) & align);
19285
/* Compute the size (in words) of a function argument. */
19286
@@ -7882,7 +9193,7 @@
19288
if (TREE_CODE (ftype) == RECORD_TYPE)
19289
rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
19290
- else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
19291
+ else if (USE_FP_FOR_ARG_P (cum, mode))
19293
unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
19294
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
19295
@@ -7923,7 +9234,7 @@
19297
cum->words += n_fpregs;
19299
- else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
19300
+ else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
19302
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
19304
@@ -7960,6 +9271,11 @@
19305
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
19306
const_tree type, bool named, int depth)
19308
+ enum machine_mode elt_mode;
19311
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
19313
/* Only tick off an argument if we're not recursing. */
19315
cum->nargs_prototype--;
19316
@@ -7980,15 +9296,16 @@
19319
if (TARGET_ALTIVEC_ABI
19320
- && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
19321
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
19322
|| (type && TREE_CODE (type) == VECTOR_TYPE
19323
&& int_size_in_bytes (type) == 16)))
19325
bool stack = false;
19327
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
19328
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
19331
+ cum->vregno += n_elts;
19333
if (!TARGET_ALTIVEC)
19334
error ("cannot pass argument in vector register because"
19335
" altivec instructions are disabled, use -maltivec"
19336
@@ -7997,7 +9314,8 @@
19337
/* PowerPC64 Linux and AIX allocate GPRs for a vector argument
19338
even if it is going to be passed in a vector register.
19339
Darwin does the same for variable-argument functions. */
19340
- if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
19341
+ if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19343
|| (cum->stdarg && DEFAULT_ABI != ABI_V4))
19346
@@ -8008,15 +9326,13 @@
19350
- /* Vector parameters must be 16-byte aligned. This places
19351
- them at 2 mod 4 in terms of words in 32-bit mode, since
19352
- the parameter save area starts at offset 24 from the
19353
- stack. In 64-bit mode, they just have to start on an
19354
- even word, since the parameter save area is 16-byte
19355
- aligned. Space for GPRs is reserved even if the argument
19356
- will be passed in memory. */
19357
+ /* Vector parameters must be 16-byte aligned. In 32-bit
19358
+ mode this means we need to take into account the offset
19359
+ to the parameter save area. In 64-bit mode, they just
19360
+ have to start on an even word, since the parameter save
19361
+ area is 16-byte aligned. */
19363
- align = (2 - cum->words) & 3;
19364
+ align = -(rs6000_parm_offset () + cum->words) & 3;
19366
align = cum->words & 1;
19367
cum->words += align + rs6000_arg_size (mode, type);
19368
@@ -8141,15 +9457,15 @@
19370
cum->words = align_words + n_words;
19372
- if (SCALAR_FLOAT_MODE_P (mode)
19373
+ if (SCALAR_FLOAT_MODE_P (elt_mode)
19374
&& TARGET_HARD_FLOAT && TARGET_FPRS)
19376
/* _Decimal128 must be passed in an even/odd float register pair.
19377
This assumes that the register number is odd when fregno is
19379
- if (mode == TDmode && (cum->fregno % 2) == 1)
19380
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
19382
- cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
19383
+ cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
19386
if (TARGET_DEBUG_ARG)
19387
@@ -8359,7 +9675,7 @@
19389
if (TREE_CODE (ftype) == RECORD_TYPE)
19390
rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
19391
- else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
19392
+ else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
19394
unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
19396
@@ -8387,7 +9703,7 @@
19397
if (mode == TFmode || mode == TDmode)
19400
- else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
19401
+ else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
19403
rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
19405
@@ -8504,6 +9820,84 @@
19406
return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
19409
+/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
19410
+ but must also be copied into the parameter save area starting at
19411
+ offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
19412
+ to the GPRs and/or memory. Return the number of elements used. */
19415
+rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
19416
+ int align_words, rtx *rvec)
19420
+ if (align_words < GP_ARG_NUM_REG)
19422
+ int n_words = rs6000_arg_size (mode, type);
19424
+ if (align_words + n_words > GP_ARG_NUM_REG
19425
+ || mode == BLKmode
19426
+ || (TARGET_32BIT && TARGET_POWERPC64))
19428
+ /* If this is partially on the stack, then we only
19429
+ include the portion actually in registers here. */
19430
+ enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
19433
+ if (align_words + n_words > GP_ARG_NUM_REG)
19435
+ /* Not all of the arg fits in gprs. Say that it goes in memory
19436
+ too, using a magic NULL_RTX component. Also see comment in
19437
+ rs6000_mixed_function_arg for why the normal
19438
+ function_arg_partial_nregs scheme doesn't work in this case. */
19439
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
19444
+ rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
19445
+ rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
19446
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
19448
+ while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
19452
+ /* The whole arg fits in gprs. */
19453
+ rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
19454
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
19459
+ /* It's entirely in memory. */
19460
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
19466
+/* RVEC is a vector of K components of an argument of mode MODE.
19467
+ Construct the final function_arg return value from it. */
19470
+rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
19472
+ gcc_assert (k >= 1);
19474
+ /* Avoid returning a PARALLEL in the trivial cases. */
19477
+ if (XEXP (rvec[0], 0) == NULL_RTX)
19480
+ if (GET_MODE (XEXP (rvec[0], 0)) == mode)
19481
+ return XEXP (rvec[0], 0);
19484
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
19487
/* Determine where to put an argument to a function.
19488
Value is zero to push the argument on the stack,
19489
or a hard register in which to store the argument.
19490
@@ -8538,6 +9932,8 @@
19492
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
19493
enum rs6000_abi abi = DEFAULT_ABI;
19494
+ enum machine_mode elt_mode;
19497
/* Return a marker to indicate whether CR1 needs to set or clear the
19498
bit that V.4 uses to say fp args were passed in registers.
19499
@@ -8564,6 +9960,8 @@
19500
return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
19503
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
19505
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
19507
rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
19508
@@ -8572,33 +9970,30 @@
19509
/* Else fall through to usual handling. */
19512
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
19513
- if (TARGET_64BIT && ! cum->prototype)
19515
- /* Vector parameters get passed in vector register
19516
- and also in GPRs or memory, in absence of prototype. */
19519
- align_words = (cum->words + 1) & ~1;
19520
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
19522
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
19526
- if (align_words >= GP_ARG_NUM_REG)
19532
- slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
19534
- return gen_rtx_PARALLEL (mode,
19536
- gen_rtx_EXPR_LIST (VOIDmode,
19537
- slot, const0_rtx),
19538
- gen_rtx_EXPR_LIST (VOIDmode,
19539
- gen_rtx_REG (mode, cum->vregno),
19543
- return gen_rtx_REG (mode, cum->vregno);
19544
+ /* Do we also need to pass this argument in the parameter
19546
+ if (TARGET_64BIT && ! cum->prototype)
19548
+ int align_words = (cum->words + 1) & ~1;
19549
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
19552
+ /* Describe where this argument goes in the vector registers. */
19553
+ for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
19555
+ r = gen_rtx_REG (elt_mode, cum->vregno + i);
19556
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
19557
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
19560
+ return rs6000_finish_function_arg (mode, rvec, k);
19562
else if (TARGET_ALTIVEC_ABI
19563
&& (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
19564
|| (type && TREE_CODE (type) == VECTOR_TYPE
19565
@@ -8613,13 +10008,13 @@
19566
int align, align_words, n_words;
19567
enum machine_mode part_mode;
19569
- /* Vector parameters must be 16-byte aligned. This places them at
19570
- 2 mod 4 in terms of words in 32-bit mode, since the parameter
19571
- save area starts at offset 24 from the stack. In 64-bit mode,
19572
- they just have to start on an even word, since the parameter
19573
- save area is 16-byte aligned. */
19574
+ /* Vector parameters must be 16-byte aligned. In 32-bit
19575
+ mode this means we need to take into account the offset
19576
+ to the parameter save area. In 64-bit mode, they just
19577
+ have to start on an even word, since the parameter save
19578
+ area is 16-byte aligned. */
19580
- align = (2 - cum->words) & 3;
19581
+ align = -(rs6000_parm_offset () + cum->words) & 3;
19583
align = cum->words & 1;
19584
align_words = cum->words + align;
19585
@@ -8697,92 +10092,44 @@
19587
/* _Decimal128 must be passed in an even/odd float register pair.
19588
This assumes that the register number is odd when fregno is odd. */
19589
- if (mode == TDmode && (cum->fregno % 2) == 1)
19590
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
19593
- if (USE_FP_FOR_ARG_P (cum, mode, type))
19594
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
19596
- rtx rvec[GP_ARG_NUM_REG + 1];
19599
- bool needs_psave;
19600
- enum machine_mode fmode = mode;
19601
- unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
19602
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
19605
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
19607
- if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
19609
- /* Currently, we only ever need one reg here because complex
19610
- doubles are split. */
19611
- gcc_assert (cum->fregno == FP_ARG_MAX_REG
19612
- && (fmode == TFmode || fmode == TDmode));
19613
+ /* Do we also need to pass this argument in the parameter
19615
+ if (type && (cum->nargs_prototype <= 0
19616
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19617
+ && TARGET_XL_COMPAT
19618
+ && align_words >= GP_ARG_NUM_REG)))
19619
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
19621
- /* Long double or _Decimal128 split over regs and memory. */
19622
- fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
19625
- /* Do we also need to pass this arg in the parameter save
19627
- needs_psave = (type
19628
- && (cum->nargs_prototype <= 0
19629
- || (DEFAULT_ABI == ABI_AIX
19630
- && TARGET_XL_COMPAT
19631
- && align_words >= GP_ARG_NUM_REG)));
19633
- if (!needs_psave && mode == fmode)
19634
- return gen_rtx_REG (fmode, cum->fregno);
19638
+ /* Describe where this argument goes in the fprs. */
19639
+ for (i = 0; i < n_elts
19640
+ && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
19642
- /* Describe the part that goes in gprs or the stack.
19643
- This piece must come first, before the fprs. */
19644
- if (align_words < GP_ARG_NUM_REG)
19645
+ /* Check if the argument is split over registers and memory.
19646
+ This can only ever happen for long double or _Decimal128;
19647
+ complex types are handled via split_complex_arg. */
19648
+ enum machine_mode fmode = elt_mode;
19649
+ if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
19651
- unsigned long n_words = rs6000_arg_size (mode, type);
19652
+ gcc_assert (fmode == TFmode || fmode == TDmode);
19653
+ fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
19656
- if (align_words + n_words > GP_ARG_NUM_REG
19657
- || (TARGET_32BIT && TARGET_POWERPC64))
19659
- /* If this is partially on the stack, then we only
19660
- include the portion actually in registers here. */
19661
- enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
19664
- if (align_words + n_words > GP_ARG_NUM_REG)
19665
- /* Not all of the arg fits in gprs. Say that it
19666
- goes in memory too, using a magic NULL_RTX
19667
- component. Also see comment in
19668
- rs6000_mixed_function_arg for why the normal
19669
- function_arg_partial_nregs scheme doesn't work
19671
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX,
19675
- r = gen_rtx_REG (rmode,
19676
- GP_ARG_MIN_REG + align_words);
19677
- off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
19678
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
19680
- while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
19684
- /* The whole arg fits in gprs. */
19685
- r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
19686
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
19690
- /* It's entirely in memory. */
19691
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
19692
+ r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
19693
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
19694
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
19697
- /* Describe where this piece goes in the fprs. */
19698
- r = gen_rtx_REG (fmode, cum->fregno);
19699
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
19701
- return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
19702
+ return rs6000_finish_function_arg (mode, rvec, k);
19704
else if (align_words < GP_ARG_NUM_REG)
19706
@@ -8789,9 +10136,6 @@
19707
if (TARGET_32BIT && TARGET_POWERPC64)
19708
return rs6000_mixed_function_arg (mode, type, align_words);
19710
- if (mode == BLKmode)
19713
return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
19716
@@ -8810,16 +10154,32 @@
19717
tree type, bool named)
19719
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
19720
+ bool passed_in_gprs = true;
19723
+ enum machine_mode elt_mode;
19726
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
19728
if (DEFAULT_ABI == ABI_V4)
19731
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)
19732
- && cum->nargs_prototype >= 0)
19734
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
19736
+ /* If we are passing this arg in the fixed parameter save area
19737
+ (gprs or memory) as well as VRs, we do not use the partial
19738
+ bytes mechanism; instead, rs6000_function_arg will return a
19739
+ PARALLEL including a memory element as necessary. */
19740
+ if (TARGET_64BIT && ! cum->prototype)
19743
+ /* Otherwise, we pass in VRs only. Check for partial copies. */
19744
+ passed_in_gprs = false;
19745
+ if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
19746
+ ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
19749
/* In this complicated case we just disable the partial_nregs code. */
19750
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
19752
@@ -8826,26 +10186,30 @@
19754
align_words = rs6000_parm_start (mode, type, cum->words);
19756
- if (USE_FP_FOR_ARG_P (cum, mode, type))
19757
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
19759
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
19761
/* If we are passing this arg in the fixed parameter save area
19762
- (gprs or memory) as well as fprs, then this function should
19763
- return the number of partial bytes passed in the parameter
19764
- save area rather than partial bytes passed in fprs. */
19765
+ (gprs or memory) as well as FPRs, we do not use the partial
19766
+ bytes mechanism; instead, rs6000_function_arg will return a
19767
+ PARALLEL including a memory element as necessary. */
19769
&& (cum->nargs_prototype <= 0
19770
- || (DEFAULT_ABI == ABI_AIX
19771
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19772
&& TARGET_XL_COMPAT
19773
&& align_words >= GP_ARG_NUM_REG)))
19775
- else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3)
19776
- > FP_ARG_MAX_REG + 1)
19777
- ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
19778
- else if (cum->nargs_prototype >= 0)
19781
+ /* Otherwise, we pass in FPRs only. Check for partial copies. */
19782
+ passed_in_gprs = false;
19783
+ if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
19784
+ ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
19785
+ * MIN (8, GET_MODE_SIZE (elt_mode)));
19788
- if (align_words < GP_ARG_NUM_REG
19789
+ if (passed_in_gprs
19790
+ && align_words < GP_ARG_NUM_REG
19791
&& GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
19792
ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
19794
@@ -8926,6 +10290,139 @@
19798
+/* Process parameter of type TYPE after ARGS_SO_FAR parameters were
19799
+ already processes. Return true if the parameter must be passed
19800
+ (fully or partially) on the stack. */
19803
+rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
19805
+ enum machine_mode mode;
19809
+ /* Catch errors. */
19810
+ if (type == NULL || type == error_mark_node)
19813
+ /* Handle types with no storage requirement. */
19814
+ if (TYPE_MODE (type) == VOIDmode)
19817
+ /* Handle complex types. */
19818
+ if (TREE_CODE (type) == COMPLEX_TYPE)
19819
+ return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
19820
+ || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
19822
+ /* Handle transparent aggregates. */
19823
+ if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
19824
+ && TYPE_TRANSPARENT_AGGR (type))
19825
+ type = TREE_TYPE (first_field (type));
19827
+ /* See if this arg was passed by invisible reference. */
19828
+ if (pass_by_reference (get_cumulative_args (args_so_far),
19829
+ TYPE_MODE (type), type, true))
19830
+ type = build_pointer_type (type);
19832
+ /* Find mode as it is passed by the ABI. */
19833
+ unsignedp = TYPE_UNSIGNED (type);
19834
+ mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
19836
+ /* If we must pass in stack, we need a stack. */
19837
+ if (rs6000_must_pass_in_stack (mode, type))
19840
+ /* If there is no incoming register, we need a stack. */
19841
+ entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
19842
+ if (entry_parm == NULL)
19845
+ /* Likewise if we need to pass both in registers and on the stack. */
19846
+ if (GET_CODE (entry_parm) == PARALLEL
19847
+ && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
19850
+ /* Also true if we're partially in registers and partially not. */
19851
+ if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
19854
+ /* Update info on where next arg arrives in registers. */
19855
+ rs6000_function_arg_advance (args_so_far, mode, type, true);
19859
+/* Return true if FUN has no prototype, has a variable argument
19860
+ list, or passes any parameter in memory. */
19863
+rs6000_function_parms_need_stack (tree fun)
19865
+ function_args_iterator args_iter;
19867
+ CUMULATIVE_ARGS args_so_far_v;
19868
+ cumulative_args_t args_so_far;
19871
+ /* Must be a libcall, all of which only use reg parms. */
19873
+ if (!TYPE_P (fun))
19874
+ fun = TREE_TYPE (fun);
19876
+ /* Varargs functions need the parameter save area. */
19877
+ if (!prototype_p (fun) || stdarg_p (fun))
19880
+ INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX);
19881
+ args_so_far = pack_cumulative_args (&args_so_far_v);
19883
+ if (aggregate_value_p (TREE_TYPE (fun), fun))
19885
+ tree type = build_pointer_type (TREE_TYPE (fun));
19886
+ rs6000_parm_needs_stack (args_so_far, type);
19889
+ FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter)
19890
+ if (rs6000_parm_needs_stack (args_so_far, arg_type))
19896
+/* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
19897
+ usually a constant depending on the ABI. However, in the ELFv2 ABI
19898
+ the register parameter area is optional when calling a function that
19899
+ has a prototype is scope, has no variable argument list, and passes
19900
+ all parameters in registers. */
19903
+rs6000_reg_parm_stack_space (tree fun)
19905
+ int reg_parm_stack_space;
19907
+ switch (DEFAULT_ABI)
19910
+ reg_parm_stack_space = 0;
19915
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
19919
+ /* ??? Recomputing this every time is a bit expensive. Is there
19920
+ a place to cache this information? */
19921
+ if (rs6000_function_parms_need_stack (fun))
19922
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
19924
+ reg_parm_stack_space = 0;
19928
+ return reg_parm_stack_space;
19932
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
19934
@@ -9307,8 +10804,10 @@
19935
We don't need to check for pass-by-reference because of the test above.
19936
We can return a simplifed answer, since we know there's no offset to add. */
19939
- && rs6000_darwin64_abi
19940
+ if (((TARGET_MACHO
19941
+ && rs6000_darwin64_abi)
19942
+ || DEFAULT_ABI == ABI_ELFv2
19943
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
19944
&& integer_zerop (TYPE_SIZE (type)))
19946
unsigned HOST_WIDE_INT align, boundary;
19947
@@ -9603,6 +11102,7 @@
19948
#undef RS6000_BUILTIN_A
19949
#undef RS6000_BUILTIN_D
19950
#undef RS6000_BUILTIN_E
19951
+#undef RS6000_BUILTIN_H
19952
#undef RS6000_BUILTIN_P
19953
#undef RS6000_BUILTIN_Q
19954
#undef RS6000_BUILTIN_S
19955
@@ -9616,6 +11116,7 @@
19956
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
19957
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
19958
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
19959
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
19960
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
19961
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
19962
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
19963
@@ -9634,6 +11135,7 @@
19964
#undef RS6000_BUILTIN_A
19965
#undef RS6000_BUILTIN_D
19966
#undef RS6000_BUILTIN_E
19967
+#undef RS6000_BUILTIN_H
19968
#undef RS6000_BUILTIN_P
19969
#undef RS6000_BUILTIN_Q
19970
#undef RS6000_BUILTIN_S
19971
@@ -9647,6 +11149,7 @@
19972
{ MASK, ICODE, NAME, ENUM },
19974
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
19975
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
19976
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
19977
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
19978
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
19979
@@ -9665,6 +11168,7 @@
19980
#undef RS6000_BUILTIN_A
19981
#undef RS6000_BUILTIN_D
19982
#undef RS6000_BUILTIN_E
19983
+#undef RS6000_BUILTIN_H
19984
#undef RS6000_BUILTIN_P
19985
#undef RS6000_BUILTIN_Q
19986
#undef RS6000_BUILTIN_S
19987
@@ -9678,6 +11182,7 @@
19988
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
19989
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
19990
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
19991
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
19992
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
19993
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
19994
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
19995
@@ -9694,6 +11199,7 @@
19996
#undef RS6000_BUILTIN_A
19997
#undef RS6000_BUILTIN_D
19998
#undef RS6000_BUILTIN_E
19999
+#undef RS6000_BUILTIN_H
20000
#undef RS6000_BUILTIN_P
20001
#undef RS6000_BUILTIN_Q
20002
#undef RS6000_BUILTIN_S
20003
@@ -9705,6 +11211,7 @@
20004
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
20005
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
20006
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
20007
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
20008
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
20009
{ MASK, ICODE, NAME, ENUM },
20011
@@ -9726,6 +11233,7 @@
20012
#undef RS6000_BUILTIN_A
20013
#undef RS6000_BUILTIN_D
20014
#undef RS6000_BUILTIN_E
20015
+#undef RS6000_BUILTIN_H
20016
#undef RS6000_BUILTIN_P
20017
#undef RS6000_BUILTIN_Q
20018
#undef RS6000_BUILTIN_S
20019
@@ -9737,6 +11245,7 @@
20020
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
20021
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
20022
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
20023
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
20024
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
20025
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
20026
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
20027
@@ -9756,6 +11265,7 @@
20028
#undef RS6000_BUILTIN_A
20029
#undef RS6000_BUILTIN_D
20030
#undef RS6000_BUILTIN_E
20031
+#undef RS6000_BUILTIN_H
20032
#undef RS6000_BUILTIN_P
20033
#undef RS6000_BUILTIN_Q
20034
#undef RS6000_BUILTIN_S
20035
@@ -9769,6 +11279,7 @@
20036
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
20037
{ MASK, ICODE, NAME, ENUM },
20039
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
20040
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
20041
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
20042
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
20043
@@ -9786,6 +11297,7 @@
20044
#undef RS6000_BUILTIN_A
20045
#undef RS6000_BUILTIN_D
20046
#undef RS6000_BUILTIN_E
20047
+#undef RS6000_BUILTIN_H
20048
#undef RS6000_BUILTIN_P
20049
#undef RS6000_BUILTIN_Q
20050
#undef RS6000_BUILTIN_S
20051
@@ -9797,6 +11309,7 @@
20052
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
20053
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
20054
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
20055
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
20056
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
20057
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
20058
{ MASK, ICODE, NAME, ENUM },
20059
@@ -9817,6 +11330,7 @@
20060
#undef RS6000_BUILTIN_A
20061
#undef RS6000_BUILTIN_D
20062
#undef RS6000_BUILTIN_E
20063
+#undef RS6000_BUILTIN_H
20064
#undef RS6000_BUILTIN_P
20065
#undef RS6000_BUILTIN_Q
20066
#undef RS6000_BUILTIN_S
20067
@@ -9830,6 +11344,7 @@
20069
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
20070
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
20071
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
20072
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
20073
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
20074
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
20075
@@ -9847,8 +11362,9 @@
20076
#undef RS6000_BUILTIN_2
20077
#undef RS6000_BUILTIN_3
20078
#undef RS6000_BUILTIN_A
20079
+#undef RS6000_BUILTIN_D
20080
#undef RS6000_BUILTIN_E
20081
-#undef RS6000_BUILTIN_D
20082
+#undef RS6000_BUILTIN_H
20083
#undef RS6000_BUILTIN_P
20084
#undef RS6000_BUILTIN_Q
20085
#undef RS6000_BUILTIN_S
20086
@@ -9862,6 +11378,7 @@
20087
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
20088
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
20089
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
20090
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
20091
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
20092
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
20093
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
20094
@@ -9872,6 +11389,7 @@
20095
#include "rs6000-builtin.def"
20098
+/* HTM builtins. */
20099
#undef RS6000_BUILTIN_1
20100
#undef RS6000_BUILTIN_2
20101
#undef RS6000_BUILTIN_3
20102
@@ -9878,11 +11396,42 @@
20103
#undef RS6000_BUILTIN_A
20104
#undef RS6000_BUILTIN_D
20105
#undef RS6000_BUILTIN_E
20106
+#undef RS6000_BUILTIN_H
20107
#undef RS6000_BUILTIN_P
20108
#undef RS6000_BUILTIN_Q
20109
#undef RS6000_BUILTIN_S
20110
#undef RS6000_BUILTIN_X
20112
+#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
20113
+#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
20114
+#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
20115
+#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
20116
+#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
20117
+#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
20118
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
20119
+ { MASK, ICODE, NAME, ENUM },
20121
+#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
20122
+#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
20123
+#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
20124
+#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
20126
+static const struct builtin_description bdesc_htm[] =
20128
+#include "rs6000-builtin.def"
20131
+#undef RS6000_BUILTIN_1
20132
+#undef RS6000_BUILTIN_2
20133
+#undef RS6000_BUILTIN_3
20134
+#undef RS6000_BUILTIN_A
20135
+#undef RS6000_BUILTIN_D
20136
+#undef RS6000_BUILTIN_E
20137
+#undef RS6000_BUILTIN_H
20138
+#undef RS6000_BUILTIN_P
20139
+#undef RS6000_BUILTIN_Q
20140
+#undef RS6000_BUILTIN_S
20142
/* Return true if a builtin function is overloaded. */
20144
rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
20145
@@ -10190,7 +11739,101 @@
20149
+/* Return a constant vector for use as a little-endian permute control vector
20150
+ to reverse the order of elements of the given vector mode. */
20152
+swap_selector_for_mode (enum machine_mode mode)
20154
+ /* These are little endian vectors, so their elements are reversed
20155
+ from what you would normally expect for a permute control vector. */
20156
+ unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
20157
+ unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
20158
+ unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
20159
+ unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
20160
+ unsigned int *swaparray, i;
20167
+ swaparray = swap2;
20171
+ swaparray = swap4;
20174
+ swaparray = swap8;
20177
+ swaparray = swap16;
20180
+ gcc_unreachable ();
20183
+ for (i = 0; i < 16; ++i)
20184
+ perm[i] = GEN_INT (swaparray[i]);
20186
+ return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
20189
+/* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
20190
+ with -maltivec=be specified. Issue the load followed by an element-reversing
20193
+altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
20195
+ rtx tmp = gen_reg_rtx (mode);
20196
+ rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
20197
+ rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
20198
+ rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
20199
+ rtx sel = swap_selector_for_mode (mode);
20200
+ rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
20202
+ gcc_assert (REG_P (op0));
20204
+ emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
20207
+/* Generate code for a "stvx" or "stvxl" built-in for a little endian target
20208
+ with -maltivec=be specified. Issue the store preceded by an element-reversing
20211
+altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
20213
+ rtx tmp = gen_reg_rtx (mode);
20214
+ rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
20215
+ rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
20216
+ rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
20217
+ rtx sel = swap_selector_for_mode (mode);
20220
+ gcc_assert (REG_P (op1));
20221
+ vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
20222
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
20226
+/* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
20227
+ specified. Issue the store preceded by an element-reversing permute. */
20229
+altivec_expand_stvex_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
20231
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
20232
+ rtx tmp = gen_reg_rtx (mode);
20233
+ rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
20234
+ rtx sel = swap_selector_for_mode (mode);
20237
+ gcc_assert (REG_P (op1));
20238
+ vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
20239
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
20240
+ emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
20244
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
20247
@@ -10352,7 +11995,198 @@
20251
+/* Return the appropriate SPR number associated with the given builtin. */
20252
+static inline HOST_WIDE_INT
20253
+htm_spr_num (enum rs6000_builtins code)
20255
+ if (code == HTM_BUILTIN_GET_TFHAR
20256
+ || code == HTM_BUILTIN_SET_TFHAR)
20257
+ return TFHAR_SPR;
20258
+ else if (code == HTM_BUILTIN_GET_TFIAR
20259
+ || code == HTM_BUILTIN_SET_TFIAR)
20260
+ return TFIAR_SPR;
20261
+ else if (code == HTM_BUILTIN_GET_TEXASR
20262
+ || code == HTM_BUILTIN_SET_TEXASR)
20263
+ return TEXASR_SPR;
20264
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
20265
+ || code == HTM_BUILTIN_SET_TEXASRU);
20266
+ return TEXASRU_SPR;
20269
+/* Return the appropriate SPR regno associated with the given builtin. */
20270
+static inline HOST_WIDE_INT
20271
+htm_spr_regno (enum rs6000_builtins code)
20273
+ if (code == HTM_BUILTIN_GET_TFHAR
20274
+ || code == HTM_BUILTIN_SET_TFHAR)
20275
+ return TFHAR_REGNO;
20276
+ else if (code == HTM_BUILTIN_GET_TFIAR
20277
+ || code == HTM_BUILTIN_SET_TFIAR)
20278
+ return TFIAR_REGNO;
20279
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASR
20280
+ || code == HTM_BUILTIN_SET_TEXASR
20281
+ || code == HTM_BUILTIN_GET_TEXASRU
20282
+ || code == HTM_BUILTIN_SET_TEXASRU);
20283
+ return TEXASR_REGNO;
20286
+/* Return the correct ICODE value depending on whether we are
20287
+ setting or reading the HTM SPRs. */
20288
+static inline enum insn_code
20289
+rs6000_htm_spr_icode (bool nonvoid)
20292
+ return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
20294
+ return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
20297
+/* Expand the HTM builtin in EXP and store the result in TARGET.
20298
+ Store true in *EXPANDEDP if we found a builtin to expand. */
20300
+htm_expand_builtin (tree exp, rtx target, bool * expandedp)
20302
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20303
+ bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
20304
+ enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
20305
+ const struct builtin_description *d;
20308
+ *expandedp = false;
20310
+ /* Expand the HTM builtins. */
20312
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
20313
+ if (d->code == fcode)
20315
+ rtx op[MAX_HTM_OPERANDS], pat;
20318
+ call_expr_arg_iterator iter;
20319
+ unsigned attr = rs6000_builtin_info[fcode].attr;
20320
+ enum insn_code icode = d->icode;
20322
+ if (attr & RS6000_BTC_SPR)
20323
+ icode = rs6000_htm_spr_icode (nonvoid);
20327
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
20329
+ || GET_MODE (target) != tmode
20330
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20331
+ target = gen_reg_rtx (tmode);
20332
+ op[nopnds++] = target;
20335
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
20337
+ const struct insn_operand_data *insn_op;
20339
+ if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
20342
+ insn_op = &insn_data[icode].operand[nopnds];
20344
+ op[nopnds] = expand_normal (arg);
20346
+ if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
20348
+ if (!strcmp (insn_op->constraint, "n"))
20350
+ int arg_num = (nonvoid) ? nopnds : nopnds + 1;
20351
+ if (!CONST_INT_P (op[nopnds]))
20352
+ error ("argument %d must be an unsigned literal", arg_num);
20354
+ error ("argument %d is an unsigned literal that is "
20355
+ "out of range", arg_num);
20356
+ return const0_rtx;
20358
+ op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
20364
+ /* Handle the builtins for extended mnemonics. These accept
20365
+ no arguments, but map to builtins that take arguments. */
20368
+ case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
20369
+ case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
20370
+ op[nopnds++] = GEN_INT (1);
20371
+#ifdef ENABLE_CHECKING
20372
+ attr |= RS6000_BTC_UNARY;
20375
+ case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
20376
+ op[nopnds++] = GEN_INT (0);
20377
+#ifdef ENABLE_CHECKING
20378
+ attr |= RS6000_BTC_UNARY;
20385
+ /* If this builtin accesses SPRs, then pass in the appropriate
20386
+ SPR number and SPR regno as the last two operands. */
20387
+ if (attr & RS6000_BTC_SPR)
20389
+ op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
20390
+ op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
20393
+#ifdef ENABLE_CHECKING
20394
+ int expected_nopnds = 0;
20395
+ if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
20396
+ expected_nopnds = 1;
20397
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
20398
+ expected_nopnds = 2;
20399
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
20400
+ expected_nopnds = 3;
20401
+ if (!(attr & RS6000_BTC_VOID))
20402
+ expected_nopnds += 1;
20403
+ if (attr & RS6000_BTC_SPR)
20404
+ expected_nopnds += 2;
20406
+ gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
20412
+ pat = GEN_FCN (icode) (NULL_RTX);
20415
+ pat = GEN_FCN (icode) (op[0]);
20418
+ pat = GEN_FCN (icode) (op[0], op[1]);
20421
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20424
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20427
+ gcc_unreachable ();
20433
+ *expandedp = true;
20436
+ return const0_rtx;
20443
rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
20446
@@ -10428,7 +12262,28 @@
20450
+ else if (icode == CODE_FOR_crypto_vshasigmaw
20451
+ || icode == CODE_FOR_crypto_vshasigmad)
20453
+ /* Check whether the 2nd and 3rd arguments are integer constants and in
20454
+ range and prepare arguments. */
20455
+ STRIP_NOPS (arg1);
20456
+ if (TREE_CODE (arg1) != INTEGER_CST
20457
+ || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
20459
+ error ("argument 2 must be 0 or 1");
20460
+ return const0_rtx;
20463
+ STRIP_NOPS (arg2);
20464
+ if (TREE_CODE (arg2) != INTEGER_CST
20465
+ || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
20467
+ error ("argument 3 must be in the range 0..15");
20468
+ return const0_rtx;
20473
|| GET_MODE (target) != tmode
20474
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20475
@@ -10770,8 +12625,19 @@
20479
+ case ALTIVEC_BUILTIN_STVX_V2DF:
20480
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
20481
+ case ALTIVEC_BUILTIN_STVX_V2DI:
20482
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
20483
+ case ALTIVEC_BUILTIN_STVX_V4SF:
20484
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
20485
case ALTIVEC_BUILTIN_STVX:
20486
+ case ALTIVEC_BUILTIN_STVX_V4SI:
20487
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
20488
+ case ALTIVEC_BUILTIN_STVX_V8HI:
20489
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
20490
+ case ALTIVEC_BUILTIN_STVX_V16QI:
20491
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
20492
case ALTIVEC_BUILTIN_STVEBX:
20493
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
20494
case ALTIVEC_BUILTIN_STVEHX:
20495
@@ -10778,8 +12644,19 @@
20496
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
20497
case ALTIVEC_BUILTIN_STVEWX:
20498
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
20499
+ case ALTIVEC_BUILTIN_STVXL_V2DF:
20500
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
20501
+ case ALTIVEC_BUILTIN_STVXL_V2DI:
20502
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
20503
+ case ALTIVEC_BUILTIN_STVXL_V4SF:
20504
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
20505
case ALTIVEC_BUILTIN_STVXL:
20506
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp);
20507
+ case ALTIVEC_BUILTIN_STVXL_V4SI:
20508
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
20509
+ case ALTIVEC_BUILTIN_STVXL_V8HI:
20510
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
20511
+ case ALTIVEC_BUILTIN_STVXL_V16QI:
20512
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
20514
case ALTIVEC_BUILTIN_STVLX:
20515
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
20516
@@ -10923,12 +12800,44 @@
20517
case ALTIVEC_BUILTIN_LVEWX:
20518
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
20519
exp, target, false);
20520
+ case ALTIVEC_BUILTIN_LVXL_V2DF:
20521
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
20522
+ exp, target, false);
20523
+ case ALTIVEC_BUILTIN_LVXL_V2DI:
20524
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
20525
+ exp, target, false);
20526
+ case ALTIVEC_BUILTIN_LVXL_V4SF:
20527
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
20528
+ exp, target, false);
20529
case ALTIVEC_BUILTIN_LVXL:
20530
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
20531
+ case ALTIVEC_BUILTIN_LVXL_V4SI:
20532
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
20533
exp, target, false);
20534
+ case ALTIVEC_BUILTIN_LVXL_V8HI:
20535
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
20536
+ exp, target, false);
20537
+ case ALTIVEC_BUILTIN_LVXL_V16QI:
20538
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
20539
+ exp, target, false);
20540
+ case ALTIVEC_BUILTIN_LVX_V2DF:
20541
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
20542
+ exp, target, false);
20543
+ case ALTIVEC_BUILTIN_LVX_V2DI:
20544
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
20545
+ exp, target, false);
20546
+ case ALTIVEC_BUILTIN_LVX_V4SF:
20547
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
20548
+ exp, target, false);
20549
case ALTIVEC_BUILTIN_LVX:
20550
+ case ALTIVEC_BUILTIN_LVX_V4SI:
20551
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
20552
exp, target, false);
20553
+ case ALTIVEC_BUILTIN_LVX_V8HI:
20554
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
20555
+ exp, target, false);
20556
+ case ALTIVEC_BUILTIN_LVX_V16QI:
20557
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
20558
+ exp, target, false);
20559
case ALTIVEC_BUILTIN_LVLX:
20560
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
20561
exp, target, true);
20562
@@ -11412,6 +13321,8 @@
20563
error ("Builtin function %s is only valid for the cell processor", name);
20564
else if ((fnmask & RS6000_BTM_VSX) != 0)
20565
error ("Builtin function %s requires the -mvsx option", name);
20566
+ else if ((fnmask & RS6000_BTM_HTM) != 0)
20567
+ error ("Builtin function %s requires the -mhtm option", name);
20568
else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
20569
error ("Builtin function %s requires the -maltivec option", name);
20570
else if ((fnmask & RS6000_BTM_PAIRED) != 0)
20571
@@ -11516,7 +13427,8 @@
20572
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
20573
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
20575
- int icode = (int) CODE_FOR_altivec_lvsr;
20576
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
20577
+ : (int) CODE_FOR_altivec_lvsl);
20578
enum machine_mode tmode = insn_data[icode].operand[0].mode;
20579
enum machine_mode mode = insn_data[icode].operand[1].mode;
20581
@@ -11591,7 +13503,14 @@
20587
+ ret = htm_expand_builtin (exp, target, &success);
20593
gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
20595
/* Handle simple unary operations. */
20596
@@ -11773,6 +13692,9 @@
20597
spe_init_builtins ();
20598
if (TARGET_EXTRA_BUILTINS)
20599
altivec_init_builtins ();
20601
+ htm_init_builtins ();
20603
if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
20604
rs6000_common_init_builtins ();
20606
@@ -12118,6 +14040,10 @@
20607
= build_function_type_list (integer_type_node,
20608
integer_type_node, V4SI_type_node,
20609
V4SI_type_node, NULL_TREE);
20610
+ tree int_ftype_int_v2di_v2di
20611
+ = build_function_type_list (integer_type_node,
20612
+ integer_type_node, V2DI_type_node,
20613
+ V2DI_type_node, NULL_TREE);
20614
tree void_ftype_v4si
20615
= build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
20616
tree v8hi_ftype_void
20617
@@ -12200,6 +14126,8 @@
20618
= build_function_type_list (integer_type_node,
20619
integer_type_node, V2DF_type_node,
20620
V2DF_type_node, NULL_TREE);
20621
+ tree v2di_ftype_v2di
20622
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
20623
tree v4si_ftype_v4si
20624
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
20625
tree v8hi_ftype_v8hi
20626
@@ -12225,10 +14153,58 @@
20627
def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
20628
def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
20629
def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
20630
+ def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
20631
+ ALTIVEC_BUILTIN_LVXL_V2DF);
20632
+ def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
20633
+ ALTIVEC_BUILTIN_LVXL_V2DI);
20634
+ def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
20635
+ ALTIVEC_BUILTIN_LVXL_V4SF);
20636
+ def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
20637
+ ALTIVEC_BUILTIN_LVXL_V4SI);
20638
+ def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
20639
+ ALTIVEC_BUILTIN_LVXL_V8HI);
20640
+ def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
20641
+ ALTIVEC_BUILTIN_LVXL_V16QI);
20642
def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
20643
+ def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
20644
+ ALTIVEC_BUILTIN_LVX_V2DF);
20645
+ def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
20646
+ ALTIVEC_BUILTIN_LVX_V2DI);
20647
+ def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
20648
+ ALTIVEC_BUILTIN_LVX_V4SF);
20649
+ def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
20650
+ ALTIVEC_BUILTIN_LVX_V4SI);
20651
+ def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
20652
+ ALTIVEC_BUILTIN_LVX_V8HI);
20653
+ def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
20654
+ ALTIVEC_BUILTIN_LVX_V16QI);
20655
def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
20656
+ def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
20657
+ ALTIVEC_BUILTIN_STVX_V2DF);
20658
+ def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
20659
+ ALTIVEC_BUILTIN_STVX_V2DI);
20660
+ def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
20661
+ ALTIVEC_BUILTIN_STVX_V4SF);
20662
+ def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
20663
+ ALTIVEC_BUILTIN_STVX_V4SI);
20664
+ def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
20665
+ ALTIVEC_BUILTIN_STVX_V8HI);
20666
+ def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
20667
+ ALTIVEC_BUILTIN_STVX_V16QI);
20668
def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
20669
def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
20670
+ def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
20671
+ ALTIVEC_BUILTIN_STVXL_V2DF);
20672
+ def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
20673
+ ALTIVEC_BUILTIN_STVXL_V2DI);
20674
+ def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
20675
+ ALTIVEC_BUILTIN_STVXL_V4SF);
20676
+ def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
20677
+ ALTIVEC_BUILTIN_STVXL_V4SI);
20678
+ def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
20679
+ ALTIVEC_BUILTIN_STVXL_V8HI);
20680
+ def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
20681
+ ALTIVEC_BUILTIN_STVXL_V16QI);
20682
def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
20683
def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
20684
def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
20685
@@ -12335,6 +14311,9 @@
20687
type = int_ftype_int_opaque_opaque;
20690
+ type = int_ftype_int_v2di_v2di;
20693
type = int_ftype_int_v4si_v4si;
20695
@@ -12368,6 +14347,9 @@
20700
+ type = v2di_ftype_v2di;
20703
type = v4si_ftype_v4si;
20705
@@ -12500,6 +14482,79 @@
20706
def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
20710
+htm_init_builtins (void)
20712
+ HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
20713
+ const struct builtin_description *d;
20717
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
20719
+ tree op[MAX_HTM_OPERANDS], type;
20720
+ HOST_WIDE_INT mask = d->mask;
20721
+ unsigned attr = rs6000_builtin_info[d->code].attr;
20722
+ bool void_func = (attr & RS6000_BTC_VOID);
20723
+ int attr_args = (attr & RS6000_BTC_TYPE_MASK);
20725
+ tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
20726
+ : unsigned_type_node;
20728
+ if ((mask & builtin_mask) != mask)
20730
+ if (TARGET_DEBUG_BUILTIN)
20731
+ fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
20735
+ if (d->name == 0)
20737
+ if (TARGET_DEBUG_BUILTIN)
20738
+ fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
20739
+ (long unsigned) i);
20743
+ op[nopnds++] = (void_func) ? void_type_node : argtype;
20745
+ if (attr_args == RS6000_BTC_UNARY)
20746
+ op[nopnds++] = argtype;
20747
+ else if (attr_args == RS6000_BTC_BINARY)
20749
+ op[nopnds++] = argtype;
20750
+ op[nopnds++] = argtype;
20752
+ else if (attr_args == RS6000_BTC_TERNARY)
20754
+ op[nopnds++] = argtype;
20755
+ op[nopnds++] = argtype;
20756
+ op[nopnds++] = argtype;
20762
+ type = build_function_type_list (op[0], NULL_TREE);
20765
+ type = build_function_type_list (op[0], op[1], NULL_TREE);
20768
+ type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
20771
+ type = build_function_type_list (op[0], op[1], op[2], op[3],
20775
+ gcc_unreachable ();
20778
+ def_builtin (d->name, type, d->code);
20782
/* Hash function for builtin functions with up to 3 arguments and a return
20785
@@ -12573,11 +14628,27 @@
20786
are type correct. */
20789
+ /* unsigned 1 argument functions. */
20790
+ case CRYPTO_BUILTIN_VSBOX:
20791
+ case P8V_BUILTIN_VGBBD:
20796
/* unsigned 2 argument functions. */
20797
case ALTIVEC_BUILTIN_VMULEUB_UNS:
20798
case ALTIVEC_BUILTIN_VMULEUH_UNS:
20799
case ALTIVEC_BUILTIN_VMULOUB_UNS:
20800
case ALTIVEC_BUILTIN_VMULOUH_UNS:
20801
+ case CRYPTO_BUILTIN_VCIPHER:
20802
+ case CRYPTO_BUILTIN_VCIPHERLAST:
20803
+ case CRYPTO_BUILTIN_VNCIPHER:
20804
+ case CRYPTO_BUILTIN_VNCIPHERLAST:
20805
+ case CRYPTO_BUILTIN_VPMSUMB:
20806
+ case CRYPTO_BUILTIN_VPMSUMH:
20807
+ case CRYPTO_BUILTIN_VPMSUMW:
20808
+ case CRYPTO_BUILTIN_VPMSUMD:
20809
+ case CRYPTO_BUILTIN_VPMSUM:
20813
@@ -12600,6 +14671,14 @@
20814
case VSX_BUILTIN_XXSEL_8HI_UNS:
20815
case VSX_BUILTIN_XXSEL_4SI_UNS:
20816
case VSX_BUILTIN_XXSEL_2DI_UNS:
20817
+ case CRYPTO_BUILTIN_VPERMXOR:
20818
+ case CRYPTO_BUILTIN_VPERMXOR_V2DI:
20819
+ case CRYPTO_BUILTIN_VPERMXOR_V4SI:
20820
+ case CRYPTO_BUILTIN_VPERMXOR_V8HI:
20821
+ case CRYPTO_BUILTIN_VPERMXOR_V16QI:
20822
+ case CRYPTO_BUILTIN_VSHASIGMAW:
20823
+ case CRYPTO_BUILTIN_VSHASIGMAD:
20824
+ case CRYPTO_BUILTIN_VSHASIGMA:
20828
@@ -12741,9 +14820,24 @@
20831
enum insn_code icode = d->icode;
20832
- if (d->name == 0 || icode == CODE_FOR_nothing)
20834
+ if (d->name == 0)
20836
+ if (TARGET_DEBUG_BUILTIN)
20837
+ fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
20838
+ (long unsigned)i);
20843
+ if (icode == CODE_FOR_nothing)
20845
+ if (TARGET_DEBUG_BUILTIN)
20846
+ fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
20852
type = builtin_function_type (insn_data[icode].operand[0].mode,
20853
insn_data[icode].operand[1].mode,
20854
insn_data[icode].operand[2].mode,
20855
@@ -12781,9 +14875,24 @@
20858
enum insn_code icode = d->icode;
20859
- if (d->name == 0 || icode == CODE_FOR_nothing)
20861
+ if (d->name == 0)
20863
+ if (TARGET_DEBUG_BUILTIN)
20864
+ fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
20865
+ (long unsigned)i);
20870
+ if (icode == CODE_FOR_nothing)
20872
+ if (TARGET_DEBUG_BUILTIN)
20873
+ fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
20879
mode0 = insn_data[icode].operand[0].mode;
20880
mode1 = insn_data[icode].operand[1].mode;
20881
mode2 = insn_data[icode].operand[2].mode;
20882
@@ -12843,9 +14952,24 @@
20885
enum insn_code icode = d->icode;
20886
- if (d->name == 0 || icode == CODE_FOR_nothing)
20888
+ if (d->name == 0)
20890
+ if (TARGET_DEBUG_BUILTIN)
20891
+ fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
20892
+ (long unsigned)i);
20897
+ if (icode == CODE_FOR_nothing)
20899
+ if (TARGET_DEBUG_BUILTIN)
20900
+ fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
20906
mode0 = insn_data[icode].operand[0].mode;
20907
mode1 = insn_data[icode].operand[1].mode;
20909
@@ -13632,7 +15756,7 @@
20910
static bool eliminated = false;
20913
- if (mode != SDmode)
20914
+ if (mode != SDmode || TARGET_NO_SDMODE_STACK)
20915
ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20918
@@ -13661,6 +15785,17 @@
20922
+/* Return the mode to be used for memory when a secondary memory
20923
+ location is needed. For SDmode values we need to use DDmode, in
20924
+ all other cases we can use the same mode. */
20926
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
20928
+ if (mode == SDmode)
20934
rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
20936
@@ -13691,31 +15826,228 @@
20940
-enum reload_reg_type {
20941
- GPR_REGISTER_TYPE,
20942
- VECTOR_REGISTER_TYPE,
20943
- OTHER_REGISTER_TYPE
20945
+/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
20946
+ on traditional floating point registers, and the VMRGOW/VMRGEW instructions
20947
+ only work on the traditional altivec registers, note if an altivec register
20950
-static enum reload_reg_type
20951
-rs6000_reload_register_type (enum reg_class rclass)
20952
+static enum rs6000_reg_type
20953
+register_to_reg_type (rtx reg, bool *is_altivec)
20956
+ HOST_WIDE_INT regno;
20957
+ enum reg_class rclass;
20959
+ if (GET_CODE (reg) == SUBREG)
20960
+ reg = SUBREG_REG (reg);
20962
+ if (!REG_P (reg))
20963
+ return NO_REG_TYPE;
20965
+ regno = REGNO (reg);
20966
+ if (regno >= FIRST_PSEUDO_REGISTER)
20968
- case GENERAL_REGS:
20970
- return GPR_REGISTER_TYPE;
20971
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
20972
+ return PSEUDO_REG_TYPE;
20975
- case ALTIVEC_REGS:
20977
- return VECTOR_REGISTER_TYPE;
20978
+ regno = true_regnum (reg);
20979
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
20980
+ return PSEUDO_REG_TYPE;
20984
- return OTHER_REGISTER_TYPE;
20985
+ gcc_assert (regno >= 0);
20987
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
20988
+ *is_altivec = true;
20990
+ rclass = rs6000_regno_regclass[regno];
20991
+ return reg_class_to_reg_type[(int)rclass];
20994
+/* Helper function for rs6000_secondary_reload to return true if a move to a
20995
+ different register classe is really a simple move. */
20998
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
20999
+ enum rs6000_reg_type from_type,
21000
+ enum machine_mode mode)
21004
+ /* Add support for various direct moves available. In this function, we only
21005
+ look at cases where we don't need any extra registers, and one or more
21006
+ simple move insns are issued. At present, 32-bit integers are not allowed
21007
+ in FPR/VSX registers. Single precision binary floating is not a simple
21008
+ move because we need to convert to the single precision memory layout.
21009
+ The 4-byte SDmode can be moved. */
21010
+ size = GET_MODE_SIZE (mode);
21011
+ if (TARGET_DIRECT_MOVE
21012
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
21013
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21014
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
21017
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
21018
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
21019
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
21022
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
21023
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
21024
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
21030
+/* Power8 helper function for rs6000_secondary_reload, handle all of the
21031
+ special direct moves that involve allocating an extra register, return the
21032
+ insn code of the helper function if there is such a function or
21033
+ CODE_FOR_nothing if not. */
21036
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
21037
+ enum rs6000_reg_type from_type,
21038
+ enum machine_mode mode,
21039
+ secondary_reload_info *sri,
21042
+ bool ret = false;
21043
+ enum insn_code icode = CODE_FOR_nothing;
21045
+ int size = GET_MODE_SIZE (mode);
21047
+ if (TARGET_POWERPC64)
21051
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
21052
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
21053
+ 64-bit values back together. */
21054
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21056
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
21057
+ icode = reg_addr[mode].reload_vsx_gpr;
21060
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
21061
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
21062
+ bottom 64-bit value. */
21063
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21065
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
21066
+ icode = reg_addr[mode].reload_gpr_vsx;
21070
+ else if (mode == SFmode)
21072
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21074
+ cost = 3; /* xscvdpspn, mfvsrd, and. */
21075
+ icode = reg_addr[mode].reload_gpr_vsx;
21078
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21080
+ cost = 2; /* mtvsrz, xscvspdpn. */
21081
+ icode = reg_addr[mode].reload_vsx_gpr;
21086
+ if (TARGET_POWERPC64 && size == 16)
21088
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
21089
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
21090
+ 64-bit values back together. */
21091
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21093
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
21094
+ icode = reg_addr[mode].reload_vsx_gpr;
21097
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
21098
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
21099
+ bottom 64-bit value. */
21100
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21102
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
21103
+ icode = reg_addr[mode].reload_gpr_vsx;
21107
+ else if (!TARGET_POWERPC64 && size == 8)
21109
+ /* Handle moving 64-bit values from GPRs to floating point registers on
21110
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
21111
+ values back together. Altivec register classes must be handled
21112
+ specially since a different instruction is used, and the secondary
21113
+ reload support requires a single instruction class in the scratch
21114
+ register constraint. However, right now TFmode is not allowed in
21115
+ Altivec registers, so the pattern will never match. */
21116
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
21118
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
21119
+ icode = reg_addr[mode].reload_fpr_gpr;
21123
+ if (icode != CODE_FOR_nothing)
21128
+ sri->icode = icode;
21129
+ sri->extra_cost = cost;
21136
+/* Return whether a move between two register classes can be done either
21137
+ directly (simple move) or via a pattern that uses a single extra temporary
21138
+ (using power8's direct move in this case. */
21141
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
21142
+ enum rs6000_reg_type from_type,
21143
+ enum machine_mode mode,
21144
+ secondary_reload_info *sri,
21147
+ /* Fall back to load/store reloads if either type is not a register. */
21148
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
21151
+ /* If we haven't allocated registers yet, assume the move can be done for the
21152
+ standard register types. */
21153
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
21154
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
21155
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
21158
+ /* Moves to the same set of registers is a simple move for non-specialized
21160
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
21163
+ /* Check whether a simple move can be done directly. */
21164
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
21168
+ sri->icode = CODE_FOR_nothing;
21169
+ sri->extra_cost = 0;
21174
+ /* Now check if we can do it in a few steps. */
21175
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
21179
/* Inform reload about cases where moving X with a mode MODE to a register in
21180
RCLASS requires an extra scratch or immediate register. Return the class
21181
needed for the immediate register.
21182
@@ -13739,12 +16071,36 @@
21183
bool default_p = false;
21185
sri->icode = CODE_FOR_nothing;
21187
+ ? reg_addr[mode].reload_load
21188
+ : reg_addr[mode].reload_store);
21190
- /* Convert vector loads and stores into gprs to use an additional base
21192
- icode = rs6000_vector_reload[mode][in_p != false];
21193
- if (icode != CODE_FOR_nothing)
21194
+ if (REG_P (x) || register_operand (x, mode))
21196
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
21197
+ bool altivec_p = (rclass == ALTIVEC_REGS);
21198
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
21202
+ enum rs6000_reg_type exchange = to_type;
21203
+ to_type = from_type;
21204
+ from_type = exchange;
21207
+ /* Can we do a direct move of some sort? */
21208
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
21211
+ icode = (enum insn_code)sri->icode;
21212
+ default_p = false;
21217
+ /* Handle vector moves with reload helper functions. */
21218
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
21221
sri->icode = CODE_FOR_nothing;
21222
sri->extra_cost = 0;
21223
@@ -13755,14 +16111,23 @@
21225
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
21226
an extra register in that case, but it would need an extra
21227
- register if the addressing is reg+reg or (reg+reg)&(-16). */
21228
+ register if the addressing is reg+reg or (reg+reg)&(-16). Special
21229
+ case load/store quad. */
21230
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
21232
- if (!legitimate_indirect_address_p (addr, false)
21233
- && !rs6000_legitimate_offset_address_p (TImode, addr,
21235
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
21236
+ && GET_MODE_SIZE (mode) == 16
21237
+ && quad_memory_operand (x, mode))
21239
sri->icode = icode;
21240
+ sri->extra_cost = 2;
21243
+ else if (!legitimate_indirect_address_p (addr, false)
21244
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
21247
+ sri->icode = icode;
21248
/* account for splitting the loads, and converting the
21249
address from reg+reg to reg. */
21250
sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
21251
@@ -13769,8 +16134,20 @@
21252
+ ((GET_CODE (addr) == AND) ? 1 : 0));
21255
- /* Loads to and stores from vector registers can only do reg+reg
21256
- addressing. Altivec registers can also do (reg+reg)&(-16). */
21257
+ /* Allow scalar loads to/from the traditional floating point
21258
+ registers, even if VSX memory is set. */
21259
+ else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
21260
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
21261
+ && (legitimate_indirect_address_p (addr, false)
21262
+ || legitimate_indirect_address_p (addr, false)
21263
+ || rs6000_legitimate_offset_address_p (mode, addr,
21267
+ /* Loads to and stores from vector registers can only do reg+reg
21268
+ addressing. Altivec registers can also do (reg+reg)&(-16). Allow
21269
+ scalar modes loading up the traditional floating point registers
21270
+ to use offset addresses. */
21271
else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
21272
|| rclass == FLOAT_REGS || rclass == NO_REGS)
21274
@@ -13814,12 +16191,12 @@
21277
enum reg_class xclass = REGNO_REG_CLASS (regno);
21278
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
21279
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
21280
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
21281
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
21283
/* If memory is needed, use default_secondary_reload to create the
21285
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
21286
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
21290
@@ -13829,7 +16206,7 @@
21293
else if (TARGET_POWERPC64
21294
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
21295
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
21297
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
21299
@@ -13868,7 +16245,7 @@
21302
else if (!TARGET_POWERPC64
21303
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
21304
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
21306
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
21308
@@ -13946,6 +16323,36 @@
21312
+/* Better tracing for rs6000_secondary_reload_inner. */
21315
+rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
21318
+ rtx set, clobber;
21320
+ gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
21322
+ fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
21323
+ store_p ? "store" : "load");
21326
+ set = gen_rtx_SET (VOIDmode, mem, reg);
21328
+ set = gen_rtx_SET (VOIDmode, reg, mem);
21330
+ clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
21331
+ debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
21335
+rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
21338
+ rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
21339
+ gcc_unreachable ();
21342
/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
21343
to SP+reg addressing. */
21345
@@ -13964,21 +16371,16 @@
21348
if (TARGET_DEBUG_ADDR)
21350
- fprintf (stderr, "\nrs6000_secondary_reload_inner, type = %s\n",
21351
- store_p ? "store" : "load");
21352
- fprintf (stderr, "reg:\n");
21354
- fprintf (stderr, "mem:\n");
21356
- fprintf (stderr, "scratch:\n");
21357
- debug_rtx (scratch);
21359
+ rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
21361
- gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
21362
- gcc_assert (GET_CODE (mem) == MEM);
21363
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21364
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21366
+ if (GET_CODE (mem) != MEM)
21367
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21369
rclass = REGNO_REG_CLASS (regno);
21370
- addr = XEXP (mem, 0);
21371
+ addr = find_replacement (&XEXP (mem, 0));
21375
@@ -13989,25 +16391,31 @@
21376
if (GET_CODE (addr) == AND)
21378
and_op2 = XEXP (addr, 1);
21379
- addr = XEXP (addr, 0);
21380
+ addr = find_replacement (&XEXP (addr, 0));
21383
if (GET_CODE (addr) == PRE_MODIFY)
21385
- scratch_or_premodify = XEXP (addr, 0);
21386
- gcc_assert (REG_P (scratch_or_premodify));
21387
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
21388
- addr = XEXP (addr, 1);
21389
+ scratch_or_premodify = find_replacement (&XEXP (addr, 0));
21390
+ if (!REG_P (scratch_or_premodify))
21391
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21393
+ addr = find_replacement (&XEXP (addr, 1));
21394
+ if (GET_CODE (addr) != PLUS)
21395
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21398
if (GET_CODE (addr) == PLUS
21399
&& (and_op2 != NULL_RTX
21400
- || !rs6000_legitimate_offset_address_p (TImode, addr,
21401
+ || !rs6000_legitimate_offset_address_p (PTImode, addr,
21404
+ /* find_replacement already recurses into both operands of
21405
+ PLUS so we don't need to call it here. */
21406
addr_op1 = XEXP (addr, 0);
21407
addr_op2 = XEXP (addr, 1);
21408
- gcc_assert (legitimate_indirect_address_p (addr_op1, false));
21409
+ if (!legitimate_indirect_address_p (addr_op1, false))
21410
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21412
if (!REG_P (addr_op2)
21413
&& (GET_CODE (addr_op2) != CONST_INT
21414
@@ -14035,7 +16443,7 @@
21415
scratch_or_premodify = scratch;
21417
else if (!legitimate_indirect_address_p (addr, false)
21418
- && !rs6000_legitimate_offset_address_p (TImode, addr,
21419
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
21422
if (TARGET_DEBUG_ADDR)
21423
@@ -14051,9 +16459,21 @@
21427
- /* Float/Altivec registers can only handle reg+reg addressing. Move
21428
- other addresses into a scratch register. */
21429
+ /* Float registers can do offset+reg addressing for scalar types. */
21431
+ if (legitimate_indirect_address_p (addr, false) /* reg */
21432
+ || legitimate_indexed_address_p (addr, false) /* reg+reg */
21433
+ || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
21434
+ && and_op2 == NULL_RTX
21435
+ && scratch_or_premodify == scratch
21436
+ && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
21439
+ /* If this isn't a legacy floating point load/store, fall through to the
21442
+ /* VSX/Altivec registers can only handle reg+reg addressing. Move other
21443
+ addresses into a scratch register. */
21447
@@ -14067,35 +16487,35 @@
21448
|| !VECTOR_MEM_ALTIVEC_P (mode)))
21450
and_op2 = XEXP (addr, 1);
21451
- addr = XEXP (addr, 0);
21452
+ addr = find_replacement (&XEXP (addr, 0));
21455
/* If we aren't using a VSX load, save the PRE_MODIFY register and use it
21456
as the address later. */
21457
if (GET_CODE (addr) == PRE_MODIFY
21458
- && (!VECTOR_MEM_VSX_P (mode)
21459
+ && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
21460
+ && (rclass != FLOAT_REGS
21461
+ || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
21462
|| and_op2 != NULL_RTX
21463
|| !legitimate_indexed_address_p (XEXP (addr, 1), false)))
21465
- scratch_or_premodify = XEXP (addr, 0);
21466
- gcc_assert (legitimate_indirect_address_p (scratch_or_premodify,
21468
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
21469
- addr = XEXP (addr, 1);
21470
+ scratch_or_premodify = find_replacement (&XEXP (addr, 0));
21471
+ if (!legitimate_indirect_address_p (scratch_or_premodify, false))
21472
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21474
+ addr = find_replacement (&XEXP (addr, 1));
21475
+ if (GET_CODE (addr) != PLUS)
21476
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21479
if (legitimate_indirect_address_p (addr, false) /* reg */
21480
|| legitimate_indexed_address_p (addr, false) /* reg+reg */
21481
- || GET_CODE (addr) == PRE_MODIFY /* VSX pre-modify */
21482
|| (GET_CODE (addr) == AND /* Altivec memory */
21483
+ && rclass == ALTIVEC_REGS
21484
&& GET_CODE (XEXP (addr, 1)) == CONST_INT
21485
&& INTVAL (XEXP (addr, 1)) == -16
21486
- && VECTOR_MEM_ALTIVEC_P (mode))
21487
- || (rclass == FLOAT_REGS /* legacy float mem */
21488
- && GET_MODE_SIZE (mode) == 8
21489
- && and_op2 == NULL_RTX
21490
- && scratch_or_premodify == scratch
21491
- && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
21492
+ && (legitimate_indirect_address_p (XEXP (addr, 0), false)
21493
+ || legitimate_indexed_address_p (XEXP (addr, 0), false))))
21496
else if (GET_CODE (addr) == PLUS)
21497
@@ -14102,7 +16522,8 @@
21499
addr_op1 = XEXP (addr, 0);
21500
addr_op2 = XEXP (addr, 1);
21501
- gcc_assert (REG_P (addr_op1));
21502
+ if (!REG_P (addr_op1))
21503
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21505
if (TARGET_DEBUG_ADDR)
21507
@@ -14121,7 +16542,8 @@
21510
else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
21511
- || GET_CODE (addr) == CONST_INT || REG_P (addr))
21512
+ || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
21515
if (TARGET_DEBUG_ADDR)
21517
@@ -14137,12 +16559,12 @@
21521
- gcc_unreachable ();
21522
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21527
- gcc_unreachable ();
21528
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21531
/* If the original address involved a pre-modify that we couldn't use the VSX
21532
@@ -14189,7 +16611,7 @@
21533
/* Adjust the address if it changed. */
21534
if (addr != XEXP (mem, 0))
21536
- mem = change_address (mem, mode, addr);
21537
+ mem = replace_equiv_address_nv (mem, addr);
21538
if (TARGET_DEBUG_ADDR)
21539
fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
21541
@@ -14254,8 +16676,10 @@
21545
-/* Allocate a 64-bit stack slot to be used for copying SDmode
21546
- values through if this function has any SDmode references. */
21547
+/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
21548
+ this function has any SDmode references. If we are on a power7 or later, we
21549
+ don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
21550
+ can load/store the value. */
21553
rs6000_alloc_sdmode_stack_slot (void)
21554
@@ -14265,7 +16689,14 @@
21555
gimple_stmt_iterator gsi;
21557
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
21558
+ /* We use a different approach for dealing with the secondary
21559
+ memory in LRA. */
21560
+ if (ira_use_lra_p)
21563
+ if (TARGET_NO_SDMODE_STACK)
21567
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21569
@@ -14326,8 +16757,7 @@
21571
enum machine_mode mode = GET_MODE (x);
21573
- if (VECTOR_UNIT_VSX_P (mode)
21574
- && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
21575
+ if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
21578
if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
21579
@@ -14335,8 +16765,14 @@
21580
&& easy_vector_constant (x, mode))
21581
return ALTIVEC_REGS;
21583
- if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
21585
+ if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
21587
+ if (reg_class_subset_p (GENERAL_REGS, rclass))
21588
+ return GENERAL_REGS;
21589
+ if (reg_class_subset_p (BASE_REGS, rclass))
21590
+ return BASE_REGS;
21594
if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
21595
return GENERAL_REGS;
21596
@@ -14382,60 +16818,45 @@
21597
set and vice versa. */
21600
-rs6000_secondary_memory_needed (enum reg_class class1,
21601
- enum reg_class class2,
21602
+rs6000_secondary_memory_needed (enum reg_class from_class,
21603
+ enum reg_class to_class,
21604
enum machine_mode mode)
21606
- if (class1 == class2)
21608
+ enum rs6000_reg_type from_type, to_type;
21609
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
21610
+ || (to_class == ALTIVEC_REGS));
21612
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
21613
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
21614
- between these classes. But we need memory for other things that can go in
21615
- FLOAT_REGS like SFmode. */
21617
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
21618
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
21619
- || class1 == FLOAT_REGS))
21620
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
21621
- && class2 != FLOAT_REGS);
21622
+ /* If a simple/direct move is available, we don't need secondary memory */
21623
+ from_type = reg_class_to_reg_type[(int)from_class];
21624
+ to_type = reg_class_to_reg_type[(int)to_class];
21626
- if (class1 == VSX_REGS || class2 == VSX_REGS)
21628
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
21629
+ (secondary_reload_info *)0, altivec_p))
21632
- if (class1 == FLOAT_REGS
21633
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
21634
- || ((mode != DFmode)
21635
- && (mode != DDmode)
21636
- && (mode != DImode))))
21637
+ /* If we have a floating point or vector register class, we need to use
21638
+ memory to transfer the data. */
21639
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
21642
- if (class2 == FLOAT_REGS
21643
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
21644
- || ((mode != DFmode)
21645
- && (mode != DDmode)
21646
- && (mode != DImode))))
21649
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
21655
/* Debug version of rs6000_secondary_memory_needed. */
21657
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
21658
- enum reg_class class2,
21659
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
21660
+ enum reg_class to_class,
21661
enum machine_mode mode)
21663
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
21664
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
21667
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
21668
- "class2 = %s, mode = %s\n",
21669
- ret ? "true" : "false", reg_class_names[class1],
21670
- reg_class_names[class2], GET_MODE_NAME (mode));
21671
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
21672
+ "to_class = %s, mode = %s\n",
21673
+ ret ? "true" : "false",
21674
+ reg_class_names[from_class],
21675
+ reg_class_names[to_class],
21676
+ GET_MODE_NAME (mode));
21680
@@ -14499,15 +16920,21 @@
21681
/* Constants, memory, and FP registers can go into FP registers. */
21682
if ((regno == -1 || FP_REGNO_P (regno))
21683
&& (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
21684
- return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
21685
+ return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
21687
/* Memory, and FP/altivec registers can go into fp/altivec registers under
21689
+ VSX. However, for scalar variables, use the traditional floating point
21690
+ registers so that we can use offset+register addressing. */
21692
&& (regno == -1 || VSX_REGNO_P (regno))
21693
&& VSX_REG_CLASS_P (rclass))
21696
+ if (GET_MODE_SIZE (mode) < 16)
21697
+ return FLOAT_REGS;
21702
/* Memory, and AltiVec registers can go into AltiVec registers. */
21703
if ((regno == -1 || ALTIVEC_REGNO_P (regno))
21704
&& rclass == ALTIVEC_REGS)
21705
@@ -14551,8 +16978,42 @@
21706
if (from_size != to_size)
21708
enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
21709
- return ((from_size < 8 || to_size < 8 || TARGET_IEEEQUAD)
21710
- && reg_classes_intersect_p (xclass, rclass));
21712
+ if (reg_classes_intersect_p (xclass, rclass))
21714
+ unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
21715
+ unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
21717
+ /* Don't allow 64-bit types to overlap with 128-bit types that take a
21718
+ single register under VSX because the scalar part of the register
21719
+ is in the upper 64-bits, and not the lower 64-bits. Types like
21720
+ TFmode/TDmode that take 2 scalar register can overlap. 128-bit
21721
+ IEEE floating point can't overlap, and neither can small
21724
+ if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
21727
+ /* TDmode in floating-mode registers must always go into a register
21728
+ pair with the most significant word in the even-numbered register
21729
+ to match ISA requirements. In little-endian mode, this does not
21730
+ match subreg numbering, so we cannot allow subregs. */
21731
+ if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
21734
+ if (from_size < 8 || to_size < 8)
21737
+ if (from_size == 8 && (8 * to_nregs) != to_size)
21740
+ if (to_size == 8 && (8 * from_nregs) != from_size)
21749
if (TARGET_E500_DOUBLE
21750
@@ -14566,10 +17027,19 @@
21751
/* Since the VSX register set includes traditional floating point registers
21752
and altivec registers, just check for the size being different instead of
21753
trying to check whether the modes are vector modes. Otherwise it won't
21754
- allow say DF and DI to change classes. */
21755
+ allow say DF and DI to change classes. For types like TFmode and TDmode
21756
+ that take 2 64-bit registers, rather than a single 128-bit register, don't
21757
+ allow subregs of those types to other 128 bit types. */
21758
if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
21759
- return (from_size != 8 && from_size != 16);
21761
+ unsigned num_regs = (from_size + 15) / 16;
21762
+ if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
21763
+ || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
21766
+ return (from_size != 8 && from_size != 16);
21769
if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
21770
&& (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
21772
@@ -14599,6 +17069,161 @@
21776
+/* Return a string to do a move operation of 128 bits of data. */
21779
+rs6000_output_move_128bit (rtx operands[])
21781
+ rtx dest = operands[0];
21782
+ rtx src = operands[1];
21783
+ enum machine_mode mode = GET_MODE (dest);
21786
+ bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
21787
+ bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
21789
+ if (REG_P (dest))
21791
+ dest_regno = REGNO (dest);
21792
+ dest_gpr_p = INT_REGNO_P (dest_regno);
21793
+ dest_fp_p = FP_REGNO_P (dest_regno);
21794
+ dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
21795
+ dest_vsx_p = dest_fp_p | dest_vmx_p;
21800
+ dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
21805
+ src_regno = REGNO (src);
21806
+ src_gpr_p = INT_REGNO_P (src_regno);
21807
+ src_fp_p = FP_REGNO_P (src_regno);
21808
+ src_vmx_p = ALTIVEC_REGNO_P (src_regno);
21809
+ src_vsx_p = src_fp_p | src_vmx_p;
21814
+ src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21817
+ /* Register moves. */
21818
+ if (dest_regno >= 0 && src_regno >= 0)
21825
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21829
+ else if (TARGET_VSX && dest_vsx_p)
21832
+ return "xxlor %x0,%x1,%x1";
21834
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
21838
+ else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21839
+ return "vor %0,%1,%1";
21841
+ else if (dest_fp_p && src_fp_p)
21846
+ else if (dest_regno >= 0 && MEM_P (src))
21850
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21851
+ return "lq %0,%1";
21856
+ else if (TARGET_ALTIVEC && dest_vmx_p
21857
+ && altivec_indexed_or_indirect_operand (src, mode))
21858
+ return "lvx %0,%y1";
21860
+ else if (TARGET_VSX && dest_vsx_p)
21862
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21863
+ return "lxvw4x %x0,%y1";
21865
+ return "lxvd2x %x0,%y1";
21868
+ else if (TARGET_ALTIVEC && dest_vmx_p)
21869
+ return "lvx %0,%y1";
21871
+ else if (dest_fp_p)
21876
+ else if (src_regno >= 0 && MEM_P (dest))
21880
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21881
+ return "stq %1,%0";
21886
+ else if (TARGET_ALTIVEC && src_vmx_p
21887
+ && altivec_indexed_or_indirect_operand (src, mode))
21888
+ return "stvx %1,%y0";
21890
+ else if (TARGET_VSX && src_vsx_p)
21892
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21893
+ return "stxvw4x %x1,%y0";
21895
+ return "stxvd2x %x1,%y0";
21898
+ else if (TARGET_ALTIVEC && src_vmx_p)
21899
+ return "stvx %1,%y0";
21901
+ else if (src_fp_p)
21906
+ else if (dest_regno >= 0
21907
+ && (GET_CODE (src) == CONST_INT
21908
+ || GET_CODE (src) == CONST_DOUBLE
21909
+ || GET_CODE (src) == CONST_VECTOR))
21914
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
21915
+ return "xxlxor %x0,%x0,%x0";
21917
+ else if (TARGET_ALTIVEC && dest_vmx_p)
21918
+ return output_vec_const_move (operands);
21921
+ if (TARGET_DEBUG_ADDR)
21923
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
21924
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
21927
+ gcc_unreachable ();
21931
/* Given a comparison operation, return the bit number in CCR to test. We
21932
know this is a valid comparison.
21934
@@ -14824,6 +17449,7 @@
21935
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21942
@@ -15303,7 +17929,7 @@
21946
- /* Like 'L', for third word of TImode */
21947
+ /* Like 'L', for third word of TImode/PTImode */
21949
fputs (reg_names[REGNO (x) + 2], file);
21950
else if (MEM_P (x))
21951
@@ -15353,7 +17979,7 @@
21955
- /* Like 'L', for last word of TImode. */
21956
+ /* Like 'L', for last word of TImode/PTImode. */
21958
fputs (reg_names[REGNO (x) + 3], file);
21959
else if (MEM_P (x))
21960
@@ -15384,7 +18010,8 @@
21961
if ((TARGET_SPE || TARGET_E500_DOUBLE)
21962
&& (GET_MODE_SIZE (GET_MODE (x)) == 8
21963
|| GET_MODE (x) == TFmode
21964
- || GET_MODE (x) == TImode))
21965
+ || GET_MODE (x) == TImode
21966
+ || GET_MODE (x) == PTImode))
21968
/* Handle [reg]. */
21970
@@ -16767,7 +19394,8 @@
21973
/* A subroutine of the atomic operation splitters. Emit a load-locked
21974
- instruction in MODE. */
21975
+ instruction in MODE. For QI/HImode, possibly use a pattern than includes
21976
+ the zero_extend operation. */
21979
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
21980
@@ -16776,12 +19404,26 @@
21985
+ fn = gen_load_lockedqi;
21988
+ fn = gen_load_lockedhi;
21991
- fn = gen_load_lockedsi;
21992
+ if (GET_MODE (mem) == QImode)
21993
+ fn = gen_load_lockedqi_si;
21994
+ else if (GET_MODE (mem) == HImode)
21995
+ fn = gen_load_lockedhi_si;
21997
+ fn = gen_load_lockedsi;
22000
fn = gen_load_lockeddi;
22003
+ fn = gen_load_lockedti;
22006
gcc_unreachable ();
22008
@@ -16798,6 +19440,12 @@
22013
+ fn = gen_store_conditionalqi;
22016
+ fn = gen_store_conditionalhi;
22019
fn = gen_store_conditionalsi;
22021
@@ -16804,6 +19452,9 @@
22023
fn = gen_store_conditionaldi;
22026
+ fn = gen_store_conditionalti;
22029
gcc_unreachable ();
22031
@@ -16950,7 +19601,7 @@
22033
rtx boolval, retval, mem, oldval, newval, cond;
22034
rtx label1, label2, x, mask, shift;
22035
- enum machine_mode mode;
22036
+ enum machine_mode mode, orig_mode;
22037
enum memmodel mod_s, mod_f;
22040
@@ -16962,23 +19613,30 @@
22041
is_weak = (INTVAL (operands[5]) != 0);
22042
mod_s = (enum memmodel) INTVAL (operands[6]);
22043
mod_f = (enum memmodel) INTVAL (operands[7]);
22044
- mode = GET_MODE (mem);
22045
+ orig_mode = mode = GET_MODE (mem);
22047
mask = shift = NULL_RTX;
22048
if (mode == QImode || mode == HImode)
22050
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22052
- /* Shift and mask OLDVAL into position with the word. */
22053
+ /* Before power8, we didn't have access to lbarx/lharx, so generate a
22054
+ lwarx and shift/mask operations. With power8, we need to do the
22055
+ comparison in SImode, but the store is still done in QI/HImode. */
22056
oldval = convert_modes (SImode, mode, oldval, 1);
22057
- oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22058
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
22060
- /* Shift and mask NEWVAL into position within the word. */
22061
- newval = convert_modes (SImode, mode, newval, 1);
22062
- newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22063
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
22064
+ if (!TARGET_SYNC_HI_QI)
22066
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22068
+ /* Shift and mask OLDVAL into position with the word. */
22069
+ oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22070
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
22072
+ /* Shift and mask NEWVAL into position within the word. */
22073
+ newval = convert_modes (SImode, mode, newval, 1);
22074
+ newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22075
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
22078
/* Prepare to adjust the return value. */
22079
retval = gen_reg_rtx (SImode);
22081
@@ -17006,7 +19664,25 @@
22084
cond = gen_reg_rtx (CCmode);
22085
- x = gen_rtx_COMPARE (CCmode, x, oldval);
22086
+ /* If we have TImode, synthesize a comparison. */
22087
+ if (mode != TImode)
22088
+ x = gen_rtx_COMPARE (CCmode, x, oldval);
22091
+ rtx xor1_result = gen_reg_rtx (DImode);
22092
+ rtx xor2_result = gen_reg_rtx (DImode);
22093
+ rtx or_result = gen_reg_rtx (DImode);
22094
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22095
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22096
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22097
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22099
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22100
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22101
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22102
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22105
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
22107
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22108
@@ -17016,7 +19692,7 @@
22110
x = rs6000_mask_atomic_subword (retval, newval, mask);
22112
- emit_store_conditional (mode, cond, mem, x);
22113
+ emit_store_conditional (orig_mode, cond, mem, x);
22117
@@ -17034,6 +19710,8 @@
22120
rs6000_finish_atomic_subword (operands[1], retval, shift);
22121
+ else if (mode != GET_MODE (operands[1]))
22122
+ convert_move (operands[1], retval, 1);
22124
/* In all cases, CR0 contains EQ on success, and NE on failure. */
22125
x = gen_rtx_EQ (SImode, cond, const0_rtx);
22126
@@ -17057,7 +19735,7 @@
22127
mode = GET_MODE (mem);
22129
mask = shift = NULL_RTX;
22130
- if (mode == QImode || mode == HImode)
22131
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
22133
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22135
@@ -17106,53 +19784,70 @@
22137
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
22138
enum machine_mode mode = GET_MODE (mem);
22139
+ enum machine_mode store_mode = mode;
22140
rtx label, x, cond, mask, shift;
22141
rtx before = orig_before, after = orig_after;
22143
mask = shift = NULL_RTX;
22144
+ /* On power8, we want to use SImode for the operation. On previous systems,
22145
+ use the operation in a subword and shift/mask to get the proper byte or
22147
if (mode == QImode || mode == HImode)
22149
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22150
+ if (TARGET_SYNC_HI_QI)
22152
+ val = convert_modes (SImode, mode, val, 1);
22154
- /* Shift and mask VAL into position with the word. */
22155
- val = convert_modes (SImode, mode, val, 1);
22156
- val = expand_simple_binop (SImode, ASHIFT, val, shift,
22157
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
22160
+ /* Prepare to adjust the return value. */
22161
+ before = gen_reg_rtx (SImode);
22163
+ after = gen_reg_rtx (SImode);
22170
- /* We've already zero-extended VAL. That is sufficient to
22171
- make certain that it does not affect other bits. */
22174
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22177
- /* If we make certain that all of the other bits in VAL are
22178
- set, that will be sufficient to not affect other bits. */
22179
- x = gen_rtx_NOT (SImode, mask);
22180
- x = gen_rtx_IOR (SImode, x, val);
22181
- emit_insn (gen_rtx_SET (VOIDmode, val, x));
22184
+ /* Shift and mask VAL into position with the word. */
22185
+ val = convert_modes (SImode, mode, val, 1);
22186
+ val = expand_simple_binop (SImode, ASHIFT, val, shift,
22187
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
22192
- /* These will all affect bits outside the field and need
22193
- adjustment via MASK within the loop. */
22199
+ /* We've already zero-extended VAL. That is sufficient to
22200
+ make certain that it does not affect other bits. */
22205
- gcc_unreachable ();
22207
+ /* If we make certain that all of the other bits in VAL are
22208
+ set, that will be sufficient to not affect other bits. */
22209
+ x = gen_rtx_NOT (SImode, mask);
22210
+ x = gen_rtx_IOR (SImode, x, val);
22211
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
22218
+ /* These will all affect bits outside the field and need
22219
+ adjustment via MASK within the loop. */
22223
+ gcc_unreachable ();
22226
+ /* Prepare to adjust the return value. */
22227
+ before = gen_reg_rtx (SImode);
22229
+ after = gen_reg_rtx (SImode);
22230
+ store_mode = mode = SImode;
22233
- /* Prepare to adjust the return value. */
22234
- before = gen_reg_rtx (SImode);
22236
- after = gen_reg_rtx (SImode);
22240
mem = rs6000_pre_atomic_barrier (mem, model);
22241
@@ -17185,9 +19880,11 @@
22242
NULL_RTX, 1, OPTAB_LIB_WIDEN);
22243
x = rs6000_mask_atomic_subword (before, x, mask);
22245
+ else if (store_mode != mode)
22246
+ x = convert_modes (store_mode, mode, x, 1);
22248
cond = gen_reg_rtx (CCmode);
22249
- emit_store_conditional (mode, cond, mem, x);
22250
+ emit_store_conditional (store_mode, cond, mem, x);
22252
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22253
emit_unlikely_jump (x, label);
22254
@@ -17196,11 +19893,22 @@
22258
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
22259
+ then do the calcuations in a SImode register. */
22261
rs6000_finish_atomic_subword (orig_before, before, shift);
22263
rs6000_finish_atomic_subword (orig_after, after, shift);
22265
+ else if (store_mode != mode)
22267
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
22268
+ operation and then do the calcuations in a SImode register. */
22270
+ convert_move (orig_before, before, 1);
22272
+ convert_move (orig_after, after, 1);
22274
else if (orig_after && after != orig_after)
22275
emit_move_insn (orig_after, after);
22277
@@ -17240,6 +19948,39 @@
22279
gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
22281
+ /* TDmode residing in FP registers is special, since the ISA requires that
22282
+ the lower-numbered word of a register pair is always the most significant
22283
+ word, even in little-endian mode. This does not match the usual subreg
22284
+ semantics, so we cannnot use simplify_gen_subreg in those cases. Access
22285
+ the appropriate constituent registers "by hand" in little-endian mode.
22287
+ Note we do not need to check for destructive overlap here since TDmode
22288
+ can only reside in even/odd register pairs. */
22289
+ if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
22291
+ rtx p_src, p_dst;
22294
+ for (i = 0; i < nregs; i++)
22296
+ if (REG_P (src) && FP_REGNO_P (REGNO (src)))
22297
+ p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
22299
+ p_src = simplify_gen_subreg (reg_mode, src, mode,
22300
+ i * reg_mode_size);
22302
+ if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
22303
+ p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
22305
+ p_dst = simplify_gen_subreg (reg_mode, dst, mode,
22306
+ i * reg_mode_size);
22308
+ emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
22314
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
22316
/* Move register range backwards, if we might have destructive
22317
@@ -17694,7 +20435,7 @@
22321
- gcc_checking_assert (DEFAULT_ABI == ABI_AIX);
22322
+ gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22323
if (info->first_fp_reg_save > 61)
22324
strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22325
strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22326
@@ -17705,7 +20446,8 @@
22327
by the static chain. It would require too much fiddling and the
22328
static chain is rarely used anyway. FPRs are saved w.r.t the stack
22329
pointer on Darwin, and AIX uses r1 or r12. */
22330
- if (using_static_chain_p && DEFAULT_ABI != ABI_AIX)
22331
+ if (using_static_chain_p
22332
+ && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
22333
strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
22335
| SAVE_INLINE_VRS | REST_INLINE_VRS);
22336
@@ -17838,7 +20580,35 @@
22337
The required alignment for AIX configurations is two words (i.e., 8
22340
+ The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
22342
+ SP----> +---------------------------------------+
22343
+ | Back chain to caller | 0
22344
+ +---------------------------------------+
22345
+ | Save area for CR | 8
22346
+ +---------------------------------------+
22348
+ +---------------------------------------+
22349
+ | Saved TOC pointer | 24
22350
+ +---------------------------------------+
22351
+ | Parameter save area (P) | 32
22352
+ +---------------------------------------+
22353
+ | Alloca space (A) | 32+P
22354
+ +---------------------------------------+
22355
+ | Local variable space (L) | 32+P+A
22356
+ +---------------------------------------+
22357
+ | Save area for AltiVec registers (W) | 32+P+A+L
22358
+ +---------------------------------------+
22359
+ | AltiVec alignment padding (Y) | 32+P+A+L+W
22360
+ +---------------------------------------+
22361
+ | Save area for GP registers (G) | 32+P+A+L+W+Y
22362
+ +---------------------------------------+
22363
+ | Save area for FP registers (F) | 32+P+A+L+W+Y+G
22364
+ +---------------------------------------+
22365
+ old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
22366
+ +---------------------------------------+
22369
V.4 stack frames look like:
22371
SP----> +---------------------------------------+
22372
@@ -17898,6 +20668,7 @@
22373
rs6000_stack_t *info_ptr = &stack_info;
22374
int reg_size = TARGET_32BIT ? 4 : 8;
22379
HOST_WIDE_INT non_fixed_size;
22380
@@ -17991,6 +20762,18 @@
22384
+ /* In the ELFv2 ABI, we also need to allocate space for separate
22385
+ CR field save areas if the function calls __builtin_eh_return. */
22386
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
22388
+ /* This hard-codes that we have three call-saved CR fields. */
22389
+ ehcr_size = 3 * reg_size;
22390
+ /* We do *not* use the regular CR save mechanism. */
22391
+ info_ptr->cr_save_p = 0;
22396
/* Determine various sizes. */
22397
info_ptr->reg_size = reg_size;
22398
info_ptr->fixed_size = RS6000_SAVE_AREA;
22399
@@ -18030,6 +20813,7 @@
22400
gcc_unreachable ();
22405
info_ptr->fp_save_offset = - info_ptr->fp_size;
22406
info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
22407
@@ -18059,6 +20843,8 @@
22410
info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
22412
+ info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
22413
info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
22414
info_ptr->lr_save_offset = 2*reg_size;
22416
@@ -18121,6 +20907,7 @@
22417
+ info_ptr->spe_gp_size
22418
+ info_ptr->spe_padding_size
22421
+ info_ptr->cr_size
22422
+ info_ptr->vrsave_size,
22424
@@ -18134,7 +20921,7 @@
22426
/* Determine if we need to save the link register. */
22427
if (info_ptr->calls_p
22428
- || (DEFAULT_ABI == ABI_AIX
22429
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22431
&& !TARGET_PROFILE_KERNEL)
22432
|| (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
22433
@@ -18280,6 +21067,7 @@
22434
default: abi_string = "Unknown"; break;
22435
case ABI_NONE: abi_string = "NONE"; break;
22436
case ABI_AIX: abi_string = "AIX"; break;
22437
+ case ABI_ELFv2: abi_string = "ELFv2"; break;
22438
case ABI_DARWIN: abi_string = "Darwin"; break;
22439
case ABI_V4: abi_string = "V.4"; break;
22441
@@ -18401,7 +21189,8 @@
22442
/* Currently we don't optimize very well between prolog and body
22443
code and for PIC code the code can be actually quite bad, so
22444
don't try to be too clever here. */
22445
- if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic))
22447
+ || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22449
cfun->machine->ra_needs_full_frame = 1;
22451
@@ -18460,13 +21249,13 @@
22455
- /* Under the AIX ABI we can't allow calls to non-local functions,
22456
- because the callee may have a different TOC pointer to the
22457
- caller and there's no way to ensure we restore the TOC when we
22458
- return. With the secure-plt SYSV ABI we can't make non-local
22459
+ /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22460
+ functions, because the callee may have a different TOC pointer to
22461
+ the caller and there's no way to ensure we restore the TOC when
22462
+ we return. With the secure-plt SYSV ABI we can't make non-local
22463
calls when -fpic/PIC because the plt call stubs use r30. */
22464
if (DEFAULT_ABI == ABI_DARWIN
22465
- || (DEFAULT_ABI == ABI_AIX
22466
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22468
&& !DECL_EXTERNAL (decl)
22469
&& (*targetm.binds_local_p) (decl))
22470
@@ -18567,7 +21356,7 @@
22472
dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22474
- if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic)
22475
+ if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22478
rtx lab, tmp1, tmp2, got;
22479
@@ -18595,7 +21384,7 @@
22480
emit_insn (gen_load_toc_v4_pic_si ());
22481
emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22483
- else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
22484
+ else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22487
rtx temp0 = (fromprolog
22488
@@ -18643,7 +21432,7 @@
22492
- gcc_assert (DEFAULT_ABI == ABI_AIX);
22493
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22496
emit_insn (gen_load_toc_aix_si (dest));
22497
@@ -19048,7 +21837,7 @@
22500
rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22501
- rtx reg2, rtx rreg)
22502
+ rtx reg2, rtx rreg, rtx split_reg)
22506
@@ -19139,6 +21928,11 @@
22510
+ /* If a store insn has been split into multiple insns, the
22511
+ true source register is given by split_reg. */
22512
+ if (split_reg != NULL_RTX)
22513
+ real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22515
RTX_FRAME_RELATED_P (insn) = 1;
22516
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22518
@@ -19246,7 +22040,7 @@
22519
reg = gen_rtx_REG (mode, regno);
22520
insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22521
return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22522
- NULL_RTX, NULL_RTX);
22523
+ NULL_RTX, NULL_RTX, NULL_RTX);
22526
/* Emit an offset memory reference suitable for a frame store, while
22527
@@ -19362,7 +22156,7 @@
22528
if ((sel & SAVRES_LR))
22531
- else if (DEFAULT_ABI == ABI_AIX)
22532
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22534
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22535
/* No out-of-line save/restore routines for GPRs on AIX. */
22536
@@ -19503,7 +22297,7 @@
22537
static inline unsigned
22538
ptr_regno_for_savres (int sel)
22540
- if (DEFAULT_ABI == ABI_AIX)
22541
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22542
return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
22543
return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
22545
@@ -19588,6 +22382,43 @@
22549
+/* Emit code to store CR fields that need to be saved into REG. */
22552
+rs6000_emit_move_from_cr (rtx reg)
22554
+ /* Only the ELFv2 ABI allows storing only selected fields. */
22555
+ if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
22557
+ int i, cr_reg[8], count = 0;
22559
+ /* Collect CR fields that must be saved. */
22560
+ for (i = 0; i < 8; i++)
22561
+ if (save_reg_p (CR0_REGNO + i))
22562
+ cr_reg[count++] = i;
22564
+ /* If it's just a single one, use mfcrf. */
22567
+ rtvec p = rtvec_alloc (1);
22568
+ rtvec r = rtvec_alloc (2);
22569
+ RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
22570
+ RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
22572
+ = gen_rtx_SET (VOIDmode, reg,
22573
+ gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
22575
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22579
+ /* ??? It might be better to handle count == 2 / 3 cases here
22580
+ as well, using logical operations to combine the values. */
22583
+ emit_insn (gen_movesi_from_cr (reg));
22586
/* Determine whether the gp REG is really used. */
22589
@@ -19653,6 +22484,17 @@
22590
#define NOT_INUSE(R) do {} while (0)
22593
+ if (DEFAULT_ABI == ABI_ELFv2)
22595
+ cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
22597
+ /* With -mminimal-toc we may generate an extra use of r2 below. */
22598
+ if (!TARGET_SINGLE_PIC_BASE
22599
+ && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
22600
+ cfun->machine->r2_setup_needed = true;
22604
if (flag_stack_usage_info)
22605
current_function_static_stack_size = info->total_size;
22607
@@ -19767,7 +22609,7 @@
22609
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22610
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22611
- treg, GEN_INT (-info->total_size));
22612
+ treg, GEN_INT (-info->total_size), NULL_RTX);
22613
sp_off = frame_off = info->total_size;
22616
@@ -19852,7 +22694,7 @@
22618
insn = emit_move_insn (mem, reg);
22619
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22620
- NULL_RTX, NULL_RTX);
22621
+ NULL_RTX, NULL_RTX, NULL_RTX);
22625
@@ -19859,7 +22701,7 @@
22627
/* If we need to save CR, put it into r12 or r11. Choose r12 except when
22628
r12 will be needed by out-of-line gpr restore. */
22629
- cr_save_regno = (DEFAULT_ABI == ABI_AIX
22630
+ cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22631
&& !(strategy & (SAVE_INLINE_GPRS
22632
| SAVE_NOINLINE_GPRS_SAVES_LR))
22634
@@ -19868,21 +22710,9 @@
22635
&& REGNO (frame_reg_rtx) != cr_save_regno
22636
&& !(using_static_chain_p && cr_save_regno == 11))
22640
cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
22641
START_USE (cr_save_regno);
22642
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
22643
- RTX_FRAME_RELATED_P (insn) = 1;
22644
- /* Now, there's no way that dwarf2out_frame_debug_expr is going
22645
- to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'.
22646
- But that's OK. All we have to do is specify that _one_ condition
22647
- code register is saved in this stack slot. The thrower's epilogue
22648
- will then restore all the call-saved registers.
22649
- We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */
22650
- set = gen_rtx_SET (VOIDmode, cr_save_rtx,
22651
- gen_rtx_REG (SImode, CR2_REGNO));
22652
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
22653
+ rs6000_emit_move_from_cr (cr_save_rtx);
22656
/* Do any required saving of fpr's. If only one or two to save, do
22657
@@ -19920,7 +22750,7 @@
22658
info->lr_save_offset,
22660
rs6000_frame_related (insn, ptr_reg, sp_off,
22661
- NULL_RTX, NULL_RTX);
22662
+ NULL_RTX, NULL_RTX, NULL_RTX);
22666
@@ -19999,7 +22829,7 @@
22667
SAVRES_SAVE | SAVRES_GPR);
22669
rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
22670
- NULL_RTX, NULL_RTX);
22671
+ NULL_RTX, NULL_RTX, NULL_RTX);
22674
/* Move the static chain pointer back. */
22675
@@ -20049,7 +22879,7 @@
22676
info->lr_save_offset + ptr_off,
22678
rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
22679
- NULL_RTX, NULL_RTX);
22680
+ NULL_RTX, NULL_RTX, NULL_RTX);
22684
@@ -20065,7 +22895,7 @@
22685
info->gp_save_offset + frame_off + reg_size * i);
22686
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22687
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22688
- NULL_RTX, NULL_RTX);
22689
+ NULL_RTX, NULL_RTX, NULL_RTX);
22691
else if (!WORLD_SAVE_P (info))
22693
@@ -20134,7 +22964,8 @@
22694
be updated if we arrived at this function via a plt call or
22695
toc adjusting stub. */
22696
emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
22697
- toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028;
22698
+ toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
22699
+ + RS6000_TOC_SAVE_SLOT);
22700
hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
22701
emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
22702
compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
22703
@@ -20153,7 +22984,7 @@
22704
LABEL_NUSES (toc_save_done) += 1;
22706
save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
22707
- TOC_REGNUM, frame_off + 5 * reg_size,
22708
+ TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
22709
sp_off - frame_off);
22711
emit_label (toc_save_done);
22712
@@ -20193,28 +23024,123 @@
22713
rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
22714
GEN_INT (info->cr_save_offset + frame_off));
22715
rtx mem = gen_frame_mem (SImode, addr);
22716
- /* See the large comment above about why CR2_REGNO is used. */
22717
- rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO);
22719
/* If we didn't copy cr before, do so now using r0. */
22720
if (cr_save_rtx == NULL_RTX)
22725
cr_save_rtx = gen_rtx_REG (SImode, 0);
22726
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
22727
- RTX_FRAME_RELATED_P (insn) = 1;
22728
- set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg);
22729
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
22730
+ rs6000_emit_move_from_cr (cr_save_rtx);
22732
- insn = emit_move_insn (mem, cr_save_rtx);
22734
+ /* Saving CR requires a two-instruction sequence: one instruction
22735
+ to move the CR to a general-purpose register, and a second
22736
+ instruction that stores the GPR to memory.
22738
+ We do not emit any DWARF CFI records for the first of these,
22739
+ because we cannot properly represent the fact that CR is saved in
22740
+ a register. One reason is that we cannot express that multiple
22741
+ CR fields are saved; another reason is that on 64-bit, the size
22742
+ of the CR register in DWARF (4 bytes) differs from the size of
22743
+ a general-purpose register.
22745
+ This means if any intervening instruction were to clobber one of
22746
+ the call-saved CR fields, we'd have incorrect CFI. To prevent
22747
+ this from happening, we mark the store to memory as a use of
22748
+ those CR fields, which prevents any such instruction from being
22749
+ scheduled in between the two instructions. */
22751
+ int n_crsave = 0;
22754
+ crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
22755
+ for (i = 0; i < 8; i++)
22756
+ if (save_reg_p (CR0_REGNO + i))
22757
+ crsave_v[n_crsave++]
22758
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
22760
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
22761
+ gen_rtvec_v (n_crsave, crsave_v)));
22762
END_USE (REGNO (cr_save_rtx));
22764
- rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22765
- NULL_RTX, NULL_RTX);
22766
+ /* Now, there's no way that dwarf2out_frame_debug_expr is going to
22767
+ understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
22768
+ so we need to construct a frame expression manually. */
22769
+ RTX_FRAME_RELATED_P (insn) = 1;
22771
+ /* Update address to be stack-pointer relative, like
22772
+ rs6000_frame_related would do. */
22773
+ addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
22774
+ GEN_INT (info->cr_save_offset + sp_off));
22775
+ mem = gen_frame_mem (SImode, addr);
22777
+ if (DEFAULT_ABI == ABI_ELFv2)
22779
+ /* In the ELFv2 ABI we generate separate CFI records for each
22780
+ CR field that was actually saved. They all point to the
22781
+ same 32-bit stack slot. */
22783
+ int n_crframe = 0;
22785
+ for (i = 0; i < 8; i++)
22786
+ if (save_reg_p (CR0_REGNO + i))
22788
+ crframe[n_crframe]
22789
+ = gen_rtx_SET (VOIDmode, mem,
22790
+ gen_rtx_REG (SImode, CR0_REGNO + i));
22792
+ RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
22796
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22797
+ gen_rtx_PARALLEL (VOIDmode,
22798
+ gen_rtvec_v (n_crframe, crframe)));
22802
+ /* In other ABIs, by convention, we use a single CR regnum to
22803
+ represent the fact that all call-saved CR fields are saved.
22804
+ We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
22805
+ rtx set = gen_rtx_SET (VOIDmode, mem,
22806
+ gen_rtx_REG (SImode, CR2_REGNO));
22807
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
22811
+ /* In the ELFv2 ABI we need to save all call-saved CR fields into
22812
+ *separate* slots if the routine calls __builtin_eh_return, so
22813
+ that they can be independently restored by the unwinder. */
22814
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
22816
+ int i, cr_off = info->ehcr_offset;
22819
+ /* ??? We might get better performance by using multiple mfocrf
22821
+ crsave = gen_rtx_REG (SImode, 0);
22822
+ emit_insn (gen_movesi_from_cr (crsave));
22824
+ for (i = 0; i < 8; i++)
22825
+ if (!call_used_regs[CR0_REGNO + i])
22827
+ rtvec p = rtvec_alloc (2);
22829
+ = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
22831
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
22833
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22835
+ RTX_FRAME_RELATED_P (insn) = 1;
22836
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22837
+ gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
22838
+ sp_reg_rtx, cr_off + sp_off));
22840
+ cr_off += reg_size;
22844
/* Update stack and set back pointer unless this is V.4,
22845
for which it was done previously. */
22846
if (!WORLD_SAVE_P (info) && info->push_p
22847
@@ -20292,7 +23218,7 @@
22848
info->altivec_save_offset + ptr_off,
22849
0, V4SImode, SAVRES_SAVE | SAVRES_VR);
22850
rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
22851
- NULL_RTX, NULL_RTX);
22852
+ NULL_RTX, NULL_RTX, NULL_RTX);
22853
if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
22855
/* The oddity mentioned above clobbered our frame reg. */
22856
@@ -20308,7 +23234,7 @@
22857
for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
22858
if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22860
- rtx areg, savereg, mem;
22861
+ rtx areg, savereg, mem, split_reg;
22864
offset = (info->altivec_save_offset + frame_off
22865
@@ -20326,8 +23252,18 @@
22867
insn = emit_move_insn (mem, savereg);
22869
+ /* When we split a VSX store into two insns, we need to make
22870
+ sure the DWARF info knows which register we are storing.
22871
+ Pass it in to be used on the appropriate note. */
22872
+ if (!BYTES_BIG_ENDIAN
22873
+ && GET_CODE (PATTERN (insn)) == SET
22874
+ && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
22875
+ split_reg = savereg;
22877
+ split_reg = NULL_RTX;
22879
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22880
- areg, GEN_INT (offset));
22881
+ areg, GEN_INT (offset), split_reg);
22885
@@ -20351,7 +23287,8 @@
22886
be using r12 as frame_reg_rtx and r11 as the static chain
22887
pointer for nested functions. */
22889
- if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p)
22890
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22891
+ && !using_static_chain_p)
22893
else if (REGNO (frame_reg_rtx) == 12)
22895
@@ -20390,7 +23327,7 @@
22896
can use register 0. This allows us to use a plain 'blr' to return
22897
from the procedure more often. */
22898
int save_LR_around_toc_setup = (TARGET_ELF
22899
- && DEFAULT_ABI != ABI_AIX
22900
+ && DEFAULT_ABI == ABI_V4
22902
&& ! info->lr_save_p
22903
&& EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0);
22904
@@ -20452,7 +23389,7 @@
22905
if (rs6000_save_toc_in_prologue_p ())
22907
rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
22908
- emit_insn (gen_frame_store (reg, sp_reg_rtx, 5 * reg_size));
22909
+ emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
22913
@@ -20493,6 +23430,49 @@
22917
+ /* ELFv2 ABI r2 setup code and local entry point. This must follow
22918
+ immediately after the global entry point label. */
22919
+ if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
22921
+ const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22923
+ fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
22924
+ fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
22926
+ fputs ("\t.localentry\t", file);
22927
+ assemble_name (file, name);
22928
+ fputs (",.-", file);
22929
+ assemble_name (file, name);
22930
+ fputs ("\n", file);
22933
+ /* Output -mprofile-kernel code. This needs to be done here instead of
22934
+ in output_function_profile since it must go after the ELFv2 ABI
22935
+ local entry point. */
22936
+ if (TARGET_PROFILE_KERNEL)
22938
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22939
+ gcc_assert (!TARGET_32BIT);
22941
+ asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
22942
+ asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
22944
+ /* In the ELFv2 ABI we have no compiler stack word. It must be
22945
+ the resposibility of _mcount to preserve the static chain
22946
+ register if required. */
22947
+ if (DEFAULT_ABI != ABI_ELFv2
22948
+ && cfun->static_chain_decl != NULL)
22950
+ asm_fprintf (file, "\tstd %s,24(%s)\n",
22951
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
22952
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
22953
+ asm_fprintf (file, "\tld %s,24(%s)\n",
22954
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
22957
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
22960
rs6000_pic_labelno++;
22963
@@ -20545,6 +23525,7 @@
22965
if (using_mfcr_multiple && count > 1)
22971
@@ -20562,16 +23543,43 @@
22972
gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
22975
- emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22976
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22977
gcc_assert (ndx == count);
22979
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
22980
+ CR field separately. */
22981
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
22983
+ for (i = 0; i < 8; i++)
22984
+ if (save_reg_p (CR0_REGNO + i))
22985
+ add_reg_note (insn, REG_CFA_RESTORE,
22986
+ gen_rtx_REG (SImode, CR0_REGNO + i));
22988
+ RTX_FRAME_RELATED_P (insn) = 1;
22992
for (i = 0; i < 8; i++)
22993
if (save_reg_p (CR0_REGNO + i))
22994
- emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i),
22997
+ rtx insn = emit_insn (gen_movsi_to_cr_one
22998
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
23000
- if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
23001
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
23002
+ CR field separately, attached to the insn that in fact
23003
+ restores this particular CR field. */
23004
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
23006
+ add_reg_note (insn, REG_CFA_RESTORE,
23007
+ gen_rtx_REG (SImode, CR0_REGNO + i));
23009
+ RTX_FRAME_RELATED_P (insn) = 1;
23013
+ /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
23014
+ if (!exit_func && DEFAULT_ABI != ABI_ELFv2
23015
+ && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
23017
rtx insn = get_last_insn ();
23018
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
23019
@@ -20612,10 +23620,22 @@
23021
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
23023
- if (info->cr_save_p)
23024
+ if (DEFAULT_ABI == ABI_ELFv2)
23027
+ for (i = 0; i < 8; i++)
23028
+ if (save_reg_p (CR0_REGNO + i))
23030
+ rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
23031
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
23035
+ else if (info->cr_save_p)
23036
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
23037
gen_rtx_REG (SImode, CR2_REGNO),
23040
if (info->lr_save_p)
23041
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
23042
gen_rtx_REG (Pmode, LR_REGNO),
23043
@@ -21113,6 +24133,35 @@
23044
|| (!restoring_GPRs_inline
23045
&& info->first_fp_reg_save == 64));
23047
+ /* In the ELFv2 ABI we need to restore all call-saved CR fields from
23048
+ *separate* slots if the routine calls __builtin_eh_return, so
23049
+ that they can be independently restored by the unwinder. */
23050
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23052
+ int i, cr_off = info->ehcr_offset;
23054
+ for (i = 0; i < 8; i++)
23055
+ if (!call_used_regs[CR0_REGNO + i])
23057
+ rtx reg = gen_rtx_REG (SImode, 0);
23058
+ emit_insn (gen_frame_load (reg, frame_reg_rtx,
23059
+ cr_off + frame_off));
23061
+ insn = emit_insn (gen_movsi_to_cr_one
23062
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
23064
+ if (!exit_func && flag_shrink_wrap)
23066
+ add_reg_note (insn, REG_CFA_RESTORE,
23067
+ gen_rtx_REG (SImode, CR0_REGNO + i));
23069
+ RTX_FRAME_RELATED_P (insn) = 1;
23072
+ cr_off += reg_size;
23076
/* Get the old lr if we saved it. If we are restoring registers
23077
out-of-line, then the out-of-line routines can do this for us. */
23078
if (restore_lr && restoring_GPRs_inline)
23079
@@ -21156,7 +24205,7 @@
23081
rtx reg = gen_rtx_REG (reg_mode, 2);
23082
emit_insn (gen_frame_load (reg, frame_reg_rtx,
23083
- frame_off + 5 * reg_size));
23084
+ frame_off + RS6000_TOC_SAVE_SLOT));
23088
@@ -21442,6 +24491,7 @@
23089
if (! restoring_FPRs_inline)
23095
if (flag_shrink_wrap)
23096
@@ -21450,10 +24500,9 @@
23097
sym = rs6000_savres_routine_sym (info,
23098
SAVRES_FPR | (lr ? SAVRES_LR : 0));
23099
RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
23100
- RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode,
23101
- gen_rtx_REG (Pmode,
23102
- DEFAULT_ABI == ABI_AIX
23104
+ reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
23105
+ RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
23107
for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23109
rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
23110
@@ -21531,7 +24580,8 @@
23112
System V.4 Powerpc's (and the embedded ABI derived from it) use a
23113
different traceback table. */
23114
- if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive
23115
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23116
+ && ! flag_inhibit_size_directive
23117
&& rs6000_traceback != traceback_none && !cfun->is_thunk)
23119
const char *fname = NULL;
23120
@@ -21859,6 +24909,12 @@
23121
SIBLING_CALL_P (insn) = 1;
23124
+ /* Ensure we have a global entry point for the thunk. ??? We could
23125
+ avoid that if the target routine doesn't need a global entry point,
23126
+ but we do not know whether this is the case at this point. */
23127
+ if (DEFAULT_ABI == ABI_ELFv2)
23128
+ cfun->machine->r2_setup_needed = true;
23130
/* Run just enough of rest_of_compilation to get the insns emitted.
23131
There's not really enough bulk here to make other passes such as
23132
instruction scheduling worth while. Note that use_thunk calls
23133
@@ -22555,7 +25611,7 @@
23134
if (TARGET_PROFILE_KERNEL)
23137
- if (DEFAULT_ABI == ABI_AIX)
23138
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23140
#ifndef NO_PROFILE_COUNTERS
23141
# define NO_PROFILE_COUNTERS 0
23142
@@ -22699,29 +25755,9 @@
23148
- if (!TARGET_PROFILE_KERNEL)
23150
- /* Don't do anything, done in output_profile_hook (). */
23154
- gcc_assert (!TARGET_32BIT);
23156
- asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
23157
- asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
23159
- if (cfun->static_chain_decl != NULL)
23161
- asm_fprintf (file, "\tstd %s,24(%s)\n",
23162
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
23163
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
23164
- asm_fprintf (file, "\tld %s,24(%s)\n",
23165
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
23168
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
23170
+ /* Don't do anything, done in output_profile_hook (). */
23174
@@ -22847,6 +25883,7 @@
23175
|| rs6000_cpu_attr == CPU_POWER4
23176
|| rs6000_cpu_attr == CPU_POWER5
23177
|| rs6000_cpu_attr == CPU_POWER7
23178
+ || rs6000_cpu_attr == CPU_POWER8
23179
|| rs6000_cpu_attr == CPU_CELL)
23180
&& recog_memoized (dep_insn)
23181
&& (INSN_CODE (dep_insn) >= 0))
23182
@@ -23129,7 +26166,8 @@
23183
if (rs6000_cpu_attr == CPU_CELL)
23184
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
23186
- if (rs6000_sched_groups)
23187
+ if (rs6000_sched_groups
23188
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
23190
enum attr_type type = get_attr_type (insn);
23191
if (type == TYPE_LOAD_EXT_U
23192
@@ -23154,7 +26192,8 @@
23193
|| GET_CODE (PATTERN (insn)) == CLOBBER)
23196
- if (rs6000_sched_groups)
23197
+ if (rs6000_sched_groups
23198
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
23200
enum attr_type type = get_attr_type (insn);
23201
if (type == TYPE_LOAD_U || type == TYPE_STORE_U
23202
@@ -23433,6 +26472,8 @@
23211
@@ -24060,6 +27101,39 @@
23215
+ case PROCESSOR_POWER8:
23216
+ type = get_attr_type (insn);
23220
+ case TYPE_CR_LOGICAL:
23221
+ case TYPE_DELAYED_CR:
23225
+ case TYPE_COMPARE:
23226
+ case TYPE_DELAYED_COMPARE:
23227
+ case TYPE_VAR_DELAYED_COMPARE:
23228
+ case TYPE_IMUL_COMPARE:
23229
+ case TYPE_LMUL_COMPARE:
23232
+ case TYPE_LOAD_L:
23233
+ case TYPE_STORE_C:
23234
+ case TYPE_LOAD_U:
23235
+ case TYPE_LOAD_UX:
23236
+ case TYPE_LOAD_EXT:
23237
+ case TYPE_LOAD_EXT_U:
23238
+ case TYPE_LOAD_EXT_UX:
23239
+ case TYPE_STORE_UX:
23240
+ case TYPE_VECSTORE:
23241
+ case TYPE_MFJMPR:
23242
+ case TYPE_MTJMPR:
23251
@@ -24138,6 +27212,25 @@
23255
+ case PROCESSOR_POWER8:
23256
+ type = get_attr_type (insn);
23264
+ case TYPE_LOAD_L:
23265
+ case TYPE_STORE_C:
23266
+ case TYPE_LOAD_EXT_U:
23267
+ case TYPE_LOAD_EXT_UX:
23268
+ case TYPE_STORE_UX:
23277
@@ -24227,8 +27320,9 @@
23278
if (can_issue_more && !is_branch_slot_insn (next_insn))
23281
- /* Power6 and Power7 have special group ending nop. */
23282
- if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
23283
+ /* Do we have a special group ending nop? */
23284
+ if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
23285
+ || rs6000_cpu_attr == CPU_POWER8)
23287
nop = gen_group_ending_nop ();
23288
emit_insn_before (nop, next_insn);
23289
@@ -24599,6 +27693,11 @@
23290
ret = (TARGET_32BIT) ? 12 : 24;
23294
+ gcc_assert (!TARGET_32BIT);
23300
ret = (TARGET_32BIT) ? 40 : 48;
23301
@@ -24654,6 +27753,7 @@
23304
/* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
23308
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
23309
@@ -24948,7 +28048,7 @@
23311
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
23313
- if (DEFAULT_ABI == ABI_AIX
23314
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23315
&& TARGET_MINIMAL_TOC
23316
&& !TARGET_RELOCATABLE)
23318
@@ -24969,7 +28069,8 @@
23320
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
23322
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE)
23323
+ else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23324
+ && !TARGET_RELOCATABLE)
23325
fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
23328
@@ -25519,7 +28620,7 @@
23332
- else if (DEFAULT_ABI == ABI_AIX)
23333
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23337
@@ -25595,7 +28696,7 @@
23339
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
23341
- if (TARGET_64BIT)
23342
+ if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
23344
fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
23345
ASM_OUTPUT_LABEL (file, name);
23346
@@ -25661,8 +28762,7 @@
23347
fprintf (file, "%s:\n", desc_name);
23348
fprintf (file, "\t.long %s\n", orig_name);
23349
fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
23350
- if (DEFAULT_ABI == ABI_AIX)
23351
- fputs ("\t.long 0\n", file);
23352
+ fputs ("\t.long 0\n", file);
23353
fprintf (file, "\t.previous\n");
23355
ASM_OUTPUT_LABEL (file, name);
23356
@@ -25691,7 +28791,7 @@
23359
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23360
- if (TARGET_32BIT)
23361
+ if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
23362
file_end_indicate_exec_stack ();
23365
@@ -26431,7 +29531,8 @@
23366
/* For those processors that have slow LR/CTR moves, make them more
23367
expensive than memory in order to bias spills to memory .*/
23368
else if ((rs6000_cpu == PROCESSOR_POWER6
23369
- || rs6000_cpu == PROCESSOR_POWER7)
23370
+ || rs6000_cpu == PROCESSOR_POWER7
23371
+ || rs6000_cpu == PROCESSOR_POWER8)
23372
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
23373
ret = 6 * hard_regno_nregs[0][mode];
23375
@@ -26441,7 +29542,7 @@
23378
/* If we have VSX, we can easily move between FPR or Altivec registers. */
23379
- else if (VECTOR_UNIT_VSX_P (mode)
23380
+ else if (VECTOR_MEM_VSX_P (mode)
23381
&& reg_classes_intersect_p (to, VSX_REGS)
23382
&& reg_classes_intersect_p (from, VSX_REGS))
23383
ret = 2 * hard_regno_nregs[32][mode];
23384
@@ -26482,7 +29583,8 @@
23386
if (reg_classes_intersect_p (rclass, GENERAL_REGS))
23387
ret = 4 * hard_regno_nregs[0][mode];
23388
- else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
23389
+ else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
23390
+ || reg_classes_intersect_p (rclass, VSX_REGS)))
23391
ret = 4 * hard_regno_nregs[32][mode];
23392
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
23393
ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
23394
@@ -26644,54 +29746,26 @@
23395
emit_insn (gen_rtx_SET (VOIDmode, dst, r));
23398
-/* Newton-Raphson approximation of floating point divide with just 2 passes
23399
- (either single precision floating point, or newer machines with higher
23400
- accuracy estimates). Support both scalar and vector divide. Assumes no
23401
- trapping math and finite arguments. */
23402
+/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23403
+ add a reg_note saying that this was a division. Support both scalar and
23404
+ vector divide. Assumes no trapping math and finite arguments. */
23407
-rs6000_emit_swdiv_high_precision (rtx dst, rtx n, rtx d)
23409
+rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
23411
enum machine_mode mode = GET_MODE (dst);
23412
- rtx x0, e0, e1, y1, u0, v0;
23413
- enum insn_code code = optab_handler (smul_optab, mode);
23414
- insn_gen_fn gen_mul = GEN_FCN (code);
23415
- rtx one = rs6000_load_constant_and_splat (mode, dconst1);
23416
+ rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
23419
- gcc_assert (code != CODE_FOR_nothing);
23420
+ /* Low precision estimates guarantee 5 bits of accuracy. High
23421
+ precision estimates guarantee 14 bits of accuracy. SFmode
23422
+ requires 23 bits of accuracy. DFmode requires 52 bits of
23423
+ accuracy. Each pass at least doubles the accuracy, leading
23424
+ to the following. */
23425
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23426
+ if (mode == DFmode || mode == V2DFmode)
23429
- /* x0 = 1./d estimate */
23430
- x0 = gen_reg_rtx (mode);
23431
- emit_insn (gen_rtx_SET (VOIDmode, x0,
23432
- gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
23435
- e0 = gen_reg_rtx (mode);
23436
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - (d * x0) */
23438
- e1 = gen_reg_rtx (mode);
23439
- rs6000_emit_madd (e1, e0, e0, e0); /* e1 = (e0 * e0) + e0 */
23441
- y1 = gen_reg_rtx (mode);
23442
- rs6000_emit_madd (y1, e1, x0, x0); /* y1 = (e1 * x0) + x0 */
23444
- u0 = gen_reg_rtx (mode);
23445
- emit_insn (gen_mul (u0, n, y1)); /* u0 = n * y1 */
23447
- v0 = gen_reg_rtx (mode);
23448
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - (d * u0) */
23450
- rs6000_emit_madd (dst, v0, y1, u0); /* dst = (v0 * y1) + u0 */
23453
-/* Newton-Raphson approximation of floating point divide that has a low
23454
- precision estimate. Assumes no trapping math and finite arguments. */
23457
-rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
23459
- enum machine_mode mode = GET_MODE (dst);
23460
- rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
23461
enum insn_code code = optab_handler (smul_optab, mode);
23462
insn_gen_fn gen_mul = GEN_FCN (code);
23464
@@ -26705,47 +29779,45 @@
23465
gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
23468
- e0 = gen_reg_rtx (mode);
23469
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - d * x0 */
23470
+ /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
23471
+ if (passes > 1) {
23473
- y1 = gen_reg_rtx (mode);
23474
- rs6000_emit_madd (y1, e0, x0, x0); /* y1 = x0 + e0 * x0 */
23475
+ /* e0 = 1. - d * x0 */
23476
+ e0 = gen_reg_rtx (mode);
23477
+ rs6000_emit_nmsub (e0, d, x0, one);
23479
- e1 = gen_reg_rtx (mode);
23480
- emit_insn (gen_mul (e1, e0, e0)); /* e1 = e0 * e0 */
23481
+ /* x1 = x0 + e0 * x0 */
23482
+ x1 = gen_reg_rtx (mode);
23483
+ rs6000_emit_madd (x1, e0, x0, x0);
23485
- y2 = gen_reg_rtx (mode);
23486
- rs6000_emit_madd (y2, e1, y1, y1); /* y2 = y1 + e1 * y1 */
23487
+ for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
23488
+ ++i, xprev = xnext, eprev = enext) {
23490
+ /* enext = eprev * eprev */
23491
+ enext = gen_reg_rtx (mode);
23492
+ emit_insn (gen_mul (enext, eprev, eprev));
23494
- e2 = gen_reg_rtx (mode);
23495
- emit_insn (gen_mul (e2, e1, e1)); /* e2 = e1 * e1 */
23496
+ /* xnext = xprev + enext * xprev */
23497
+ xnext = gen_reg_rtx (mode);
23498
+ rs6000_emit_madd (xnext, enext, xprev, xprev);
23501
- y3 = gen_reg_rtx (mode);
23502
- rs6000_emit_madd (y3, e2, y2, y2); /* y3 = y2 + e2 * y2 */
23506
- u0 = gen_reg_rtx (mode);
23507
- emit_insn (gen_mul (u0, n, y3)); /* u0 = n * y3 */
23508
+ /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
23510
- v0 = gen_reg_rtx (mode);
23511
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - d * u0 */
23512
+ /* u = n * xprev */
23513
+ u = gen_reg_rtx (mode);
23514
+ emit_insn (gen_mul (u, n, xprev));
23516
- rs6000_emit_madd (dst, v0, y3, u0); /* dst = u0 + v0 * y3 */
23518
+ /* v = n - (d * u) */
23519
+ v = gen_reg_rtx (mode);
23520
+ rs6000_emit_nmsub (v, d, u, n);
23522
-/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23523
- add a reg_note saying that this was a division. Support both scalar and
23524
- vector divide. Assumes no trapping math and finite arguments. */
23525
+ /* dst = (v * xprev) + u */
23526
+ rs6000_emit_madd (dst, v, xprev, u);
23529
-rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
23531
- enum machine_mode mode = GET_MODE (dst);
23533
- if (RS6000_RECIP_HIGH_PRECISION_P (mode))
23534
- rs6000_emit_swdiv_high_precision (dst, n, d);
23536
- rs6000_emit_swdiv_low_precision (dst, n, d);
23539
add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
23541
@@ -26759,7 +29831,16 @@
23542
enum machine_mode mode = GET_MODE (src);
23543
rtx x0 = gen_reg_rtx (mode);
23544
rtx y = gen_reg_rtx (mode);
23545
- int passes = (TARGET_RECIP_PRECISION) ? 2 : 3;
23547
+ /* Low precision estimates guarantee 5 bits of accuracy. High
23548
+ precision estimates guarantee 14 bits of accuracy. SFmode
23549
+ requires 23 bits of accuracy. DFmode requires 52 bits of
23550
+ accuracy. Each pass at least doubles the accuracy, leading
23551
+ to the following. */
23552
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23553
+ if (mode == DFmode || mode == V2DFmode)
23556
REAL_VALUE_TYPE dconst3_2;
23559
@@ -26921,6 +30002,137 @@
23563
+/* Expand an Altivec constant permutation for little endian mode.
23564
+ There are two issues: First, the two input operands must be
23565
+ swapped so that together they form a double-wide array in LE
23566
+ order. Second, the vperm instruction has surprising behavior
23567
+ in LE mode: it interprets the elements of the source vectors
23568
+ in BE mode ("left to right") and interprets the elements of
23569
+ the destination vector in LE mode ("right to left"). To
23570
+ correct for this, we must subtract each element of the permute
23571
+ control vector from 31.
23573
+ For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23574
+ with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23575
+ We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23576
+ serve as the permute control vector. Then, in BE mode,
23580
+ places the desired result in vr9. However, in LE mode the
23581
+ vector contents will be
23583
+ vr10 = 00000003 00000002 00000001 00000000
23584
+ vr11 = 00000007 00000006 00000005 00000004
23586
+ The result of the vperm using the same permute control vector is
23588
+ vr9 = 05000000 07000000 01000000 03000000
23590
+ That is, the leftmost 4 bytes of vr10 are interpreted as the
23591
+ source for the rightmost 4 bytes of vr9, and so on.
23593
+ If we change the permute control vector to
23595
+ vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23601
+ we get the desired
23603
+ vr9 = 00000006 00000004 00000002 00000000. */
23606
+altivec_expand_vec_perm_const_le (rtx operands[4])
23610
+ rtx constv, unspec;
23611
+ rtx target = operands[0];
23612
+ rtx op0 = operands[1];
23613
+ rtx op1 = operands[2];
23614
+ rtx sel = operands[3];
23616
+ /* Unpack and adjust the constant selector. */
23617
+ for (i = 0; i < 16; ++i)
23619
+ rtx e = XVECEXP (sel, 0, i);
23620
+ unsigned int elt = 31 - (INTVAL (e) & 31);
23621
+ perm[i] = GEN_INT (elt);
23624
+ /* Expand to a permute, swapping the inputs and using the
23625
+ adjusted selector. */
23626
+ if (!REG_P (op0))
23627
+ op0 = force_reg (V16QImode, op0);
23628
+ if (!REG_P (op1))
23629
+ op1 = force_reg (V16QImode, op1);
23631
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23632
+ constv = force_reg (V16QImode, constv);
23633
+ unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23635
+ if (!REG_P (target))
23637
+ rtx tmp = gen_reg_rtx (V16QImode);
23638
+ emit_move_insn (tmp, unspec);
23642
+ emit_move_insn (target, unspec);
23645
+/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23646
+ permute control vector. But here it's not a constant, so we must
23647
+ generate a vector splat/subtract to do the adjustment. */
23650
+altivec_expand_vec_perm_le (rtx operands[4])
23652
+ rtx splat, unspec;
23653
+ rtx target = operands[0];
23654
+ rtx op0 = operands[1];
23655
+ rtx op1 = operands[2];
23656
+ rtx sel = operands[3];
23657
+ rtx tmp = target;
23658
+ rtx splatreg = gen_reg_rtx (V16QImode);
23659
+ enum machine_mode mode = GET_MODE (target);
23661
+ /* Get everything in regs so the pattern matches. */
23662
+ if (!REG_P (op0))
23663
+ op0 = force_reg (mode, op0);
23664
+ if (!REG_P (op1))
23665
+ op1 = force_reg (mode, op1);
23666
+ if (!REG_P (sel))
23667
+ sel = force_reg (V16QImode, sel);
23668
+ if (!REG_P (target))
23669
+ tmp = gen_reg_rtx (mode);
23671
+ /* SEL = splat(31) - SEL. */
23672
+ /* We want to subtract from 31, but we can't vspltisb 31 since
23673
+ it's out of range. -1 works as well because only the low-order
23674
+ five bits of the permute control vector elements are used. */
23675
+ splat = gen_rtx_VEC_DUPLICATE (V16QImode,
23676
+ gen_rtx_CONST_INT (QImode, -1));
23677
+ emit_move_insn (splatreg, splat);
23678
+ sel = gen_rtx_MINUS (V16QImode, splatreg, sel);
23679
+ emit_move_insn (splatreg, sel);
23681
+ /* Permute with operands reversed and adjusted selector. */
23682
+ unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, splatreg), UNSPEC_VPERM);
23684
+ /* Copy into target, possibly by way of a register. */
23685
+ if (!REG_P (target))
23687
+ emit_move_insn (tmp, unspec);
23691
+ emit_move_insn (target, unspec);
23694
/* Expand an Altivec constant permutation. Return true if we match
23695
an efficient implementation; false to fall back to VPERM. */
23697
@@ -26928,26 +30140,43 @@
23698
altivec_expand_vec_perm_const (rtx operands[4])
23700
struct altivec_perm_insn {
23701
+ HOST_WIDE_INT mask;
23702
enum insn_code impl;
23703
unsigned char perm[16];
23705
static const struct altivec_perm_insn patterns[] = {
23706
- { CODE_FOR_altivec_vpkuhum,
23707
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
23708
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
23709
- { CODE_FOR_altivec_vpkuwum,
23710
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
23711
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
23712
- { CODE_FOR_altivec_vmrghb,
23713
+ { OPTION_MASK_ALTIVEC,
23714
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23715
+ : CODE_FOR_altivec_vmrglb_direct),
23716
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
23717
- { CODE_FOR_altivec_vmrghh,
23718
+ { OPTION_MASK_ALTIVEC,
23719
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23720
+ : CODE_FOR_altivec_vmrglh_direct),
23721
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
23722
- { CODE_FOR_altivec_vmrghw,
23723
+ { OPTION_MASK_ALTIVEC,
23724
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
23725
+ : CODE_FOR_altivec_vmrglw_direct),
23726
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
23727
- { CODE_FOR_altivec_vmrglb,
23728
+ { OPTION_MASK_ALTIVEC,
23729
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23730
+ : CODE_FOR_altivec_vmrghb_direct),
23731
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
23732
- { CODE_FOR_altivec_vmrglh,
23733
+ { OPTION_MASK_ALTIVEC,
23734
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23735
+ : CODE_FOR_altivec_vmrghh_direct),
23736
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
23737
- { CODE_FOR_altivec_vmrglw,
23738
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
23739
+ { OPTION_MASK_ALTIVEC,
23740
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
23741
+ : CODE_FOR_altivec_vmrghw_direct),
23742
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
23743
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
23744
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
23745
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
23746
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
23749
unsigned int i, j, elt, which;
23750
@@ -27004,7 +30233,9 @@
23754
- emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt)));
23755
+ if (!BYTES_BIG_ENDIAN)
23757
+ emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23761
@@ -27015,9 +30246,10 @@
23765
+ int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23766
x = gen_reg_rtx (V8HImode);
23767
- emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0),
23768
- GEN_INT (elt / 2)));
23769
+ emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23770
+ GEN_INT (field)));
23771
emit_move_insn (target, gen_lowpart (V16QImode, x));
23774
@@ -27033,9 +30265,10 @@
23778
+ int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23779
x = gen_reg_rtx (V4SImode);
23780
- emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0),
23781
- GEN_INT (elt / 4)));
23782
+ emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23783
+ GEN_INT (field)));
23784
emit_move_insn (target, gen_lowpart (V16QImode, x));
23787
@@ -27047,6 +30280,9 @@
23791
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
23794
elt = patterns[j].perm[0];
23795
if (perm[0] == elt)
23797
@@ -27070,7 +30306,30 @@
23798
enum machine_mode omode = insn_data[icode].operand[0].mode;
23799
enum machine_mode imode = insn_data[icode].operand[1].mode;
23802
+ /* For little-endian, don't use vpkuwum and vpkuhum if the
23803
+ underlying vector type is not V4SI and V8HI, respectively.
23804
+ For example, using vpkuwum with a V8HI picks up the even
23805
+ halfwords (BE numbering) when the even halfwords (LE
23806
+ numbering) are what we need. */
23807
+ if (!BYTES_BIG_ENDIAN
23808
+ && icode == CODE_FOR_altivec_vpkuwum_direct
23809
+ && ((GET_CODE (op0) == REG
23810
+ && GET_MODE (op0) != V4SImode)
23811
+ || (GET_CODE (op0) == SUBREG
23812
+ && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23814
+ if (!BYTES_BIG_ENDIAN
23815
+ && icode == CODE_FOR_altivec_vpkuhum_direct
23816
+ && ((GET_CODE (op0) == REG
23817
+ && GET_MODE (op0) != V8HImode)
23818
+ || (GET_CODE (op0) == SUBREG
23819
+ && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23822
+ /* For little-endian, the two input operands must be swapped
23823
+ (or swapped back) to ensure proper right-to-left numbering
23824
+ from 0 to 2N-1. */
23825
+ if (swapped ^ !BYTES_BIG_ENDIAN)
23826
x = op0, op0 = op1, op1 = x;
23827
if (imode != V16QImode)
23829
@@ -27088,6 +30347,12 @@
23833
+ if (!BYTES_BIG_ENDIAN)
23835
+ altivec_expand_vec_perm_const_le (operands);
23842
@@ -27136,7 +30401,6 @@
23843
vmode = GET_MODE (target);
23844
gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23845
dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
23847
x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23848
v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23849
x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23850
@@ -27232,7 +30496,7 @@
23851
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23854
- high = (highp == BYTES_BIG_ENDIAN ? 0 : nelt / 2);
23855
+ high = (highp ? 0 : nelt / 2);
23856
for (i = 0; i < nelt / 2; i++)
23858
perm[i * 2] = GEN_INT (i + high);
23859
@@ -27287,6 +30551,8 @@
23861
enum machine_mode mode;
23862
unsigned int regno;
23863
+ enum machine_mode elt_mode;
23866
/* Special handling for structs in darwin64. */
23868
@@ -27306,6 +30572,36 @@
23869
/* Otherwise fall through to standard ABI rules. */
23872
+ /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23873
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
23874
+ &elt_mode, &n_elts))
23876
+ int first_reg, n_regs, i;
23879
+ if (SCALAR_FLOAT_MODE_P (elt_mode))
23881
+ /* _Decimal128 must use even/odd register pairs. */
23882
+ first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23883
+ n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23887
+ first_reg = ALTIVEC_ARG_RETURN;
23891
+ par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
23892
+ for (i = 0; i < n_elts; i++)
23894
+ rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
23895
+ rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23896
+ XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23902
if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
23904
/* Long long return value need be split in -mpowerpc64, 32bit ABI. */
23905
@@ -27418,6 +30714,13 @@
23909
+/* Return true if we use LRA instead of reload pass. */
23911
+rs6000_lra_p (void)
23913
+ return rs6000_lra_flag;
23916
/* Given FROM and TO register numbers, say whether this elimination is allowed.
23917
Frame pointer elimination is automatically handled.
23919
@@ -27680,22 +30983,33 @@
23921
{ "altivec", OPTION_MASK_ALTIVEC, false, true },
23922
{ "cmpb", OPTION_MASK_CMPB, false, true },
23923
+ { "crypto", OPTION_MASK_CRYPTO, false, true },
23924
+ { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23925
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
23926
{ "fprnd", OPTION_MASK_FPRND, false, true },
23927
{ "hard-dfp", OPTION_MASK_DFP, false, true },
23928
+ { "htm", OPTION_MASK_HTM, false, true },
23929
{ "isel", OPTION_MASK_ISEL, false, true },
23930
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
23931
{ "mfpgpr", OPTION_MASK_MFPGPR, false, true },
23932
{ "mulhw", OPTION_MASK_MULHW, false, true },
23933
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
23934
- { "update", OPTION_MASK_NO_UPDATE, true , true },
23935
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
23936
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
23937
+ { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23938
+ { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23939
+ { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23940
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23941
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23942
+ { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23943
+ { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23944
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23945
{ "string", OPTION_MASK_STRING, false, true },
23946
+ { "update", OPTION_MASK_NO_UPDATE, true , true },
23947
+ { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
23948
+ { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
23949
{ "vsx", OPTION_MASK_VSX, false, true },
23950
+ { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
23951
#ifdef OPTION_MASK_64BIT
23953
{ "aix64", OPTION_MASK_64BIT, false, false },
23954
@@ -27735,6 +31049,9 @@
23955
{ "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
23956
{ "popcntd", RS6000_BTM_POPCNTD, false, false },
23957
{ "cell", RS6000_BTM_CELL, false, false },
23958
+ { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
23959
+ { "crypto", RS6000_BTM_CRYPTO, false, false },
23960
+ { "htm", RS6000_BTM_HTM, false, false },
23963
/* Option variables that we want to support inside attribute((target)) and
23964
@@ -28251,7 +31568,6 @@
23966
size_t max_column = 76;
23967
const char *comma = "";
23968
- const char *nl = "\n";
23971
start_column += fprintf (file, "%*s", indent, "");
23972
@@ -28282,7 +31598,6 @@
23973
fprintf (stderr, ", \\\n%*s", (int)start_column, "");
23974
cur_column = start_column + len;
23979
fprintf (file, "%s%s%s%s", comma, prefix, no_str,
23980
@@ -28292,7 +31607,7 @@
23984
- fputs (nl, file);
23985
+ fputs ("\n", file);
23988
/* Helper function to print the current isa options on a line. */
23989
@@ -28468,118 +31783,149 @@
23993
-/* A function pointer under AIX is a pointer to a data area whose first word
23994
- contains the actual address of the function, whose second word contains a
23995
- pointer to its TOC, and whose third word contains a value to place in the
23996
- static chain register (r11). Note that if we load the static chain, our
23997
- "trampoline" need not have any executable code. */
23999
+/* Expand code to perform a call under the AIX or ELFv2 ABI. */
24002
-rs6000_call_indirect_aix (rtx value, rtx func_desc, rtx flag)
24003
+rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
24005
+ rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24006
+ rtx toc_load = NULL_RTX;
24007
+ rtx toc_restore = NULL_RTX;
24012
- rtx stack_toc_offset;
24013
- rtx stack_toc_mem;
24014
- rtx func_toc_offset;
24015
- rtx func_toc_mem;
24016
- rtx func_sc_offset;
24018
+ rtx abi_reg = NULL_RTX;
24022
- rtx (*call_func) (rtx, rtx, rtx, rtx);
24023
- rtx (*call_value_func) (rtx, rtx, rtx, rtx, rtx);
24025
- stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24026
- toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24027
+ /* Handle longcall attributes. */
24028
+ if (INTVAL (cookie) & CALL_LONG)
24029
+ func_desc = rs6000_longcall_ref (func_desc);
24031
- /* Load up address of the actual function. */
24032
- func_desc = force_reg (Pmode, func_desc);
24033
- func_addr = gen_reg_rtx (Pmode);
24034
- emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
24036
- if (TARGET_32BIT)
24037
+ /* Handle indirect calls. */
24038
+ if (GET_CODE (func_desc) != SYMBOL_REF
24039
+ || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
24041
+ /* Save the TOC into its reserved slot before the call,
24042
+ and prepare to restore it after the call. */
24043
+ rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24044
+ rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24045
+ rtx stack_toc_mem = gen_frame_mem (Pmode,
24046
+ gen_rtx_PLUS (Pmode, stack_ptr,
24047
+ stack_toc_offset));
24048
+ toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
24050
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_32BIT);
24051
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_32BIT);
24052
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_32BIT);
24053
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
24055
- call_func = gen_call_indirect_aix32bit;
24056
- call_value_func = gen_call_value_indirect_aix32bit;
24058
+ /* Can we optimize saving the TOC in the prologue or
24059
+ do we need to do it at every call? */
24060
+ if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24061
+ cfun->machine->save_toc_in_prologue = true;
24064
- call_func = gen_call_indirect_aix32bit_nor11;
24065
- call_value_func = gen_call_value_indirect_aix32bit_nor11;
24066
+ MEM_VOLATILE_P (stack_toc_mem) = 1;
24067
+ emit_move_insn (stack_toc_mem, toc_reg);
24072
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_64BIT);
24073
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_64BIT);
24074
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_64BIT);
24075
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
24077
+ if (DEFAULT_ABI == ABI_ELFv2)
24079
- call_func = gen_call_indirect_aix64bit;
24080
- call_value_func = gen_call_value_indirect_aix64bit;
24081
+ /* A function pointer in the ELFv2 ABI is just a plain address, but
24082
+ the ABI requires it to be loaded into r12 before the call. */
24083
+ func_addr = gen_rtx_REG (Pmode, 12);
24084
+ emit_move_insn (func_addr, func_desc);
24085
+ abi_reg = func_addr;
24089
- call_func = gen_call_indirect_aix64bit_nor11;
24090
- call_value_func = gen_call_value_indirect_aix64bit_nor11;
24093
+ /* A function pointer under AIX is a pointer to a data area whose
24094
+ first word contains the actual address of the function, whose
24095
+ second word contains a pointer to its TOC, and whose third word
24096
+ contains a value to place in the static chain register (r11).
24097
+ Note that if we load the static chain, our "trampoline" need
24098
+ not have any executable code. */
24100
- /* Reserved spot to store the TOC. */
24101
- stack_toc_mem = gen_frame_mem (Pmode,
24102
- gen_rtx_PLUS (Pmode,
24104
- stack_toc_offset));
24105
+ /* Load up address of the actual function. */
24106
+ func_desc = force_reg (Pmode, func_desc);
24107
+ func_addr = gen_reg_rtx (Pmode);
24108
+ emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
24110
- gcc_assert (cfun);
24111
- gcc_assert (cfun->machine);
24112
+ /* Prepare to load the TOC of the called function. Note that the
24113
+ TOC load must happen immediately before the actual call so
24114
+ that unwinding the TOC registers works correctly. See the
24115
+ comment in frob_update_context. */
24116
+ rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24117
+ rtx func_toc_mem = gen_rtx_MEM (Pmode,
24118
+ gen_rtx_PLUS (Pmode, func_desc,
24119
+ func_toc_offset));
24120
+ toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24122
- /* Can we optimize saving the TOC in the prologue or do we need to do it at
24124
- if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24125
- cfun->machine->save_toc_in_prologue = true;
24127
+ /* If we have a static chain, load it up. */
24128
+ if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
24130
+ rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24131
+ rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24132
+ rtx func_sc_mem = gen_rtx_MEM (Pmode,
24133
+ gen_rtx_PLUS (Pmode, func_desc,
24134
+ func_sc_offset));
24135
+ emit_move_insn (sc_reg, func_sc_mem);
24136
+ abi_reg = sc_reg;
24142
- MEM_VOLATILE_P (stack_toc_mem) = 1;
24143
- emit_move_insn (stack_toc_mem, toc_reg);
24144
+ /* Direct calls use the TOC: for local calls, the callee will
24145
+ assume the TOC register is set; for non-local calls, the
24146
+ PLT stub needs the TOC register. */
24147
+ abi_reg = toc_reg;
24148
+ func_addr = func_desc;
24151
- /* Calculate the address to load the TOC of the called function. We don't
24152
- actually load this until the split after reload. */
24153
- func_toc_mem = gen_rtx_MEM (Pmode,
24154
- gen_rtx_PLUS (Pmode,
24156
- func_toc_offset));
24157
+ /* Create the call. */
24158
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
24159
+ if (value != NULL_RTX)
24160
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
24163
- /* If we have a static chain, load it up. */
24164
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
24166
- func_sc_mem = gen_rtx_MEM (Pmode,
24167
- gen_rtx_PLUS (Pmode,
24169
- func_sc_offset));
24171
+ call[n_call++] = toc_load;
24173
+ call[n_call++] = toc_restore;
24175
- sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24176
- emit_move_insn (sc_reg, func_sc_mem);
24178
+ call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
24180
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24181
+ insn = emit_call_insn (insn);
24183
+ /* Mention all registers defined by the ABI to hold information
24184
+ as uses in CALL_INSN_FUNCTION_USAGE. */
24186
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24189
+/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24192
+rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
24197
+ gcc_assert (INTVAL (cookie) == 0);
24199
/* Create the call. */
24201
- insn = call_value_func (value, func_addr, flag, func_toc_mem,
24204
- insn = call_func (func_addr, flag, func_toc_mem, stack_toc_mem);
24205
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
24206
+ if (value != NULL_RTX)
24207
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
24209
- emit_call_insn (insn);
24210
+ call[1] = simple_return_rtx;
24212
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24213
+ insn = emit_call_insn (insn);
24215
+ /* Note use of the TOC register. */
24216
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
24217
+ /* We need to also mark a use of the link register since the function we
24218
+ sibling-call to will use it to return to our caller. */
24219
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
24222
/* Return whether we need to always update the saved TOC pointer when we update
24223
@@ -28680,6 +32026,656 @@
24224
add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
24228
+/* Helper function for rs6000_split_logical to emit a logical instruction after
24229
+ spliting the operation to single GPR registers.
24231
+ DEST is the destination register.
24232
+ OP1 and OP2 are the input source registers.
24233
+ CODE is the base operation (AND, IOR, XOR, NOT).
24234
+ MODE is the machine mode.
24235
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
24236
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
24237
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
24238
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
24239
+ formation of the AND instructions. */
24242
+rs6000_split_logical_inner (rtx dest,
24245
+ enum rtx_code code,
24246
+ enum machine_mode mode,
24247
+ bool complement_final_p,
24248
+ bool complement_op1_p,
24249
+ bool complement_op2_p,
24255
+ /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
24256
+ if (op2 && GET_CODE (op2) == CONST_INT
24257
+ && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
24258
+ && !complement_final_p && !complement_op1_p && !complement_op2_p)
24260
+ HOST_WIDE_INT mask = GET_MODE_MASK (mode);
24261
+ HOST_WIDE_INT value = INTVAL (op2) & mask;
24263
+ /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
24268
+ emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
24272
+ else if (value == mask)
24274
+ if (!rtx_equal_p (dest, op1))
24275
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
24280
+ /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
24281
+ into separate ORI/ORIS or XORI/XORIS instrucitons. */
24282
+ else if (code == IOR || code == XOR)
24286
+ if (!rtx_equal_p (dest, op1))
24287
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
24293
+ if (complement_op1_p)
24294
+ op1 = gen_rtx_NOT (mode, op1);
24296
+ if (complement_op2_p)
24297
+ op2 = gen_rtx_NOT (mode, op2);
24299
+ bool_rtx = ((code == NOT)
24300
+ ? gen_rtx_NOT (mode, op1)
24301
+ : gen_rtx_fmt_ee (code, mode, op1, op2));
24303
+ if (complement_final_p)
24304
+ bool_rtx = gen_rtx_NOT (mode, bool_rtx);
24306
+ set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
24308
+ /* Is this AND with an explicit clobber? */
24311
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
24312
+ set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
24315
+ emit_insn (set_rtx);
24319
+/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
24320
+ operations are split immediately during RTL generation to allow for more
24321
+ optimizations of the AND/IOR/XOR.
24323
+ OPERANDS is an array containing the destination and two input operands.
24324
+ CODE is the base operation (AND, IOR, XOR, NOT).
24325
+ MODE is the machine mode.
24326
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
24327
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
24328
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
24329
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
24330
+ formation of the AND instructions. */
24333
+rs6000_split_logical_di (rtx operands[3],
24334
+ enum rtx_code code,
24335
+ bool complement_final_p,
24336
+ bool complement_op1_p,
24337
+ bool complement_op2_p,
24340
+ const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
24341
+ const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
24342
+ const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
24343
+ enum hi_lo { hi = 0, lo = 1 };
24344
+ rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
24347
+ op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
24348
+ op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
24349
+ op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
24350
+ op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
24353
+ op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
24356
+ if (GET_CODE (operands[2]) != CONST_INT)
24358
+ op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
24359
+ op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
24363
+ HOST_WIDE_INT value = INTVAL (operands[2]);
24364
+ HOST_WIDE_INT value_hi_lo[2];
24366
+ gcc_assert (!complement_final_p);
24367
+ gcc_assert (!complement_op1_p);
24368
+ gcc_assert (!complement_op2_p);
24370
+ value_hi_lo[hi] = value >> 32;
24371
+ value_hi_lo[lo] = value & lower_32bits;
24373
+ for (i = 0; i < 2; i++)
24375
+ HOST_WIDE_INT sub_value = value_hi_lo[i];
24377
+ if (sub_value & sign_bit)
24378
+ sub_value |= upper_32bits;
24380
+ op2_hi_lo[i] = GEN_INT (sub_value);
24382
+ /* If this is an AND instruction, check to see if we need to load
24383
+ the value in a register. */
24384
+ if (code == AND && sub_value != -1 && sub_value != 0
24385
+ && !and_operand (op2_hi_lo[i], SImode))
24386
+ op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
24391
+ for (i = 0; i < 2; i++)
24393
+ /* Split large IOR/XOR operations. */
24394
+ if ((code == IOR || code == XOR)
24395
+ && GET_CODE (op2_hi_lo[i]) == CONST_INT
24396
+ && !complement_final_p
24397
+ && !complement_op1_p
24398
+ && !complement_op2_p
24399
+ && clobber_reg == NULL_RTX
24400
+ && !logical_const_operand (op2_hi_lo[i], SImode))
24402
+ HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
24403
+ HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
24404
+ HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
24405
+ rtx tmp = gen_reg_rtx (SImode);
24407
+ /* Make sure the constant is sign extended. */
24408
+ if ((hi_16bits & sign_bit) != 0)
24409
+ hi_16bits |= upper_32bits;
24411
+ rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
24412
+ code, SImode, false, false, false,
24415
+ rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
24416
+ code, SImode, false, false, false,
24420
+ rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
24421
+ code, SImode, complement_final_p,
24422
+ complement_op1_p, complement_op2_p,
24429
+/* Split the insns that make up boolean operations operating on multiple GPR
24430
+ registers. The boolean MD patterns ensure that the inputs either are
24431
+ exactly the same as the output registers, or there is no overlap.
24433
+ OPERANDS is an array containing the destination and two input operands.
24434
+ CODE is the base operation (AND, IOR, XOR, NOT).
24435
+ MODE is the machine mode.
24436
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
24437
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
24438
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
24439
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
24440
+ formation of the AND instructions. */
24443
+rs6000_split_logical (rtx operands[3],
24444
+ enum rtx_code code,
24445
+ bool complement_final_p,
24446
+ bool complement_op1_p,
24447
+ bool complement_op2_p,
24450
+ enum machine_mode mode = GET_MODE (operands[0]);
24451
+ enum machine_mode sub_mode;
24452
+ rtx op0, op1, op2;
24453
+ int sub_size, regno0, regno1, nregs, i;
24455
+ /* If this is DImode, use the specialized version that can run before
24456
+ register allocation. */
24457
+ if (mode == DImode && !TARGET_POWERPC64)
24459
+ rs6000_split_logical_di (operands, code, complement_final_p,
24460
+ complement_op1_p, complement_op2_p,
24465
+ op0 = operands[0];
24466
+ op1 = operands[1];
24467
+ op2 = (code == NOT) ? NULL_RTX : operands[2];
24468
+ sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
24469
+ sub_size = GET_MODE_SIZE (sub_mode);
24470
+ regno0 = REGNO (op0);
24471
+ regno1 = REGNO (op1);
24473
+ gcc_assert (reload_completed);
24474
+ gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
24475
+ gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
24477
+ nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
24478
+ gcc_assert (nregs > 1);
24480
+ if (op2 && REG_P (op2))
24481
+ gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
24483
+ for (i = 0; i < nregs; i++)
24485
+ int offset = i * sub_size;
24486
+ rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
24487
+ rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
24488
+ rtx sub_op2 = ((code == NOT)
24490
+ : simplify_subreg (sub_mode, op2, mode, offset));
24492
+ rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
24493
+ complement_final_p, complement_op1_p,
24494
+ complement_op2_p, clobber_reg);
24501
+/* Return true if the peephole2 can combine a load involving a combination of
24502
+ an addis instruction and a load with an offset that can be fused together on
24505
+ The operands are:
24506
+ operands[0] register set with addis
24507
+ operands[1] value set via addis
24508
+ operands[2] target register being loaded
24509
+ operands[3] D-form memory reference using operands[0].
24511
+ In addition, we are passed a boolean that is true if this is a peephole2,
24512
+ and we can use see if the addis_reg is dead after the insn and can be
24513
+ replaced by the target register. */
24516
+fusion_gpr_load_p (rtx *operands, bool peep2_p)
24518
+ rtx addis_reg = operands[0];
24519
+ rtx addis_value = operands[1];
24520
+ rtx target = operands[2];
24521
+ rtx mem = operands[3];
24525
+ /* Validate arguments. */
24526
+ if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
24529
+ if (!base_reg_operand (target, GET_MODE (target)))
24532
+ if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
24535
+ if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
24538
+ /* Allow sign/zero extension. */
24539
+ if (GET_CODE (mem) == ZERO_EXTEND
24540
+ || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
24541
+ mem = XEXP (mem, 0);
24543
+ if (!MEM_P (mem))
24546
+ addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
24547
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
24550
+ /* Validate that the register used to load the high value is either the
24551
+ register being loaded, or we can safely replace its use in a peephole2.
24553
+ If this is a peephole2, we assume that there are 2 instructions in the
24554
+ peephole (addis and load), so we want to check if the target register was
24555
+ not used in the memory address and the register to hold the addis result
24556
+ is dead after the peephole. */
24557
+ if (REGNO (addis_reg) != REGNO (target))
24562
+ if (reg_mentioned_p (target, mem))
24565
+ if (!peep2_reg_dead_p (2, addis_reg))
24569
+ base_reg = XEXP (addr, 0);
24570
+ return REGNO (addis_reg) == REGNO (base_reg);
24573
+/* During the peephole2 pass, adjust and expand the insns for a load fusion
24574
+ sequence. We adjust the addis register to use the target register. If the
24575
+ load sign extends, we adjust the code to do the zero extending load, and an
24576
+ explicit sign extension later since the fusion only covers zero extending
24579
+ The operands are:
24580
+ operands[0] register set with addis (to be replaced with target)
24581
+ operands[1] value set via addis
24582
+ operands[2] target register being loaded
24583
+ operands[3] D-form memory reference using operands[0]. */
24586
+expand_fusion_gpr_load (rtx *operands)
24588
+ rtx addis_value = operands[1];
24589
+ rtx target = operands[2];
24590
+ rtx orig_mem = operands[3];
24591
+ rtx new_addr, new_mem, orig_addr, offset;
24592
+ enum rtx_code plus_or_lo_sum;
24593
+ enum machine_mode target_mode = GET_MODE (target);
24594
+ enum machine_mode extend_mode = target_mode;
24595
+ enum machine_mode ptr_mode = Pmode;
24596
+ enum rtx_code extend = UNKNOWN;
24597
+ rtx addis_reg = ((ptr_mode == target_mode)
24599
+ : simplify_subreg (ptr_mode, target, target_mode, 0));
24601
+ if (GET_CODE (orig_mem) == ZERO_EXTEND
24602
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
24604
+ extend = GET_CODE (orig_mem);
24605
+ orig_mem = XEXP (orig_mem, 0);
24606
+ target_mode = GET_MODE (orig_mem);
24609
+ gcc_assert (MEM_P (orig_mem));
24611
+ orig_addr = XEXP (orig_mem, 0);
24612
+ plus_or_lo_sum = GET_CODE (orig_addr);
24613
+ gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
24615
+ offset = XEXP (orig_addr, 1);
24616
+ new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
24617
+ new_mem = change_address (orig_mem, target_mode, new_addr);
24619
+ if (extend != UNKNOWN)
24620
+ new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
24622
+ emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
24623
+ emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
24625
+ if (extend == SIGN_EXTEND)
24627
+ int sub_off = ((BYTES_BIG_ENDIAN)
24628
+ ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
24631
+ = simplify_subreg (target_mode, target, extend_mode, sub_off);
24633
+ emit_insn (gen_rtx_SET (VOIDmode, target,
24634
+ gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
24640
+/* Return a string to fuse an addis instruction with a gpr load to the same
24641
+ register that we loaded up the addis instruction. The code is complicated,
24642
+ so we call output_asm_insn directly, and just return "".
24644
+ The operands are:
24645
+ operands[0] register set with addis (must be same reg as target).
24646
+ operands[1] value set via addis
24647
+ operands[2] target register being loaded
24648
+ operands[3] D-form memory reference using operands[0]. */
24651
+emit_fusion_gpr_load (rtx *operands)
24653
+ rtx addis_reg = operands[0];
24654
+ rtx addis_value = operands[1];
24655
+ rtx target = operands[2];
24656
+ rtx mem = operands[3];
24657
+ rtx fuse_ops[10];
24660
+ const char *addis_str = NULL;
24661
+ const char *load_str = NULL;
24662
+ const char *extend_insn = NULL;
24663
+ const char *mode_name = NULL;
24664
+ char insn_template[80];
24665
+ enum machine_mode mode;
24666
+ const char *comment_str = ASM_COMMENT_START;
24667
+ bool sign_p = false;
24669
+ gcc_assert (REG_P (addis_reg) && REG_P (target));
24670
+ gcc_assert (REGNO (addis_reg) == REGNO (target));
24672
+ if (*comment_str == ' ')
24675
+ /* Allow sign/zero extension. */
24676
+ if (GET_CODE (mem) == ZERO_EXTEND)
24677
+ mem = XEXP (mem, 0);
24679
+ else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
24682
+ mem = XEXP (mem, 0);
24685
+ gcc_assert (MEM_P (mem));
24686
+ addr = XEXP (mem, 0);
24687
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
24688
+ gcc_unreachable ();
24690
+ load_offset = XEXP (addr, 1);
24692
+ /* Now emit the load instruction to the same register. */
24693
+ mode = GET_MODE (mem);
24697
+ mode_name = "char";
24698
+ load_str = "lbz";
24699
+ extend_insn = "extsb %0,%0";
24703
+ mode_name = "short";
24704
+ load_str = "lhz";
24705
+ extend_insn = "extsh %0,%0";
24709
+ mode_name = "int";
24710
+ load_str = "lwz";
24711
+ extend_insn = "extsw %0,%0";
24715
+ if (TARGET_POWERPC64)
24717
+ mode_name = "long";
24721
+ gcc_unreachable ();
24725
+ gcc_unreachable ();
24728
+ /* Emit the addis instruction. */
24729
+ fuse_ops[0] = target;
24730
+ if (satisfies_constraint_L (addis_value))
24732
+ fuse_ops[1] = addis_value;
24733
+ addis_str = "lis %0,%v1";
24736
+ else if (GET_CODE (addis_value) == PLUS)
24738
+ rtx op0 = XEXP (addis_value, 0);
24739
+ rtx op1 = XEXP (addis_value, 1);
24741
+ if (REG_P (op0) && CONST_INT_P (op1)
24742
+ && satisfies_constraint_L (op1))
24744
+ fuse_ops[1] = op0;
24745
+ fuse_ops[2] = op1;
24746
+ addis_str = "addis %0,%1,%v2";
24750
+ else if (GET_CODE (addis_value) == HIGH)
24752
+ rtx value = XEXP (addis_value, 0);
24753
+ if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
24755
+ fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
24756
+ fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
24758
+ addis_str = "addis %0,%2,%1@toc@ha";
24760
+ else if (TARGET_XCOFF)
24761
+ addis_str = "addis %0,%1@u(%2)";
24764
+ gcc_unreachable ();
24767
+ else if (GET_CODE (value) == PLUS)
24769
+ rtx op0 = XEXP (value, 0);
24770
+ rtx op1 = XEXP (value, 1);
24772
+ if (GET_CODE (op0) == UNSPEC
24773
+ && XINT (op0, 1) == UNSPEC_TOCREL
24774
+ && CONST_INT_P (op1))
24776
+ fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
24777
+ fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
24778
+ fuse_ops[3] = op1;
24780
+ addis_str = "addis %0,%2,%1+%3@toc@ha";
24782
+ else if (TARGET_XCOFF)
24783
+ addis_str = "addis %0,%1+%3@u(%2)";
24786
+ gcc_unreachable ();
24790
+ else if (satisfies_constraint_L (value))
24792
+ fuse_ops[1] = value;
24793
+ addis_str = "lis %0,%v1";
24796
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
24798
+ fuse_ops[1] = value;
24799
+ addis_str = "lis %0,%1@ha";
24804
+ fatal_insn ("Could not generate addis value for fusion", addis_value);
24806
+ sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
24807
+ comment_str, mode_name);
24808
+ output_asm_insn (insn_template, fuse_ops);
24810
+ /* Emit the D-form load instruction. */
24811
+ if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
24813
+ sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
24814
+ fuse_ops[1] = load_offset;
24815
+ output_asm_insn (insn_template, fuse_ops);
24818
+ else if (GET_CODE (load_offset) == UNSPEC
24819
+ && XINT (load_offset, 1) == UNSPEC_TOCREL)
24822
+ sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
24824
+ else if (TARGET_XCOFF)
24825
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
24828
+ gcc_unreachable ();
24830
+ fuse_ops[1] = XVECEXP (load_offset, 0, 0);
24831
+ output_asm_insn (insn_template, fuse_ops);
24834
+ else if (GET_CODE (load_offset) == PLUS
24835
+ && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
24836
+ && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
24837
+ && CONST_INT_P (XEXP (load_offset, 1)))
24839
+ rtx tocrel_unspec = XEXP (load_offset, 0);
24841
+ sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
24843
+ else if (TARGET_XCOFF)
24844
+ sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
24847
+ gcc_unreachable ();
24849
+ fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
24850
+ fuse_ops[2] = XEXP (load_offset, 1);
24851
+ output_asm_insn (insn_template, fuse_ops);
24854
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
24856
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
24858
+ fuse_ops[1] = load_offset;
24859
+ output_asm_insn (insn_template, fuse_ops);
24863
+ fatal_insn ("Unable to generate load offset for fusion", load_offset);
24865
+ /* Handle sign extension. The peephole2 pass generates this as a separate
24866
+ insn, but we handle it just in case it got reattached. */
24869
+ gcc_assert (extend_insn != NULL);
24870
+ output_asm_insn (extend_insn, fuse_ops);
24877
struct gcc_target targetm = TARGET_INITIALIZER;
24879
#include "gt-rs6000.h"
24880
--- a/src/gcc/config/rs6000/vsx.md
24881
+++ b/src/gcc/config/rs6000/vsx.md
24883
;; it to use gprs as well as vsx registers.
24884
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
24886
+(define_mode_iterator VSX_M2 [V16QI
24892
+ (TI "TARGET_VSX_TIMODE")])
24894
;; Map into the appropriate load/store name based on the type
24895
(define_mode_attr VSm [(V16QI "vw4")
24904
;; Map into the appropriate suffix based on the type
24905
(define_mode_attr VSs [(V16QI "sp")
24913
;; Map the register class used
24914
(define_mode_attr VSr [(V16QI "v")
24922
;; Map the register class used for float<->int conversions
24923
(define_mode_attr VSr2 [(V2DF "wd")
24924
@@ -115,7 +123,6 @@
24931
;; Appropriate type for add ops (and other simple FP ops)
24932
@@ -192,6 +199,8 @@
24933
UNSPEC_VSX_CVDPSXWS
24934
UNSPEC_VSX_CVDPUXWS
24936
+ UNSPEC_VSX_CVSPDPN
24937
+ UNSPEC_VSX_CVDPSPN
24941
@@ -204,80 +213,397 @@
24943
UNSPEC_VSX_ROUND_IC
24945
+ UNSPEC_VSX_XXSPLTW
24949
-(define_insn "*vsx_mov<mode>"
24950
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
24951
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
24952
- "VECTOR_MEM_VSX_P (<MODE>mode)
24953
- && (register_operand (operands[0], <MODE>mode)
24954
- || register_operand (operands[1], <MODE>mode))"
24956
+;; The patterns for LE permuted loads and stores come before the general
24957
+;; VSX moves so they match first.
24958
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
24959
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
24960
+ (match_operand:VSX_D 1 "memory_operand" "Z"))]
24961
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
24963
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
24964
+ [(set (match_dup 2)
24965
+ (vec_select:<MODE>
24967
+ (parallel [(const_int 1) (const_int 0)])))
24968
+ (set (match_dup 0)
24969
+ (vec_select:<MODE>
24971
+ (parallel [(const_int 1) (const_int 0)])))]
24974
- switch (which_alternative)
24978
- gcc_assert (MEM_P (operands[0])
24979
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
24980
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
24981
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
24982
- return "stx<VSm>x %x1,%y0";
24983
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
24987
+ [(set_attr "type" "vecload")
24988
+ (set_attr "length" "8")])
24992
- gcc_assert (MEM_P (operands[1])
24993
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
24994
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
24995
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
24996
- return "lx<VSm>x %x0,%y1";
24997
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
24998
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
24999
+ (match_operand:VSX_W 1 "memory_operand" "Z"))]
25000
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25002
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25003
+ [(set (match_dup 2)
25004
+ (vec_select:<MODE>
25006
+ (parallel [(const_int 2) (const_int 3)
25007
+ (const_int 0) (const_int 1)])))
25008
+ (set (match_dup 0)
25009
+ (vec_select:<MODE>
25011
+ (parallel [(const_int 2) (const_int 3)
25012
+ (const_int 0) (const_int 1)])))]
25015
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
25019
+ [(set_attr "type" "vecload")
25020
+ (set_attr "length" "8")])
25024
- return "xxlor %x0,%x1,%x1";
25025
+(define_insn_and_split "*vsx_le_perm_load_v8hi"
25026
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
25027
+ (match_operand:V8HI 1 "memory_operand" "Z"))]
25028
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25030
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25031
+ [(set (match_dup 2)
25034
+ (parallel [(const_int 4) (const_int 5)
25035
+ (const_int 6) (const_int 7)
25036
+ (const_int 0) (const_int 1)
25037
+ (const_int 2) (const_int 3)])))
25038
+ (set (match_dup 0)
25041
+ (parallel [(const_int 4) (const_int 5)
25042
+ (const_int 6) (const_int 7)
25043
+ (const_int 0) (const_int 1)
25044
+ (const_int 2) (const_int 3)])))]
25047
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
25051
+ [(set_attr "type" "vecload")
25052
+ (set_attr "length" "8")])
25059
+(define_insn_and_split "*vsx_le_perm_load_v16qi"
25060
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
25061
+ (match_operand:V16QI 1 "memory_operand" "Z"))]
25062
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25064
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25065
+ [(set (match_dup 2)
25066
+ (vec_select:V16QI
25068
+ (parallel [(const_int 8) (const_int 9)
25069
+ (const_int 10) (const_int 11)
25070
+ (const_int 12) (const_int 13)
25071
+ (const_int 14) (const_int 15)
25072
+ (const_int 0) (const_int 1)
25073
+ (const_int 2) (const_int 3)
25074
+ (const_int 4) (const_int 5)
25075
+ (const_int 6) (const_int 7)])))
25076
+ (set (match_dup 0)
25077
+ (vec_select:V16QI
25079
+ (parallel [(const_int 8) (const_int 9)
25080
+ (const_int 10) (const_int 11)
25081
+ (const_int 12) (const_int 13)
25082
+ (const_int 14) (const_int 15)
25083
+ (const_int 0) (const_int 1)
25084
+ (const_int 2) (const_int 3)
25085
+ (const_int 4) (const_int 5)
25086
+ (const_int 6) (const_int 7)])))]
25089
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
25093
+ [(set_attr "type" "vecload")
25094
+ (set_attr "length" "8")])
25098
- return "xxlxor %x0,%x0,%x0";
25099
+(define_insn "*vsx_le_perm_store_<mode>"
25100
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
25101
+ (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
25102
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25104
+ [(set_attr "type" "vecstore")
25105
+ (set_attr "length" "12")])
25108
- return output_vec_const_move (operands);
25110
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
25111
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
25112
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
25113
+ [(set (match_dup 2)
25114
+ (vec_select:<MODE>
25116
+ (parallel [(const_int 1) (const_int 0)])))
25117
+ (set (match_dup 0)
25118
+ (vec_select:<MODE>
25120
+ (parallel [(const_int 1) (const_int 0)])))]
25122
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
25127
- gcc_assert (MEM_P (operands[0])
25128
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
25129
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
25130
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
25131
- return "stvx %1,%y0";
25132
+;; The post-reload split requires that we re-permute the source
25133
+;; register in case it is still live.
25135
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
25136
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
25137
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
25138
+ [(set (match_dup 1)
25139
+ (vec_select:<MODE>
25141
+ (parallel [(const_int 1) (const_int 0)])))
25142
+ (set (match_dup 0)
25143
+ (vec_select:<MODE>
25145
+ (parallel [(const_int 1) (const_int 0)])))
25146
+ (set (match_dup 1)
25147
+ (vec_select:<MODE>
25149
+ (parallel [(const_int 1) (const_int 0)])))]
25153
- gcc_assert (MEM_P (operands[0])
25154
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
25155
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
25156
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
25157
- return "lvx %0,%y1";
25158
+(define_insn "*vsx_le_perm_store_<mode>"
25159
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
25160
+ (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
25161
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25163
+ [(set_attr "type" "vecstore")
25164
+ (set_attr "length" "12")])
25167
- gcc_unreachable ();
25170
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
25171
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
25172
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
25173
+ [(set (match_dup 2)
25174
+ (vec_select:<MODE>
25176
+ (parallel [(const_int 2) (const_int 3)
25177
+ (const_int 0) (const_int 1)])))
25178
+ (set (match_dup 0)
25179
+ (vec_select:<MODE>
25181
+ (parallel [(const_int 2) (const_int 3)
25182
+ (const_int 0) (const_int 1)])))]
25184
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
25188
+;; The post-reload split requires that we re-permute the source
25189
+;; register in case it is still live.
25191
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
25192
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
25193
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
25194
+ [(set (match_dup 1)
25195
+ (vec_select:<MODE>
25197
+ (parallel [(const_int 2) (const_int 3)
25198
+ (const_int 0) (const_int 1)])))
25199
+ (set (match_dup 0)
25200
+ (vec_select:<MODE>
25202
+ (parallel [(const_int 2) (const_int 3)
25203
+ (const_int 0) (const_int 1)])))
25204
+ (set (match_dup 1)
25205
+ (vec_select:<MODE>
25207
+ (parallel [(const_int 2) (const_int 3)
25208
+ (const_int 0) (const_int 1)])))]
25211
+(define_insn "*vsx_le_perm_store_v8hi"
25212
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
25213
+ (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
25214
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25216
+ [(set_attr "type" "vecstore")
25217
+ (set_attr "length" "12")])
25220
+ [(set (match_operand:V8HI 0 "memory_operand" "")
25221
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
25222
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
25223
+ [(set (match_dup 2)
25226
+ (parallel [(const_int 4) (const_int 5)
25227
+ (const_int 6) (const_int 7)
25228
+ (const_int 0) (const_int 1)
25229
+ (const_int 2) (const_int 3)])))
25230
+ (set (match_dup 0)
25233
+ (parallel [(const_int 4) (const_int 5)
25234
+ (const_int 6) (const_int 7)
25235
+ (const_int 0) (const_int 1)
25236
+ (const_int 2) (const_int 3)])))]
25238
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
25242
+;; The post-reload split requires that we re-permute the source
25243
+;; register in case it is still live.
25245
+ [(set (match_operand:V8HI 0 "memory_operand" "")
25246
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
25247
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
25248
+ [(set (match_dup 1)
25251
+ (parallel [(const_int 4) (const_int 5)
25252
+ (const_int 6) (const_int 7)
25253
+ (const_int 0) (const_int 1)
25254
+ (const_int 2) (const_int 3)])))
25255
+ (set (match_dup 0)
25258
+ (parallel [(const_int 4) (const_int 5)
25259
+ (const_int 6) (const_int 7)
25260
+ (const_int 0) (const_int 1)
25261
+ (const_int 2) (const_int 3)])))
25262
+ (set (match_dup 1)
25265
+ (parallel [(const_int 4) (const_int 5)
25266
+ (const_int 6) (const_int 7)
25267
+ (const_int 0) (const_int 1)
25268
+ (const_int 2) (const_int 3)])))]
25271
+(define_insn "*vsx_le_perm_store_v16qi"
25272
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
25273
+ (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
25274
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
25276
+ [(set_attr "type" "vecstore")
25277
+ (set_attr "length" "12")])
25280
+ [(set (match_operand:V16QI 0 "memory_operand" "")
25281
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
25282
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
25283
+ [(set (match_dup 2)
25284
+ (vec_select:V16QI
25286
+ (parallel [(const_int 8) (const_int 9)
25287
+ (const_int 10) (const_int 11)
25288
+ (const_int 12) (const_int 13)
25289
+ (const_int 14) (const_int 15)
25290
+ (const_int 0) (const_int 1)
25291
+ (const_int 2) (const_int 3)
25292
+ (const_int 4) (const_int 5)
25293
+ (const_int 6) (const_int 7)])))
25294
+ (set (match_dup 0)
25295
+ (vec_select:V16QI
25297
+ (parallel [(const_int 8) (const_int 9)
25298
+ (const_int 10) (const_int 11)
25299
+ (const_int 12) (const_int 13)
25300
+ (const_int 14) (const_int 15)
25301
+ (const_int 0) (const_int 1)
25302
+ (const_int 2) (const_int 3)
25303
+ (const_int 4) (const_int 5)
25304
+ (const_int 6) (const_int 7)])))]
25306
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
25310
+;; The post-reload split requires that we re-permute the source
25311
+;; register in case it is still live.
25313
+ [(set (match_operand:V16QI 0 "memory_operand" "")
25314
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
25315
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
25316
+ [(set (match_dup 1)
25317
+ (vec_select:V16QI
25319
+ (parallel [(const_int 8) (const_int 9)
25320
+ (const_int 10) (const_int 11)
25321
+ (const_int 12) (const_int 13)
25322
+ (const_int 14) (const_int 15)
25323
+ (const_int 0) (const_int 1)
25324
+ (const_int 2) (const_int 3)
25325
+ (const_int 4) (const_int 5)
25326
+ (const_int 6) (const_int 7)])))
25327
+ (set (match_dup 0)
25328
+ (vec_select:V16QI
25330
+ (parallel [(const_int 8) (const_int 9)
25331
+ (const_int 10) (const_int 11)
25332
+ (const_int 12) (const_int 13)
25333
+ (const_int 14) (const_int 15)
25334
+ (const_int 0) (const_int 1)
25335
+ (const_int 2) (const_int 3)
25336
+ (const_int 4) (const_int 5)
25337
+ (const_int 6) (const_int 7)])))
25338
+ (set (match_dup 1)
25339
+ (vec_select:V16QI
25341
+ (parallel [(const_int 8) (const_int 9)
25342
+ (const_int 10) (const_int 11)
25343
+ (const_int 12) (const_int 13)
25344
+ (const_int 14) (const_int 15)
25345
+ (const_int 0) (const_int 1)
25346
+ (const_int 2) (const_int 3)
25347
+ (const_int 4) (const_int 5)
25348
+ (const_int 6) (const_int 7)])))]
25352
+(define_insn "*vsx_mov<mode>"
25353
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
25354
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
25355
+ "VECTOR_MEM_VSX_P (<MODE>mode)
25356
+ && (register_operand (operands[0], <MODE>mode)
25357
+ || register_operand (operands[1], <MODE>mode))"
25359
+ return rs6000_output_move_128bit (operands);
25361
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
25362
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
25363
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
25365
-;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
25366
-;; unions. However for plain data movement, slightly favor the vector loads
25367
-(define_insn "*vsx_movti"
25368
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?Y,?r,?r,wa,v,v,wZ")
25369
- (match_operand:TI 1 "input_operand" "wa,Z,wa,r,Y,r,j,W,wZ,v"))]
25370
- "VECTOR_MEM_VSX_P (TImode)
25371
+;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
25372
+;; use of TImode is for unions. However for plain data movement, slightly
25373
+;; favor the vector loads
25374
+(define_insn "*vsx_movti_64bit"
25375
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
25376
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
25377
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
25378
&& (register_operand (operands[0], TImode)
25379
|| register_operand (operands[1], TImode))"
25381
+ return rs6000_output_move_128bit (operands);
25383
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
25384
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
25386
+(define_insn "*vsx_movti_32bit"
25387
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
25388
+ (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
25389
+ "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
25390
+ && (register_operand (operands[0], TImode)
25391
+ || register_operand (operands[1], TImode))"
25393
switch (which_alternative)
25396
@@ -290,27 +616,45 @@
25397
return "xxlor %x0,%x1,%x1";
25400
+ return "xxlxor %x0,%x0,%x0";
25403
+ return output_vec_const_move (operands);
25407
+ return "stvx %1,%y0";
25410
- return "xxlxor %x0,%x0,%x0";
25411
+ return "lvx %0,%y1";
25414
- return output_vec_const_move (operands);
25415
+ if (TARGET_STRING)
25416
+ return \"stswi %1,%P0,16\";
25419
- return "stvx %1,%y0";
25423
- return "lvx %0,%y1";
25424
+ /* If the address is not used in the output, we can use lsi. Otherwise,
25425
+ fall through to generating four loads. */
25426
+ if (TARGET_STRING
25427
+ && ! reg_overlap_mentioned_p (operands[0], operands[1]))
25428
+ return \"lswi %0,%P1,16\";
25429
+ /* ... fall through ... */
25436
gcc_unreachable ();
25439
- [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
25440
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
25441
+ (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
25442
+ (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
25443
+ (const_string "always")
25444
+ (const_string "conditional")))])
25446
;; Explicit load/store expanders for the builtin functions
25447
(define_expand "vsx_load_<mode>"
25448
@@ -320,46 +664,48 @@
25451
(define_expand "vsx_store_<mode>"
25452
- [(set (match_operand:VEC_M 0 "memory_operand" "")
25453
- (match_operand:VEC_M 1 "vsx_register_operand" ""))]
25454
+ [(set (match_operand:VSX_M 0 "memory_operand" "")
25455
+ (match_operand:VSX_M 1 "vsx_register_operand" ""))]
25456
"VECTOR_MEM_VSX_P (<MODE>mode)"
25460
-;; VSX scalar and vector floating point arithmetic instructions
25461
+;; VSX vector floating point arithmetic instructions. The VSX scalar
25462
+;; instructions are now combined with the insn for the traditional floating
25464
(define_insn "*vsx_add<mode>3"
25465
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25466
- (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25467
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
25468
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25469
+ (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25470
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
25471
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25472
- "x<VSv>add<VSs> %x0,%x1,%x2"
25473
+ "xvadd<VSs> %x0,%x1,%x2"
25474
[(set_attr "type" "<VStype_simple>")
25475
(set_attr "fp_type" "<VSfptype_simple>")])
25477
(define_insn "*vsx_sub<mode>3"
25478
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25479
- (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25480
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
25481
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25482
+ (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25483
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
25484
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25485
- "x<VSv>sub<VSs> %x0,%x1,%x2"
25486
+ "xvsub<VSs> %x0,%x1,%x2"
25487
[(set_attr "type" "<VStype_simple>")
25488
(set_attr "fp_type" "<VSfptype_simple>")])
25490
(define_insn "*vsx_mul<mode>3"
25491
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25492
- (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25493
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
25494
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25495
+ (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25496
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
25497
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25498
- "x<VSv>mul<VSs> %x0,%x1,%x2"
25499
- [(set_attr "type" "<VStype_mul>")
25500
+ "xvmul<VSs> %x0,%x1,%x2"
25501
+ [(set_attr "type" "<VStype_simple>")
25502
(set_attr "fp_type" "<VSfptype_mul>")])
25504
(define_insn "*vsx_div<mode>3"
25505
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25506
- (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25507
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
25508
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25509
+ (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25510
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
25511
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25512
- "x<VSv>div<VSs> %x0,%x1,%x2"
25513
+ "xvdiv<VSs> %x0,%x1,%x2"
25514
[(set_attr "type" "<VStype_div>")
25515
(set_attr "fp_type" "<VSfptype_div>")])
25517
@@ -402,94 +748,72 @@
25518
(set_attr "fp_type" "<VSfptype_simple>")])
25520
(define_insn "vsx_fre<mode>2"
25521
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25522
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
25523
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25524
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
25526
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25527
- "x<VSv>re<VSs> %x0,%x1"
25528
+ "xvre<VSs> %x0,%x1"
25529
[(set_attr "type" "<VStype_simple>")
25530
(set_attr "fp_type" "<VSfptype_simple>")])
25532
(define_insn "*vsx_neg<mode>2"
25533
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25534
- (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
25535
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25536
+ (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
25537
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25538
- "x<VSv>neg<VSs> %x0,%x1"
25539
+ "xvneg<VSs> %x0,%x1"
25540
[(set_attr "type" "<VStype_simple>")
25541
(set_attr "fp_type" "<VSfptype_simple>")])
25543
(define_insn "*vsx_abs<mode>2"
25544
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25545
- (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
25546
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25547
+ (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
25548
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25549
- "x<VSv>abs<VSs> %x0,%x1"
25550
+ "xvabs<VSs> %x0,%x1"
25551
[(set_attr "type" "<VStype_simple>")
25552
(set_attr "fp_type" "<VSfptype_simple>")])
25554
(define_insn "vsx_nabs<mode>2"
25555
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25558
- (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
25559
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25562
+ (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))]
25563
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25564
- "x<VSv>nabs<VSs> %x0,%x1"
25565
+ "xvnabs<VSs> %x0,%x1"
25566
[(set_attr "type" "<VStype_simple>")
25567
(set_attr "fp_type" "<VSfptype_simple>")])
25569
(define_insn "vsx_smax<mode>3"
25570
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25571
- (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25572
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
25573
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25574
+ (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25575
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
25576
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25577
- "x<VSv>max<VSs> %x0,%x1,%x2"
25578
+ "xvmax<VSs> %x0,%x1,%x2"
25579
[(set_attr "type" "<VStype_simple>")
25580
(set_attr "fp_type" "<VSfptype_simple>")])
25582
(define_insn "*vsx_smin<mode>3"
25583
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25584
- (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25585
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
25586
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25587
+ (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25588
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
25589
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25590
- "x<VSv>min<VSs> %x0,%x1,%x2"
25591
+ "xvmin<VSs> %x0,%x1,%x2"
25592
[(set_attr "type" "<VStype_simple>")
25593
(set_attr "fp_type" "<VSfptype_simple>")])
25595
-;; Special VSX version of smin/smax for single precision floating point. Since
25596
-;; both numbers are rounded to single precision, we can just use the DP version
25597
-;; of the instruction.
25599
-(define_insn "*vsx_smaxsf3"
25600
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
25601
- (smax:SF (match_operand:SF 1 "vsx_register_operand" "f")
25602
- (match_operand:SF 2 "vsx_register_operand" "f")))]
25603
- "VECTOR_UNIT_VSX_P (DFmode)"
25604
- "xsmaxdp %x0,%x1,%x2"
25605
- [(set_attr "type" "fp")
25606
- (set_attr "fp_type" "fp_addsub_d")])
25608
-(define_insn "*vsx_sminsf3"
25609
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
25610
- (smin:SF (match_operand:SF 1 "vsx_register_operand" "f")
25611
- (match_operand:SF 2 "vsx_register_operand" "f")))]
25612
- "VECTOR_UNIT_VSX_P (DFmode)"
25613
- "xsmindp %x0,%x1,%x2"
25614
- [(set_attr "type" "fp")
25615
- (set_attr "fp_type" "fp_addsub_d")])
25617
(define_insn "*vsx_sqrt<mode>2"
25618
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25619
- (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
25620
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25621
+ (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
25622
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25623
- "x<VSv>sqrt<VSs> %x0,%x1"
25624
+ "xvsqrt<VSs> %x0,%x1"
25625
[(set_attr "type" "<VStype_sqrt>")
25626
(set_attr "fp_type" "<VSfptype_sqrt>")])
25628
(define_insn "*vsx_rsqrte<mode>2"
25629
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25630
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
25631
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25632
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
25634
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25635
- "x<VSv>rsqrte<VSs> %x0,%x1"
25636
+ "xvrsqrte<VSs> %x0,%x1"
25637
[(set_attr "type" "<VStype_simple>")
25638
(set_attr "fp_type" "<VSfptype_simple>")])
25640
@@ -528,27 +852,11 @@
25641
[(set_attr "type" "<VStype_simple>")
25642
(set_attr "fp_type" "<VSfptype_simple>")])
25644
-;; Fused vector multiply/add instructions Support the classical DF versions of
25645
-;; fma, which allows the target to be a separate register from the 3 inputs.
25646
-;; Under VSX, the target must be either the addend or the first multiply.
25647
-;; Where we can, also do the same for the Altivec V4SF fmas.
25648
+;; Fused vector multiply/add instructions. Support the classical Altivec
25649
+;; versions of fma, which allows the target to be a separate register from the
25650
+;; 3 inputs. Under VSX, the target must be either the addend or the first
25653
-(define_insn "*vsx_fmadf4"
25654
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
25656
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
25657
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
25658
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))]
25659
- "VECTOR_UNIT_VSX_P (DFmode)"
25661
- xsmaddadp %x0,%x1,%x2
25662
- xsmaddmdp %x0,%x1,%x3
25663
- xsmaddadp %x0,%x1,%x2
25664
- xsmaddmdp %x0,%x1,%x3
25665
- fmadd %0,%1,%2,%3"
25666
- [(set_attr "type" "fp")
25667
- (set_attr "fp_type" "fp_maddsub_d")])
25669
(define_insn "*vsx_fmav4sf4"
25670
[(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
25672
@@ -578,23 +886,6 @@
25673
xvmaddmdp %x0,%x1,%x3"
25674
[(set_attr "type" "vecdouble")])
25676
-(define_insn "*vsx_fmsdf4"
25677
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
25679
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
25680
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
25682
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
25683
- "VECTOR_UNIT_VSX_P (DFmode)"
25685
- xsmsubadp %x0,%x1,%x2
25686
- xsmsubmdp %x0,%x1,%x3
25687
- xsmsubadp %x0,%x1,%x2
25688
- xsmsubmdp %x0,%x1,%x3
25689
- fmsub %0,%1,%2,%3"
25690
- [(set_attr "type" "fp")
25691
- (set_attr "fp_type" "fp_maddsub_d")])
25693
(define_insn "*vsx_fms<mode>4"
25694
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
25696
@@ -604,29 +895,12 @@
25697
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
25698
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25700
- x<VSv>msuba<VSs> %x0,%x1,%x2
25701
- x<VSv>msubm<VSs> %x0,%x1,%x3
25702
- x<VSv>msuba<VSs> %x0,%x1,%x2
25703
- x<VSv>msubm<VSs> %x0,%x1,%x3"
25704
+ xvmsuba<VSs> %x0,%x1,%x2
25705
+ xvmsubm<VSs> %x0,%x1,%x3
25706
+ xvmsuba<VSs> %x0,%x1,%x2
25707
+ xvmsubm<VSs> %x0,%x1,%x3"
25708
[(set_attr "type" "<VStype_mul>")])
25710
-(define_insn "*vsx_nfmadf4"
25711
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
25714
- (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d")
25715
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
25716
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
25717
- "VECTOR_UNIT_VSX_P (DFmode)"
25719
- xsnmaddadp %x0,%x1,%x2
25720
- xsnmaddmdp %x0,%x1,%x3
25721
- xsnmaddadp %x0,%x1,%x2
25722
- xsnmaddmdp %x0,%x1,%x3
25723
- fnmadd %0,%1,%2,%3"
25724
- [(set_attr "type" "fp")
25725
- (set_attr "fp_type" "fp_maddsub_d")])
25727
(define_insn "*vsx_nfma<mode>4"
25728
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
25730
@@ -636,31 +910,13 @@
25731
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
25732
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25734
- x<VSv>nmadda<VSs> %x0,%x1,%x2
25735
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
25736
- x<VSv>nmadda<VSs> %x0,%x1,%x2
25737
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
25738
+ xvnmadda<VSs> %x0,%x1,%x2
25739
+ xvnmaddm<VSs> %x0,%x1,%x3
25740
+ xvnmadda<VSs> %x0,%x1,%x2
25741
+ xvnmaddm<VSs> %x0,%x1,%x3"
25742
[(set_attr "type" "<VStype_mul>")
25743
(set_attr "fp_type" "<VSfptype_mul>")])
25745
-(define_insn "*vsx_nfmsdf4"
25746
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
25749
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
25750
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
25752
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))]
25753
- "VECTOR_UNIT_VSX_P (DFmode)"
25755
- xsnmsubadp %x0,%x1,%x2
25756
- xsnmsubmdp %x0,%x1,%x3
25757
- xsnmsubadp %x0,%x1,%x2
25758
- xsnmsubmdp %x0,%x1,%x3
25759
- fnmsub %0,%1,%2,%3"
25760
- [(set_attr "type" "fp")
25761
- (set_attr "fp_type" "fp_maddsub_d")])
25763
(define_insn "*vsx_nfmsv4sf4"
25764
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
25766
@@ -722,16 +978,6 @@
25767
[(set_attr "type" "<VStype_simple>")
25768
(set_attr "fp_type" "<VSfptype_simple>")])
25770
-;; Floating point scalar compare
25771
-(define_insn "*vsx_cmpdf_internal1"
25772
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
25773
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
25774
- (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
25775
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25776
- && VECTOR_UNIT_VSX_P (DFmode)"
25777
- "xscmpudp %0,%x1,%x2"
25778
- [(set_attr "type" "fpcompare")])
25780
;; Compare vectors producing a vector result and a predicate, setting CR6 to
25781
;; indicate a combined status
25782
(define_insn "*vsx_eq_<mode>_p"
25783
@@ -798,13 +1044,13 @@
25786
(define_insn "vsx_copysign<mode>3"
25787
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25789
- [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
25790
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
25791
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25793
+ [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
25794
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")]
25796
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25797
- "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
25798
+ "xvcpsgn<VSs> %x0,%x2,%x1"
25799
[(set_attr "type" "<VStype_simple>")
25800
(set_attr "fp_type" "<VSfptype_simple>")])
25802
@@ -865,10 +1111,10 @@
25803
(set_attr "fp_type" "<VSfptype_simple>")])
25805
(define_insn "vsx_btrunc<mode>2"
25806
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25807
- (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
25808
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25809
+ (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
25810
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25811
- "x<VSv>r<VSs>iz %x0,%x1"
25812
+ "xvr<VSs>iz %x0,%x1"
25813
[(set_attr "type" "<VStype_simple>")
25814
(set_attr "fp_type" "<VSfptype_simple>")])
25816
@@ -882,20 +1128,20 @@
25817
(set_attr "fp_type" "<VSfptype_simple>")])
25819
(define_insn "vsx_floor<mode>2"
25820
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25821
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
25822
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25823
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
25825
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25826
- "x<VSv>r<VSs>im %x0,%x1"
25827
+ "xvr<VSs>im %x0,%x1"
25828
[(set_attr "type" "<VStype_simple>")
25829
(set_attr "fp_type" "<VSfptype_simple>")])
25831
(define_insn "vsx_ceil<mode>2"
25832
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
25833
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
25834
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
25835
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
25837
"VECTOR_UNIT_VSX_P (<MODE>mode)"
25838
- "x<VSv>r<VSs>ip %x0,%x1"
25839
+ "xvr<VSs>ip %x0,%x1"
25840
[(set_attr "type" "<VStype_simple>")
25841
(set_attr "fp_type" "<VSfptype_simple>")])
25843
@@ -942,6 +1188,40 @@
25845
[(set_attr "type" "fp")])
25847
+;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
25848
+(define_insn "vsx_xscvdpspn"
25849
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
25850
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
25851
+ UNSPEC_VSX_CVDPSPN))]
25852
+ "TARGET_XSCVDPSPN"
25853
+ "xscvdpspn %x0,%x1"
25854
+ [(set_attr "type" "fp")])
25856
+(define_insn "vsx_xscvspdpn"
25857
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
25858
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
25859
+ UNSPEC_VSX_CVSPDPN))]
25860
+ "TARGET_XSCVSPDPN"
25861
+ "xscvspdpn %x0,%x1"
25862
+ [(set_attr "type" "fp")])
25864
+(define_insn "vsx_xscvdpspn_scalar"
25865
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
25866
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
25867
+ UNSPEC_VSX_CVDPSPN))]
25868
+ "TARGET_XSCVDPSPN"
25869
+ "xscvdpspn %x0,%x1"
25870
+ [(set_attr "type" "fp")])
25872
+;; Used by direct move to move a SFmode value from GPR to VSX register
25873
+(define_insn "vsx_xscvspdpn_directmove"
25874
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
25875
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
25876
+ UNSPEC_VSX_CVSPDPN))]
25877
+ "TARGET_XSCVSPDPN"
25878
+ "xscvspdpn %x0,%x1"
25879
+ [(set_attr "type" "fp")])
25881
;; Convert from 64-bit to 32-bit types
25882
;; Note, favor the Altivec registers since the usual use of these instructions
25883
;; is in vector converts and we need to use the Altivec vperm instruction.
25884
@@ -1027,73 +1307,21 @@
25885
(set_attr "fp_type" "<VSfptype_simple>")])
25888
-;; Logical and permute operations
25889
-(define_insn "*vsx_and<mode>3"
25890
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
25892
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
25893
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
25894
- "VECTOR_MEM_VSX_P (<MODE>mode)"
25895
- "xxland %x0,%x1,%x2"
25896
- [(set_attr "type" "vecsimple")])
25898
-(define_insn "*vsx_ior<mode>3"
25899
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
25900
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
25901
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
25902
- "VECTOR_MEM_VSX_P (<MODE>mode)"
25903
- "xxlor %x0,%x1,%x2"
25904
- [(set_attr "type" "vecsimple")])
25906
-(define_insn "*vsx_xor<mode>3"
25907
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
25909
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
25910
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
25911
- "VECTOR_MEM_VSX_P (<MODE>mode)"
25912
- "xxlxor %x0,%x1,%x2"
25913
- [(set_attr "type" "vecsimple")])
25915
-(define_insn "*vsx_one_cmpl<mode>2"
25916
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
25918
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
25919
- "VECTOR_MEM_VSX_P (<MODE>mode)"
25920
- "xxlnor %x0,%x1,%x1"
25921
- [(set_attr "type" "vecsimple")])
25923
-(define_insn "*vsx_nor<mode>3"
25924
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
25927
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
25928
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
25929
- "VECTOR_MEM_VSX_P (<MODE>mode)"
25930
- "xxlnor %x0,%x1,%x2"
25931
- [(set_attr "type" "vecsimple")])
25933
-(define_insn "*vsx_andc<mode>3"
25934
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
25937
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
25938
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
25939
- "VECTOR_MEM_VSX_P (<MODE>mode)"
25940
- "xxlandc %x0,%x1,%x2"
25941
- [(set_attr "type" "vecsimple")])
25944
;; Permute operations
25946
;; Build a V2DF/V2DI vector from two scalars
25947
(define_insn "vsx_concat_<mode>"
25948
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
25950
- [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
25951
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")]
25952
- UNSPEC_VSX_CONCAT))]
25953
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
25954
+ (vec_concat:VSX_D
25955
+ (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
25956
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
25957
"VECTOR_MEM_VSX_P (<MODE>mode)"
25958
- "xxpermdi %x0,%x1,%x2,0"
25960
+ if (BYTES_BIG_ENDIAN)
25961
+ return "xxpermdi %x0,%x1,%x2,0";
25963
+ return "xxpermdi %x0,%x2,%x1,0";
25965
[(set_attr "type" "vecperm")])
25967
;; Special purpose concat using xxpermdi to glue two single precision values
25968
@@ -1106,9 +1334,161 @@
25969
(match_operand:SF 2 "vsx_register_operand" "f,f")]
25970
UNSPEC_VSX_CONCAT))]
25971
"VECTOR_MEM_VSX_P (V2DFmode)"
25972
- "xxpermdi %x0,%x1,%x2,0"
25974
+ if (BYTES_BIG_ENDIAN)
25975
+ return "xxpermdi %x0,%x1,%x2,0";
25977
+ return "xxpermdi %x0,%x2,%x1,0";
25979
[(set_attr "type" "vecperm")])
25981
+;; xxpermdi for little endian loads and stores. We need several of
25982
+;; these since the form of the PARALLEL differs by mode.
25983
+(define_insn "*vsx_xxpermdi2_le_<mode>"
25984
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
25985
+ (vec_select:VSX_D
25986
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
25987
+ (parallel [(const_int 1) (const_int 0)])))]
25988
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
25989
+ "xxpermdi %x0,%x1,%x1,2"
25990
+ [(set_attr "type" "vecperm")])
25992
+(define_insn "*vsx_xxpermdi4_le_<mode>"
25993
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
25994
+ (vec_select:VSX_W
25995
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
25996
+ (parallel [(const_int 2) (const_int 3)
25997
+ (const_int 0) (const_int 1)])))]
25998
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
25999
+ "xxpermdi %x0,%x1,%x1,2"
26000
+ [(set_attr "type" "vecperm")])
26002
+(define_insn "*vsx_xxpermdi8_le_V8HI"
26003
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
26005
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
26006
+ (parallel [(const_int 4) (const_int 5)
26007
+ (const_int 6) (const_int 7)
26008
+ (const_int 0) (const_int 1)
26009
+ (const_int 2) (const_int 3)])))]
26010
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
26011
+ "xxpermdi %x0,%x1,%x1,2"
26012
+ [(set_attr "type" "vecperm")])
26014
+(define_insn "*vsx_xxpermdi16_le_V16QI"
26015
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
26016
+ (vec_select:V16QI
26017
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
26018
+ (parallel [(const_int 8) (const_int 9)
26019
+ (const_int 10) (const_int 11)
26020
+ (const_int 12) (const_int 13)
26021
+ (const_int 14) (const_int 15)
26022
+ (const_int 0) (const_int 1)
26023
+ (const_int 2) (const_int 3)
26024
+ (const_int 4) (const_int 5)
26025
+ (const_int 6) (const_int 7)])))]
26026
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
26027
+ "xxpermdi %x0,%x1,%x1,2"
26028
+ [(set_attr "type" "vecperm")])
26030
+;; lxvd2x for little endian loads. We need several of
26031
+;; these since the form of the PARALLEL differs by mode.
26032
+(define_insn "*vsx_lxvd2x2_le_<mode>"
26033
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
26034
+ (vec_select:VSX_D
26035
+ (match_operand:VSX_D 1 "memory_operand" "Z")
26036
+ (parallel [(const_int 1) (const_int 0)])))]
26037
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
26039
+ [(set_attr "type" "vecload")])
26041
+(define_insn "*vsx_lxvd2x4_le_<mode>"
26042
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
26043
+ (vec_select:VSX_W
26044
+ (match_operand:VSX_W 1 "memory_operand" "Z")
26045
+ (parallel [(const_int 2) (const_int 3)
26046
+ (const_int 0) (const_int 1)])))]
26047
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
26049
+ [(set_attr "type" "vecload")])
26051
+(define_insn "*vsx_lxvd2x8_le_V8HI"
26052
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
26054
+ (match_operand:V8HI 1 "memory_operand" "Z")
26055
+ (parallel [(const_int 4) (const_int 5)
26056
+ (const_int 6) (const_int 7)
26057
+ (const_int 0) (const_int 1)
26058
+ (const_int 2) (const_int 3)])))]
26059
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
26061
+ [(set_attr "type" "vecload")])
26063
+(define_insn "*vsx_lxvd2x16_le_V16QI"
26064
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
26065
+ (vec_select:V16QI
26066
+ (match_operand:V16QI 1 "memory_operand" "Z")
26067
+ (parallel [(const_int 8) (const_int 9)
26068
+ (const_int 10) (const_int 11)
26069
+ (const_int 12) (const_int 13)
26070
+ (const_int 14) (const_int 15)
26071
+ (const_int 0) (const_int 1)
26072
+ (const_int 2) (const_int 3)
26073
+ (const_int 4) (const_int 5)
26074
+ (const_int 6) (const_int 7)])))]
26075
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
26077
+ [(set_attr "type" "vecload")])
26079
+;; stxvd2x for little endian stores. We need several of
26080
+;; these since the form of the PARALLEL differs by mode.
26081
+(define_insn "*vsx_stxvd2x2_le_<mode>"
26082
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
26083
+ (vec_select:VSX_D
26084
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
26085
+ (parallel [(const_int 1) (const_int 0)])))]
26086
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
26087
+ "stxvd2x %x1,%y0"
26088
+ [(set_attr "type" "vecstore")])
26090
+(define_insn "*vsx_stxvd2x4_le_<mode>"
26091
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
26092
+ (vec_select:VSX_W
26093
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
26094
+ (parallel [(const_int 2) (const_int 3)
26095
+ (const_int 0) (const_int 1)])))]
26096
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
26097
+ "stxvd2x %x1,%y0"
26098
+ [(set_attr "type" "vecstore")])
26100
+(define_insn "*vsx_stxvd2x8_le_V8HI"
26101
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
26103
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
26104
+ (parallel [(const_int 4) (const_int 5)
26105
+ (const_int 6) (const_int 7)
26106
+ (const_int 0) (const_int 1)
26107
+ (const_int 2) (const_int 3)])))]
26108
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
26109
+ "stxvd2x %x1,%y0"
26110
+ [(set_attr "type" "vecstore")])
26112
+(define_insn "*vsx_stxvd2x16_le_V16QI"
26113
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
26114
+ (vec_select:V16QI
26115
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
26116
+ (parallel [(const_int 8) (const_int 9)
26117
+ (const_int 10) (const_int 11)
26118
+ (const_int 12) (const_int 13)
26119
+ (const_int 14) (const_int 15)
26120
+ (const_int 0) (const_int 1)
26121
+ (const_int 2) (const_int 3)
26122
+ (const_int 4) (const_int 5)
26123
+ (const_int 6) (const_int 7)])))]
26124
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
26125
+ "stxvd2x %x1,%y0"
26126
+ [(set_attr "type" "vecstore")])
26128
;; Set the element of a V2DI/VD2F mode
26129
(define_insn "vsx_set_<mode>"
26130
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
26131
@@ -1118,9 +1498,10 @@
26133
"VECTOR_MEM_VSX_P (<MODE>mode)"
26135
- if (INTVAL (operands[3]) == 0)
26136
+ int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
26137
+ if (INTVAL (operands[3]) == idx_first)
26138
return \"xxpermdi %x0,%x2,%x1,1\";
26139
- else if (INTVAL (operands[3]) == 1)
26140
+ else if (INTVAL (operands[3]) == 1 - idx_first)
26141
return \"xxpermdi %x0,%x1,%x2,0\";
26143
gcc_unreachable ();
26144
@@ -1135,8 +1516,12 @@
26145
[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
26146
"VECTOR_MEM_VSX_P (<MODE>mode)"
26149
gcc_assert (UINTVAL (operands[2]) <= 1);
26150
- operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
26151
+ fldDM = INTVAL (operands[2]) << 1;
26152
+ if (!BYTES_BIG_ENDIAN)
26153
+ fldDM = 3 - fldDM;
26154
+ operands[3] = GEN_INT (fldDM);
26155
return \"xxpermdi %x0,%x1,%x1,%3\";
26157
[(set_attr "type" "vecperm")])
26158
@@ -1149,9 +1534,28 @@
26159
(parallel [(const_int 0)])))]
26160
"VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
26162
- [(set_attr "type" "fpload")
26163
+ [(set (attr "type")
26165
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26166
+ (const_string "fpload_ux")
26167
+ (const_string "fpload")))
26168
(set_attr "length" "4")])
26170
+;; Optimize extracting element 1 from memory for little endian
26171
+(define_insn "*vsx_extract_<mode>_one_le"
26172
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
26173
+ (vec_select:<VS_scalar>
26174
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
26175
+ (parallel [(const_int 1)])))]
26176
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN"
26177
+ "lxsd%U1x %x0,%y1"
26178
+ [(set (attr "type")
26180
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26181
+ (const_string "fpload_ux")
26182
+ (const_string "fpload")))
26183
+ (set_attr "length" "4")])
26185
;; Extract a SF element from V4SF
26186
(define_insn_and_split "vsx_extract_v4sf"
26187
[(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
26188
@@ -1172,7 +1576,7 @@
26189
rtx op2 = operands[2];
26190
rtx op3 = operands[3];
26192
- HOST_WIDE_INT ele = INTVAL (op2);
26193
+ HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
26197
@@ -1213,11 +1617,22 @@
26198
if (<MODE>mode != V2DImode)
26200
target = gen_lowpart (V2DImode, target);
26201
- op0 = gen_lowpart (V2DImode, target);
26202
- op1 = gen_lowpart (V2DImode, target);
26203
+ op0 = gen_lowpart (V2DImode, op0);
26204
+ op1 = gen_lowpart (V2DImode, op1);
26207
- emit_insn (gen (target, op0, op1, perm0, perm1));
26208
+ /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
26209
+ transformation we don't want; it is necessary for
26210
+ rs6000_expand_vec_perm_const_1 but not for this use. So we
26211
+ prepare for that by reversing the transformation here. */
26212
+ if (BYTES_BIG_ENDIAN)
26213
+ emit_insn (gen (target, op0, op1, perm0, perm1));
26216
+ rtx p0 = GEN_INT (3 - INTVAL (perm1));
26217
+ rtx p1 = GEN_INT (3 - INTVAL (perm0));
26218
+ emit_insn (gen (target, op1, op0, p0, p1));
26223
@@ -1231,9 +1646,32 @@
26224
(match_operand 4 "const_2_to_3_operand" "")])))]
26225
"VECTOR_MEM_VSX_P (<MODE>mode)"
26227
- int mask = (INTVAL (operands[3]) << 1) | (INTVAL (operands[4]) - 2);
26228
+ int op3, op4, mask;
26230
+ /* For little endian, swap operands and invert/swap selectors
26231
+ to get the correct xxpermdi. The operand swap sets up the
26232
+ inputs as a little endian array. The selectors are swapped
26233
+ because they are defined to use big endian ordering. The
26234
+ selectors are inverted to get the correct doublewords for
26235
+ little endian ordering. */
26236
+ if (BYTES_BIG_ENDIAN)
26238
+ op3 = INTVAL (operands[3]);
26239
+ op4 = INTVAL (operands[4]);
26243
+ op3 = 3 - INTVAL (operands[4]);
26244
+ op4 = 3 - INTVAL (operands[3]);
26247
+ mask = (op3 << 1) | (op4 - 2);
26248
operands[3] = GEN_INT (mask);
26249
- return "xxpermdi %x0,%x1,%x2,%3";
26251
+ if (BYTES_BIG_ENDIAN)
26252
+ return "xxpermdi %x0,%x1,%x2,%3";
26254
+ return "xxpermdi %x0,%x2,%x1,%3";
26256
[(set_attr "type" "vecperm")])
26258
@@ -1252,25 +1690,57 @@
26260
;; Expanders for builtins
26261
(define_expand "vsx_mergel_<mode>"
26262
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
26263
- (vec_select:VSX_D
26264
- (vec_concat:<VS_double>
26265
- (match_operand:VSX_D 1 "vsx_register_operand" "")
26266
- (match_operand:VSX_D 2 "vsx_register_operand" ""))
26267
- (parallel [(const_int 1) (const_int 3)])))]
26268
+ [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
26269
+ (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
26270
+ (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
26271
"VECTOR_MEM_VSX_P (<MODE>mode)"
26277
+ /* Special handling for LE with -maltivec=be. */
26278
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
26280
+ v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
26281
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
26285
+ v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
26286
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
26289
+ x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
26290
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
26294
(define_expand "vsx_mergeh_<mode>"
26295
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
26296
- (vec_select:VSX_D
26297
- (vec_concat:<VS_double>
26298
- (match_operand:VSX_D 1 "vsx_register_operand" "")
26299
- (match_operand:VSX_D 2 "vsx_register_operand" ""))
26300
- (parallel [(const_int 0) (const_int 2)])))]
26301
+ [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
26302
+ (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
26303
+ (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
26304
"VECTOR_MEM_VSX_P (<MODE>mode)"
26310
+ /* Special handling for LE with -maltivec=be. */
26311
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
26313
+ v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
26314
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
26318
+ v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
26319
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
26322
+ x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
26323
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
26328
(define_insn "vsx_splat_<mode>"
26329
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
26330
@@ -1295,6 +1765,20 @@
26332
[(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
26333
"VECTOR_MEM_VSX_P (<MODE>mode)"
26335
+ if (!BYTES_BIG_ENDIAN)
26336
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
26338
+ return "xxspltw %x0,%x1,%2";
26340
+ [(set_attr "type" "vecperm")])
26342
+(define_insn "vsx_xxspltw_<mode>_direct"
26343
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
26344
+ (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
26345
+ (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
26346
+ UNSPEC_VSX_XXSPLTW))]
26347
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
26348
"xxspltw %x0,%x1,%2"
26349
[(set_attr "type" "vecperm")])
26351
@@ -1483,3 +1967,27 @@
26353
[(set_attr "length" "20")
26354
(set_attr "type" "veccomplex")])
26357
+;; Power8 Vector fusion. The fused ops must be physically adjacent.
26359
+ [(set (match_operand:P 0 "base_reg_operand" "")
26360
+ (match_operand:P 1 "short_cint_operand" ""))
26361
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
26362
+ (mem:VSX_M2 (plus:P (match_dup 0)
26363
+ (match_operand:P 3 "int_reg_operand" ""))))]
26364
+ "TARGET_VSX && TARGET_P8_FUSION"
26365
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
26366
+ [(set_attr "length" "8")
26367
+ (set_attr "type" "vecload")])
26370
+ [(set (match_operand:P 0 "base_reg_operand" "")
26371
+ (match_operand:P 1 "short_cint_operand" ""))
26372
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
26373
+ (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
26374
+ (match_dup 0))))]
26375
+ "TARGET_VSX && TARGET_P8_FUSION"
26376
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
26377
+ [(set_attr "length" "8")
26378
+ (set_attr "type" "vecload")])
26379
--- a/src/gcc/config/rs6000/rs6000.h
26380
+++ b/src/gcc/config/rs6000/rs6000.h
26382
#ifdef HAVE_AS_POWER8
26383
#define ASM_CPU_POWER8_SPEC "-mpower8"
26385
-#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec"
26386
+#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC
26390
@@ -164,6 +164,7 @@
26391
%{mcpu=e6500: -me6500} \
26392
%{maltivec: -maltivec} \
26393
%{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \
26394
+%{mpower8-vector|mcrypto|mdirect-move|mhtm: %{!mcpu*: %(asm_cpu_power8)}} \
26397
#define CPP_DEFAULT_SPEC ""
26398
@@ -277,6 +278,21 @@
26399
#define TARGET_POPCNTD 0
26402
+/* Define the ISA 2.07 flags as 0 if the target assembler does not support the
26403
+ waitasecond instruction. Allow -mpower8-fusion, since it does not add new
26406
+#ifndef HAVE_AS_POWER8
26407
+#undef TARGET_DIRECT_MOVE
26408
+#undef TARGET_CRYPTO
26410
+#undef TARGET_P8_VECTOR
26411
+#define TARGET_DIRECT_MOVE 0
26412
+#define TARGET_CRYPTO 0
26413
+#define TARGET_HTM 0
26414
+#define TARGET_P8_VECTOR 0
26417
/* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If
26418
not, generate the lwsync code as an integer constant. */
26419
#ifdef HAVE_AS_LWSYNC
26420
@@ -386,6 +402,7 @@
26421
#define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET)
26422
#define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN)
26424
+/* Describe the vector unit used for arithmetic operations. */
26425
extern enum rs6000_vector rs6000_vector_unit[];
26427
#define VECTOR_UNIT_NONE_P(MODE) \
26428
@@ -394,12 +411,25 @@
26429
#define VECTOR_UNIT_VSX_P(MODE) \
26430
(rs6000_vector_unit[(MODE)] == VECTOR_VSX)
26432
+#define VECTOR_UNIT_P8_VECTOR_P(MODE) \
26433
+ (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR)
26435
#define VECTOR_UNIT_ALTIVEC_P(MODE) \
26436
(rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC)
26438
+#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \
26439
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
26440
+ (int)VECTOR_VSX, \
26441
+ (int)VECTOR_P8_VECTOR))
26443
+/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either
26444
+ altivec (VMX) or VSX vector instructions. P8 vector support is upwards
26445
+ compatible, so allow it as well, rather than changing all of the uses of the
26447
#define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \
26448
- (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \
26449
- || rs6000_vector_unit[(MODE)] == VECTOR_VSX)
26450
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
26451
+ (int)VECTOR_ALTIVEC, \
26452
+ (int)VECTOR_P8_VECTOR))
26454
/* Describe whether to use VSX loads or Altivec loads. For now, just use the
26455
same unit as the vector unit we are using, but we may want to migrate to
26456
@@ -412,12 +442,21 @@
26457
#define VECTOR_MEM_VSX_P(MODE) \
26458
(rs6000_vector_mem[(MODE)] == VECTOR_VSX)
26460
+#define VECTOR_MEM_P8_VECTOR_P(MODE) \
26461
+ (rs6000_vector_mem[(MODE)] == VECTOR_VSX)
26463
#define VECTOR_MEM_ALTIVEC_P(MODE) \
26464
(rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC)
26466
+#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \
26467
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
26468
+ (int)VECTOR_VSX, \
26469
+ (int)VECTOR_P8_VECTOR))
26471
#define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \
26472
- (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \
26473
- || rs6000_vector_mem[(MODE)] == VECTOR_VSX)
26474
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
26475
+ (int)VECTOR_ALTIVEC, \
26476
+ (int)VECTOR_P8_VECTOR))
26478
/* Return the alignment of a given vector type, which is set based on the
26479
vector unit use. VSX for instance can load 32 or 64 bit aligned words
26480
@@ -429,6 +468,15 @@
26481
? rs6000_vector_align[(MODE)] \
26482
: (int)GET_MODE_BITSIZE ((MODE)))
26484
+/* Determine the element order to use for vector instructions. By
26485
+ default we use big-endian element order when targeting big-endian,
26486
+ and little-endian element order when targeting little-endian. For
26487
+ programs being ported from BE Power to LE Power, it can sometimes
26488
+ be useful to use big-endian element order when targeting little-endian.
26489
+ This is set via -maltivec=be, for example. */
26490
+#define VECTOR_ELT_ORDER_BIG \
26491
+ (BYTES_BIG_ENDIAN || (rs6000_altivec_element_order == 2))
26493
/* Alignment options for fields in structures for sub-targets following
26495
ALIGN_POWER word-aligns FP doubles (default AIX ABI).
26496
@@ -479,16 +527,37 @@
26497
#define TARGET_FCTIDUZ TARGET_POPCNTD
26498
#define TARGET_FCTIWUZ TARGET_POPCNTD
26500
+#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
26501
+#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
26503
+/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
26504
+ in power7, so conditionalize them on p8 features. TImode syncs need quad
26505
+ memory support. */
26506
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY \
26507
+ || TARGET_QUAD_MEMORY_ATOMIC \
26508
+ || TARGET_DIRECT_MOVE)
26510
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY_ATOMIC
26512
+/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
26513
+ to allocate the SDmode stack slot to get the value into the proper location
26514
+ in the register. */
26515
+#define TARGET_NO_SDMODE_STACK (TARGET_LFIWZX && TARGET_STFIWX && TARGET_DFP)
26517
/* In switching from using target_flags to using rs6000_isa_flags, the options
26518
machinery creates OPTION_MASK_<xxx> instead of MASK_<xxx>. For now map
26519
OPTION_MASK_<xxx> back into MASK_<xxx>. */
26520
#define MASK_ALTIVEC OPTION_MASK_ALTIVEC
26521
#define MASK_CMPB OPTION_MASK_CMPB
26522
+#define MASK_CRYPTO OPTION_MASK_CRYPTO
26523
#define MASK_DFP OPTION_MASK_DFP
26524
+#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE
26525
#define MASK_DLMZB OPTION_MASK_DLMZB
26526
#define MASK_EABI OPTION_MASK_EABI
26527
#define MASK_FPRND OPTION_MASK_FPRND
26528
+#define MASK_P8_FUSION OPTION_MASK_P8_FUSION
26529
#define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT
26530
+#define MASK_HTM OPTION_MASK_HTM
26531
#define MASK_ISEL OPTION_MASK_ISEL
26532
#define MASK_MFCRF OPTION_MASK_MFCRF
26533
#define MASK_MFPGPR OPTION_MASK_MFPGPR
26534
@@ -495,6 +564,7 @@
26535
#define MASK_MULHW OPTION_MASK_MULHW
26536
#define MASK_MULTIPLE OPTION_MASK_MULTIPLE
26537
#define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE
26538
+#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR
26539
#define MASK_POPCNTB OPTION_MASK_POPCNTB
26540
#define MASK_POPCNTD OPTION_MASK_POPCNTD
26541
#define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT
26542
@@ -505,6 +575,7 @@
26543
#define MASK_STRING OPTION_MASK_STRING
26544
#define MASK_UPDATE OPTION_MASK_UPDATE
26545
#define MASK_VSX OPTION_MASK_VSX
26546
+#define MASK_VSX_TIMODE OPTION_MASK_VSX_TIMODE
26549
#define MASK_POWERPC64 OPTION_MASK_POWERPC64
26550
@@ -558,6 +629,25 @@
26551
|| rs6000_cpu == PROCESSOR_PPC8548)
26554
+/* Whether SF/DF operations are supported on the E500. */
26555
+#define TARGET_SF_SPE (TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT \
26558
+#define TARGET_DF_SPE (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT \
26559
+ && !TARGET_FPRS && TARGET_E500_DOUBLE)
26561
+/* Whether SF/DF operations are supported by by the normal floating point unit
26562
+ (or the vector/scalar unit). */
26563
+#define TARGET_SF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \
26564
+ && TARGET_SINGLE_FLOAT)
26566
+#define TARGET_DF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \
26567
+ && TARGET_DOUBLE_FLOAT)
26569
+/* Whether SF/DF operations are supported by any hardware. */
26570
+#define TARGET_SF_INSN (TARGET_SF_FPR || TARGET_SF_SPE)
26571
+#define TARGET_DF_INSN (TARGET_DF_FPR || TARGET_DF_SPE)
26573
/* Which machine supports the various reciprocal estimate instructions. */
26574
#define TARGET_FRES (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
26575
&& TARGET_FPRS && TARGET_SINGLE_FLOAT)
26576
@@ -595,9 +685,6 @@
26577
#define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \
26578
(rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE)
26580
-#define RS6000_RECIP_HIGH_PRECISION_P(MODE) \
26581
- ((MODE) == SFmode || (MODE) == V4SFmode || TARGET_RECIP_PRECISION)
26583
/* The default CPU for TARGET_OPTION_OVERRIDE. */
26584
#define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT
26586
@@ -842,15 +929,17 @@
26587
in inline functions.
26589
Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame
26590
- pointer, which is eventually eliminated in favor of SP or FP. */
26591
+ pointer, which is eventually eliminated in favor of SP or FP.
26593
-#define FIRST_PSEUDO_REGISTER 114
26594
+ The 3 HTM registers aren't also included in DWARF_FRAME_REGISTERS. */
26596
+#define FIRST_PSEUDO_REGISTER 117
26598
/* This must be included for pre gcc 3.0 glibc compatibility. */
26599
#define PRE_GCC3_DWARF_FRAME_REGISTERS 77
26601
/* Add 32 dwarf columns for synthetic SPE registers. */
26602
-#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 1) + 32)
26603
+#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 4) + 32)
26605
/* The SPE has an additional 32 synthetic registers, with DWARF debug
26606
info numbering for these registers starting at 1200. While eh_frame
26607
@@ -866,7 +955,7 @@
26608
We must map them here to avoid huge unwinder tables mostly consisting
26609
of unused space. */
26610
#define DWARF_REG_TO_UNWIND_COLUMN(r) \
26611
- ((r) > 1200 ? ((r) - 1200 + FIRST_PSEUDO_REGISTER - 1) : (r))
26612
+ ((r) > 1200 ? ((r) - 1200 + (DWARF_FRAME_REGISTERS - 32)) : (r))
26614
/* Use standard DWARF numbering for DWARF debugging information. */
26615
#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
26616
@@ -906,7 +995,7 @@
26617
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
26618
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
26621
+ , 1, 1, 1, 1, 1, 1 \
26624
/* 1 for registers not available across function calls.
26625
@@ -926,7 +1015,7 @@
26626
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
26627
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
26630
+ , 1, 1, 1, 1, 1, 1 \
26633
/* Like `CALL_USED_REGISTERS' except this macro doesn't require that
26634
@@ -945,7 +1034,7 @@
26635
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
26636
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
26639
+ , 0, 0, 0, 0, 0, 0 \
26642
#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
26643
@@ -984,6 +1073,9 @@
26644
vrsave, vscr (fixed)
26645
spe_acc, spefscr (fixed)
26653
@@ -1004,7 +1096,9 @@
26655
#define REG_ALLOC_ORDER \
26657
- 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
26658
+ /* move fr13 (ie 45) later, so if we need TFmode, it does */ \
26659
+ /* not use fr14 which is a saved register. */ \
26660
+ 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \
26662
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
26663
50, 49, 48, 47, 46, \
26664
@@ -1023,7 +1117,7 @@
26665
96, 95, 94, 93, 92, 91, \
26666
108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, \
26669
+ 111, 112, 113, 114, 115, 116 \
26672
/* True if register is floating-point. */
26673
@@ -1064,8 +1158,11 @@
26674
#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
26676
/* Alternate name for any vector register supporting logical operations, no
26677
- matter which instruction set(s) are available. */
26678
-#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N)
26679
+ matter which instruction set(s) are available. Allow GPRs as well as the
26680
+ vector registers. */
26681
+#define VLOGICAL_REGNO_P(N) \
26682
+ (INT_REGNO_P (N) || ALTIVEC_REGNO_P (N) \
26683
+ || (TARGET_VSX && FP_REGNO_P (N))) \
26685
/* Return number of consecutive hard regs needed starting at reg REGNO
26686
to hold something of mode MODE. */
26687
@@ -1125,28 +1222,32 @@
26688
/* Value is 1 if it is a good idea to tie two pseudo registers
26689
when one has mode MODE1 and one has mode MODE2.
26690
If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
26691
- for any hard reg, then this must be 0 for correct output. */
26692
-#define MODES_TIEABLE_P(MODE1, MODE2) \
26693
- (SCALAR_FLOAT_MODE_P (MODE1) \
26694
+ for any hard reg, then this must be 0 for correct output.
26696
+ PTImode cannot tie with other modes because PTImode is restricted to even
26697
+ GPR registers, and TImode can go in any GPR as well as VSX registers (PR
26699
+#define MODES_TIEABLE_P(MODE1, MODE2) \
26700
+ ((MODE1) == PTImode \
26701
+ ? (MODE2) == PTImode \
26702
+ : (MODE2) == PTImode \
26704
+ : SCALAR_FLOAT_MODE_P (MODE1) \
26705
? SCALAR_FLOAT_MODE_P (MODE2) \
26706
: SCALAR_FLOAT_MODE_P (MODE2) \
26707
- ? SCALAR_FLOAT_MODE_P (MODE1) \
26709
: GET_MODE_CLASS (MODE1) == MODE_CC \
26710
? GET_MODE_CLASS (MODE2) == MODE_CC \
26711
: GET_MODE_CLASS (MODE2) == MODE_CC \
26712
- ? GET_MODE_CLASS (MODE1) == MODE_CC \
26714
: SPE_VECTOR_MODE (MODE1) \
26715
? SPE_VECTOR_MODE (MODE2) \
26716
: SPE_VECTOR_MODE (MODE2) \
26717
- ? SPE_VECTOR_MODE (MODE1) \
26718
- : ALTIVEC_VECTOR_MODE (MODE1) \
26719
- ? ALTIVEC_VECTOR_MODE (MODE2) \
26720
- : ALTIVEC_VECTOR_MODE (MODE2) \
26721
- ? ALTIVEC_VECTOR_MODE (MODE1) \
26723
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
26724
? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
26725
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
26726
- ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
26730
/* Post-reload, we can't use any new AltiVec registers, as we already
26731
@@ -1240,6 +1341,7 @@
26739
@@ -1270,6 +1372,7 @@
26744
"NON_SPECIAL_REGS", \
26747
@@ -1299,6 +1402,7 @@
26748
{ 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
26749
{ 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
26750
{ 0x00000000, 0x00000000, 0x00000000, 0x00010000 }, /* SPEFSCR_REGS */ \
26751
+ { 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, /* SPR_REGS */ \
26752
{ 0xffffffff, 0xffffffff, 0x00000008, 0x00020000 }, /* NON_SPECIAL_REGS */ \
26753
{ 0x00000000, 0x00000000, 0x00000002, 0x00000000 }, /* LINK_REGS */ \
26754
{ 0x00000000, 0x00000000, 0x00000004, 0x00000000 }, /* CTR_REGS */ \
26755
@@ -1309,7 +1413,7 @@
26756
{ 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */ \
26757
{ 0xffffffff, 0x00000000, 0x00000ffe, 0x00020000 }, /* NON_FLOAT_REGS */ \
26758
{ 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */ \
26759
- { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0003ffff } /* ALL_REGS */ \
26760
+ { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0007ffff } /* ALL_REGS */ \
26763
/* The same information, inverted:
26764
@@ -1337,7 +1441,18 @@
26765
RS6000_CONSTRAINT_wa, /* Any VSX register */
26766
RS6000_CONSTRAINT_wd, /* VSX register for V2DF */
26767
RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
26768
+ RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */
26769
+ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */
26770
+ RS6000_CONSTRAINT_wm, /* VSX register for direct move */
26771
+ RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */
26772
RS6000_CONSTRAINT_ws, /* VSX register for DF */
26773
+ RS6000_CONSTRAINT_wt, /* VSX register for TImode */
26774
+ RS6000_CONSTRAINT_wu, /* Altivec register for float load/stores. */
26775
+ RS6000_CONSTRAINT_wv, /* Altivec register for double load/stores. */
26776
+ RS6000_CONSTRAINT_ww, /* FP or VSX register for vsx float ops. */
26777
+ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
26778
+ RS6000_CONSTRAINT_wy, /* VSX register for SF */
26779
+ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */
26780
RS6000_CONSTRAINT_MAX
26783
@@ -1425,21 +1540,14 @@
26785
#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 || flag_asan != 0)
26787
-/* Size of the outgoing register save area */
26788
-#define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \
26789
- || DEFAULT_ABI == ABI_DARWIN) \
26790
- ? (TARGET_64BIT ? 64 : 32) \
26793
/* Size of the fixed area on the stack */
26794
#define RS6000_SAVE_AREA \
26795
- (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_DARWIN) ? 24 : 8) \
26796
+ ((DEFAULT_ABI == ABI_V4 ? 8 : DEFAULT_ABI == ABI_ELFv2 ? 16 : 24) \
26797
<< (TARGET_64BIT ? 1 : 0))
26799
-/* MEM representing address to save the TOC register */
26800
-#define RS6000_SAVE_TOC gen_rtx_MEM (Pmode, \
26801
- plus_constant (Pmode, stack_pointer_rtx, \
26802
- (TARGET_32BIT ? 20 : 40)))
26803
+/* Stack offset for toc save slot. */
26804
+#define RS6000_TOC_SAVE_SLOT \
26805
+ ((DEFAULT_ABI == ABI_ELFv2 ? 12 : 20) << (TARGET_64BIT ? 1 : 0))
26807
/* Align an address */
26808
#define RS6000_ALIGN(n,a) (((n) + (a) - 1) & ~((a) - 1))
26809
@@ -1489,7 +1597,7 @@
26810
/* Define this if stack space is still allocated for a parameter passed
26811
in a register. The value is the number of bytes allocated to this
26813
-#define REG_PARM_STACK_SPACE(FNDECL) RS6000_REG_SAVE
26814
+#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL))
26816
/* Define this if the above stack space is to be considered part of the
26817
space allocated by the caller. */
26818
@@ -1522,7 +1630,7 @@
26819
NONLOCAL needs twice Pmode to maintain both backchain and SP. */
26820
#define STACK_SAVEAREA_MODE(LEVEL) \
26821
(LEVEL == SAVE_FUNCTION ? VOIDmode \
26822
- : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : TImode) : Pmode)
26823
+ : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : PTImode) : Pmode)
26825
/* Minimum and maximum general purpose registers used to hold arguments. */
26826
#define GP_ARG_MIN_REG 3
26827
@@ -1533,9 +1641,8 @@
26828
#define FP_ARG_MIN_REG 33
26829
#define FP_ARG_AIX_MAX_REG 45
26830
#define FP_ARG_V4_MAX_REG 40
26831
-#define FP_ARG_MAX_REG ((DEFAULT_ABI == ABI_AIX \
26832
- || DEFAULT_ABI == ABI_DARWIN) \
26833
- ? FP_ARG_AIX_MAX_REG : FP_ARG_V4_MAX_REG)
26834
+#define FP_ARG_MAX_REG (DEFAULT_ABI == ABI_V4 \
26835
+ ? FP_ARG_V4_MAX_REG : FP_ARG_AIX_MAX_REG)
26836
#define FP_ARG_NUM_REG (FP_ARG_MAX_REG - FP_ARG_MIN_REG + 1)
26838
/* Minimum and maximum AltiVec registers used to hold arguments. */
26839
@@ -1543,10 +1650,17 @@
26840
#define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11)
26841
#define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1)
26843
+/* Maximum number of registers per ELFv2 homogeneous aggregate argument. */
26844
+#define AGGR_ARG_NUM_REG 8
26846
/* Return registers */
26847
#define GP_ARG_RETURN GP_ARG_MIN_REG
26848
#define FP_ARG_RETURN FP_ARG_MIN_REG
26849
#define ALTIVEC_ARG_RETURN (FIRST_ALTIVEC_REGNO + 2)
26850
+#define FP_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? FP_ARG_RETURN \
26851
+ : (FP_ARG_RETURN + AGGR_ARG_NUM_REG - 1))
26852
+#define ALTIVEC_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? ALTIVEC_ARG_RETURN \
26853
+ : (ALTIVEC_ARG_RETURN + AGGR_ARG_NUM_REG - 1))
26855
/* Flags for the call/call_value rtl operations set up by function_arg */
26856
#define CALL_NORMAL 0x00000000 /* no special processing */
26857
@@ -1566,8 +1680,10 @@
26858
On RS/6000, this is r3, fp1, and v2 (for AltiVec). */
26859
#define FUNCTION_VALUE_REGNO_P(N) \
26860
((N) == GP_ARG_RETURN \
26861
- || ((N) == FP_ARG_RETURN && TARGET_HARD_FLOAT && TARGET_FPRS) \
26862
- || ((N) == ALTIVEC_ARG_RETURN && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
26863
+ || ((N) >= FP_ARG_RETURN && (N) <= FP_ARG_MAX_RETURN \
26864
+ && TARGET_HARD_FLOAT && TARGET_FPRS) \
26865
+ || ((N) >= ALTIVEC_ARG_RETURN && (N) <= ALTIVEC_ARG_MAX_RETURN \
26866
+ && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
26868
/* 1 if N is a possible register number for function argument passing.
26869
On RS/6000, these are r3-r10 and fp1-fp13.
26870
@@ -1691,11 +1807,8 @@
26871
/* Number of bytes into the frame return addresses can be found. See
26872
rs6000_stack_info in rs6000.c for more information on how the different
26873
abi's store the return address. */
26874
-#define RETURN_ADDRESS_OFFSET \
26875
- ((DEFAULT_ABI == ABI_AIX \
26876
- || DEFAULT_ABI == ABI_DARWIN) ? (TARGET_32BIT ? 8 : 16) : \
26877
- (DEFAULT_ABI == ABI_V4) ? 4 : \
26878
- (internal_error ("RETURN_ADDRESS_OFFSET not supported"), 0))
26879
+#define RETURN_ADDRESS_OFFSET \
26880
+ ((DEFAULT_ABI == ABI_V4 ? 4 : 8) << (TARGET_64BIT ? 1 : 0))
26882
/* The current return address is in link register (65). The return address
26883
of anything farther back is accessed normally at an offset of 8 from the
26884
@@ -2215,6 +2328,9 @@
26885
&rs6000_reg_names[111][0], /* spe_acc */ \
26886
&rs6000_reg_names[112][0], /* spefscr */ \
26887
&rs6000_reg_names[113][0], /* sfp */ \
26888
+ &rs6000_reg_names[114][0], /* tfhar */ \
26889
+ &rs6000_reg_names[115][0], /* tfiar */ \
26890
+ &rs6000_reg_names[116][0], /* texasr */ \
26893
/* Table of additional register names to use in user input. */
26894
@@ -2268,7 +2384,9 @@
26895
{"vs48", 93}, {"vs49", 94}, {"vs50", 95}, {"vs51", 96}, \
26896
{"vs52", 97}, {"vs53", 98}, {"vs54", 99}, {"vs55", 100}, \
26897
{"vs56", 101},{"vs57", 102},{"vs58", 103},{"vs59", 104}, \
26898
- {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108} }
26899
+ {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108}, \
26900
+ /* Transactional Memory Facility (HTM) Registers. */ \
26901
+ {"tfhar", 114}, {"tfiar", 115}, {"texasr", 116} }
26903
/* This is how to output an element of a case-vector that is relative. */
26905
@@ -2357,7 +2475,12 @@
26906
#define RS6000_BTC_ATTR_MASK 0x00000700 /* Mask of the attributes. */
26908
/* Miscellaneous information. */
26909
-#define RS6000_BTC_OVERLOADED 0x4000000 /* function is overloaded. */
26910
+#define RS6000_BTC_SPR 0x01000000 /* function references SPRs. */
26911
+#define RS6000_BTC_VOID 0x02000000 /* function has no return value. */
26912
+#define RS6000_BTC_OVERLOADED 0x04000000 /* function is overloaded. */
26913
+#define RS6000_BTC_32BIT 0x08000000 /* function references SPRs. */
26914
+#define RS6000_BTC_64BIT 0x10000000 /* function references SPRs. */
26915
+#define RS6000_BTC_MISC_MASK 0x1f000000 /* Mask of the misc info. */
26917
/* Convenience macros to document the instruction type. */
26918
#define RS6000_BTC_MEM RS6000_BTC_MISC /* load/store touches mem. */
26919
@@ -2369,6 +2492,9 @@
26920
#define RS6000_BTM_ALWAYS 0 /* Always enabled. */
26921
#define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */
26922
#define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */
26923
+#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */
26924
+#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */
26925
+#define RS6000_BTM_HTM MASK_HTM /* hardware TM funcs. */
26926
#define RS6000_BTM_SPE MASK_STRING /* E500 */
26927
#define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */
26928
#define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */
26929
@@ -2380,10 +2506,13 @@
26931
#define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \
26933
+ | RS6000_BTM_P8_VECTOR \
26934
+ | RS6000_BTM_CRYPTO \
26936
| RS6000_BTM_FRES \
26937
| RS6000_BTM_FRSQRTE \
26938
| RS6000_BTM_FRSQRTES \
26939
+ | RS6000_BTM_HTM \
26940
| RS6000_BTM_POPCNTD \
26943
@@ -2395,6 +2524,7 @@
26944
#undef RS6000_BUILTIN_A
26945
#undef RS6000_BUILTIN_D
26946
#undef RS6000_BUILTIN_E
26947
+#undef RS6000_BUILTIN_H
26948
#undef RS6000_BUILTIN_P
26949
#undef RS6000_BUILTIN_Q
26950
#undef RS6000_BUILTIN_S
26951
@@ -2406,6 +2536,7 @@
26952
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26953
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26954
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26955
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26956
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26957
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26958
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
26959
@@ -2424,6 +2555,7 @@
26960
#undef RS6000_BUILTIN_A
26961
#undef RS6000_BUILTIN_D
26962
#undef RS6000_BUILTIN_E
26963
+#undef RS6000_BUILTIN_H
26964
#undef RS6000_BUILTIN_P
26965
#undef RS6000_BUILTIN_Q
26966
#undef RS6000_BUILTIN_S
26967
--- a/src/gcc/config/rs6000/altivec.md
26968
+++ b/src/gcc/config/rs6000/altivec.md
26969
@@ -41,15 +41,12 @@
26982
+ UNSPEC_VPACK_SIGN_SIGN_SAT
26983
+ UNSPEC_VPACK_SIGN_UNS_SAT
26984
+ UNSPEC_VPACK_UNS_UNS_SAT
26985
+ UNSPEC_VPACK_UNS_UNS_MOD
26986
+ UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
26990
@@ -71,12 +68,12 @@
26995
+ UNSPEC_VUNPACK_HI_SIGN
26996
+ UNSPEC_VUNPACK_LO_SIGN
26997
+ UNSPEC_VUNPACK_HI_SIGN_DIRECT
26998
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT
27007
@@ -134,6 +131,11 @@
27012
+ UNSPEC_VMRGH_DIRECT
27013
+ UNSPEC_VMRGL_DIRECT
27014
+ UNSPEC_VSPLT_DIRECT
27015
+ UNSPEC_VSUMSWS_DIRECT
27018
(define_c_enum "unspecv"
27019
@@ -146,6 +148,8 @@
27022
(define_mode_iterator VI [V4SI V8HI V16QI])
27023
+;; Like VI, but add ISA 2.07 integer vector ops
27024
+(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
27025
;; Short vec in modes
27026
(define_mode_iterator VIshort [V8HI V16QI])
27028
@@ -159,9 +163,19 @@
27029
;; Like VM, except don't do TImode
27030
(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI])
27032
-(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")])
27033
-(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")])
27034
+(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
27035
+(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
27036
+(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
27037
+ (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
27038
+ (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
27039
+ (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")])
27041
+;; Vector pack/unpack
27042
+(define_mode_iterator VP [V2DI V4SI V8HI])
27043
+(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")])
27044
+(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")])
27045
+(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")])
27047
;; Vector move instructions.
27048
(define_insn "*altivec_mov<mode>"
27049
[(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v")
27050
@@ -378,10 +392,10 @@
27053
(define_insn "add<mode>3"
27054
- [(set (match_operand:VI 0 "register_operand" "=v")
27055
- (plus:VI (match_operand:VI 1 "register_operand" "v")
27056
- (match_operand:VI 2 "register_operand" "v")))]
27058
+ [(set (match_operand:VI2 0 "register_operand" "=v")
27059
+ (plus:VI2 (match_operand:VI2 1 "register_operand" "v")
27060
+ (match_operand:VI2 2 "register_operand" "v")))]
27062
"vaddu<VI_char>m %0,%1,%2"
27063
[(set_attr "type" "vecsimple")])
27065
@@ -398,7 +412,7 @@
27066
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
27067
(match_operand:V4SI 2 "register_operand" "v")]
27070
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
27072
[(set_attr "type" "vecsimple")])
27074
@@ -405,10 +419,10 @@
27075
(define_insn "altivec_vaddu<VI_char>s"
27076
[(set (match_operand:VI 0 "register_operand" "=v")
27077
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
27078
- (match_operand:VI 2 "register_operand" "v")]
27079
+ (match_operand:VI 2 "register_operand" "v")]
27081
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
27084
"vaddu<VI_char>s %0,%1,%2"
27085
[(set_attr "type" "vecsimple")])
27087
@@ -418,16 +432,16 @@
27088
(match_operand:VI 2 "register_operand" "v")]
27090
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
27092
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
27093
"vadds<VI_char>s %0,%1,%2"
27094
[(set_attr "type" "vecsimple")])
27097
(define_insn "sub<mode>3"
27098
- [(set (match_operand:VI 0 "register_operand" "=v")
27099
- (minus:VI (match_operand:VI 1 "register_operand" "v")
27100
- (match_operand:VI 2 "register_operand" "v")))]
27102
+ [(set (match_operand:VI2 0 "register_operand" "=v")
27103
+ (minus:VI2 (match_operand:VI2 1 "register_operand" "v")
27104
+ (match_operand:VI2 2 "register_operand" "v")))]
27106
"vsubu<VI_char>m %0,%1,%2"
27107
[(set_attr "type" "vecsimple")])
27109
@@ -444,7 +458,7 @@
27110
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
27111
(match_operand:V4SI 2 "register_operand" "v")]
27114
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
27116
[(set_attr "type" "vecsimple")])
27118
@@ -454,7 +468,7 @@
27119
(match_operand:VI 2 "register_operand" "v")]
27121
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
27123
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
27124
"vsubu<VI_char>s %0,%1,%2"
27125
[(set_attr "type" "vecsimple")])
27127
@@ -464,7 +478,7 @@
27128
(match_operand:VI 2 "register_operand" "v")]
27130
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
27132
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
27133
"vsubs<VI_char>s %0,%1,%2"
27134
[(set_attr "type" "vecsimple")])
27136
@@ -483,7 +497,7 @@
27137
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
27138
(match_operand:VI 2 "register_operand" "v")]
27141
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
27142
"vavgs<VI_char> %0,%1,%2"
27143
[(set_attr "type" "vecsimple")])
27145
@@ -492,31 +506,31 @@
27146
(unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
27147
(match_operand:V4SF 2 "register_operand" "v")]
27150
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
27152
[(set_attr "type" "veccmp")])
27154
(define_insn "*altivec_eq<mode>"
27155
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
27156
- (eq:VI (match_operand:VI 1 "altivec_register_operand" "v")
27157
- (match_operand:VI 2 "altivec_register_operand" "v")))]
27159
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
27160
+ (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
27161
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
27163
"vcmpequ<VI_char> %0,%1,%2"
27164
[(set_attr "type" "veccmp")])
27166
(define_insn "*altivec_gt<mode>"
27167
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
27168
- (gt:VI (match_operand:VI 1 "altivec_register_operand" "v")
27169
- (match_operand:VI 2 "altivec_register_operand" "v")))]
27171
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
27172
+ (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
27173
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
27175
"vcmpgts<VI_char> %0,%1,%2"
27176
[(set_attr "type" "veccmp")])
27178
(define_insn "*altivec_gtu<mode>"
27179
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
27180
- (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v")
27181
- (match_operand:VI 2 "altivec_register_operand" "v")))]
27183
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
27184
+ (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
27185
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
27187
"vcmpgtu<VI_char> %0,%1,%2"
27188
[(set_attr "type" "veccmp")])
27190
@@ -642,7 +656,7 @@
27191
convert_move (small_swap, swap, 0);
27193
low_product = gen_reg_rtx (V4SImode);
27194
- emit_insn (gen_vec_widen_umult_odd_v8hi (low_product, one, two));
27195
+ emit_insn (gen_altivec_vmulouh (low_product, one, two));
27197
high_product = gen_reg_rtx (V4SImode);
27198
emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
27199
@@ -666,14 +680,23 @@
27200
rtx high = gen_reg_rtx (V4SImode);
27201
rtx low = gen_reg_rtx (V4SImode);
27203
- emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2]));
27204
- emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2]));
27205
+ if (BYTES_BIG_ENDIAN)
27207
+ emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
27208
+ emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
27209
+ emit_insn (gen_altivec_vmrghw_direct (high, even, odd));
27210
+ emit_insn (gen_altivec_vmrglw_direct (low, even, odd));
27211
+ emit_insn (gen_altivec_vpkuwum_direct (operands[0], high, low));
27215
+ emit_insn (gen_altivec_vmulosh (even, operands[1], operands[2]));
27216
+ emit_insn (gen_altivec_vmulesh (odd, operands[1], operands[2]));
27217
+ emit_insn (gen_altivec_vmrghw_direct (high, odd, even));
27218
+ emit_insn (gen_altivec_vmrglw_direct (low, odd, even));
27219
+ emit_insn (gen_altivec_vpkuwum_direct (operands[0], low, high));
27222
- emit_insn (gen_altivec_vmrghw (high, even, odd));
27223
- emit_insn (gen_altivec_vmrglw (low, even, odd));
27225
- emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
27230
@@ -744,18 +767,18 @@
27233
(define_insn "umax<mode>3"
27234
- [(set (match_operand:VI 0 "register_operand" "=v")
27235
- (umax:VI (match_operand:VI 1 "register_operand" "v")
27236
- (match_operand:VI 2 "register_operand" "v")))]
27238
+ [(set (match_operand:VI2 0 "register_operand" "=v")
27239
+ (umax:VI2 (match_operand:VI2 1 "register_operand" "v")
27240
+ (match_operand:VI2 2 "register_operand" "v")))]
27242
"vmaxu<VI_char> %0,%1,%2"
27243
[(set_attr "type" "vecsimple")])
27245
(define_insn "smax<mode>3"
27246
- [(set (match_operand:VI 0 "register_operand" "=v")
27247
- (smax:VI (match_operand:VI 1 "register_operand" "v")
27248
- (match_operand:VI 2 "register_operand" "v")))]
27250
+ [(set (match_operand:VI2 0 "register_operand" "=v")
27251
+ (smax:VI2 (match_operand:VI2 1 "register_operand" "v")
27252
+ (match_operand:VI2 2 "register_operand" "v")))]
27254
"vmaxs<VI_char> %0,%1,%2"
27255
[(set_attr "type" "vecsimple")])
27257
@@ -768,18 +791,18 @@
27258
[(set_attr "type" "veccmp")])
27260
(define_insn "umin<mode>3"
27261
- [(set (match_operand:VI 0 "register_operand" "=v")
27262
- (umin:VI (match_operand:VI 1 "register_operand" "v")
27263
- (match_operand:VI 2 "register_operand" "v")))]
27265
+ [(set (match_operand:VI2 0 "register_operand" "=v")
27266
+ (umin:VI2 (match_operand:VI2 1 "register_operand" "v")
27267
+ (match_operand:VI2 2 "register_operand" "v")))]
27269
"vminu<VI_char> %0,%1,%2"
27270
[(set_attr "type" "vecsimple")])
27272
(define_insn "smin<mode>3"
27273
- [(set (match_operand:VI 0 "register_operand" "=v")
27274
- (smin:VI (match_operand:VI 1 "register_operand" "v")
27275
- (match_operand:VI 2 "register_operand" "v")))]
27277
+ [(set (match_operand:VI2 0 "register_operand" "=v")
27278
+ (smin:VI2 (match_operand:VI2 1 "register_operand" "v")
27279
+ (match_operand:VI2 2 "register_operand" "v")))]
27281
"vmins<VI_char> %0,%1,%2"
27282
[(set_attr "type" "vecsimple")])
27284
@@ -823,9 +846,41 @@
27285
"vmladduhm %0,%1,%2,%3"
27286
[(set_attr "type" "veccomplex")])
27288
-(define_insn "altivec_vmrghb"
27289
+(define_expand "altivec_vmrghb"
27290
+ [(use (match_operand:V16QI 0 "register_operand" ""))
27291
+ (use (match_operand:V16QI 1 "register_operand" ""))
27292
+ (use (match_operand:V16QI 2 "register_operand" ""))]
27298
+ /* Special handling for LE with -maltivec=be. */
27299
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
27301
+ v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
27302
+ GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
27303
+ GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
27304
+ GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
27305
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
27309
+ v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
27310
+ GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
27311
+ GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
27312
+ GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
27313
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
27316
+ x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
27317
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
27321
+(define_insn "*altivec_vmrghb_internal"
27322
[(set (match_operand:V16QI 0 "register_operand" "=v")
27323
- (vec_select:V16QI
27324
+ (vec_select:V16QI
27326
(match_operand:V16QI 1 "register_operand" "v")
27327
(match_operand:V16QI 2 "register_operand" "v"))
27328
@@ -838,12 +893,54 @@
27329
(const_int 6) (const_int 22)
27330
(const_int 7) (const_int 23)])))]
27333
+ if (BYTES_BIG_ENDIAN)
27334
+ return "vmrghb %0,%1,%2";
27336
+ return "vmrglb %0,%2,%1";
27338
+ [(set_attr "type" "vecperm")])
27340
+(define_insn "altivec_vmrghb_direct"
27341
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
27342
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
27343
+ (match_operand:V16QI 2 "register_operand" "v")]
27344
+ UNSPEC_VMRGH_DIRECT))]
27347
[(set_attr "type" "vecperm")])
27349
-(define_insn "altivec_vmrghh"
27350
+(define_expand "altivec_vmrghh"
27351
+ [(use (match_operand:V8HI 0 "register_operand" ""))
27352
+ (use (match_operand:V8HI 1 "register_operand" ""))
27353
+ (use (match_operand:V8HI 2 "register_operand" ""))]
27359
+ /* Special handling for LE with -maltivec=be. */
27360
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
27362
+ v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
27363
+ GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
27364
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
27368
+ v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
27369
+ GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
27370
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
27373
+ x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
27374
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
27378
+(define_insn "*altivec_vmrghh_internal"
27379
[(set (match_operand:V8HI 0 "register_operand" "=v")
27383
(match_operand:V8HI 1 "register_operand" "v")
27384
(match_operand:V8HI 2 "register_operand" "v"))
27385
@@ -852,10 +949,50 @@
27386
(const_int 2) (const_int 10)
27387
(const_int 3) (const_int 11)])))]
27390
+ if (BYTES_BIG_ENDIAN)
27391
+ return "vmrghh %0,%1,%2";
27393
+ return "vmrglh %0,%2,%1";
27395
+ [(set_attr "type" "vecperm")])
27397
+(define_insn "altivec_vmrghh_direct"
27398
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
27399
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
27400
+ (match_operand:V8HI 2 "register_operand" "v")]
27401
+ UNSPEC_VMRGH_DIRECT))]
27404
[(set_attr "type" "vecperm")])
27406
-(define_insn "altivec_vmrghw"
27407
+(define_expand "altivec_vmrghw"
27408
+ [(use (match_operand:V4SI 0 "register_operand" ""))
27409
+ (use (match_operand:V4SI 1 "register_operand" ""))
27410
+ (use (match_operand:V4SI 2 "register_operand" ""))]
27411
+ "VECTOR_MEM_ALTIVEC_P (V4SImode)"
27416
+ /* Special handling for LE with -maltivec=be. */
27417
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
27419
+ v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
27420
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
27424
+ v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
27425
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
27428
+ x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
27429
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
27433
+(define_insn "*altivec_vmrghw_internal"
27434
[(set (match_operand:V4SI 0 "register_operand" "=v")
27437
@@ -864,6 +1001,20 @@
27438
(parallel [(const_int 0) (const_int 4)
27439
(const_int 1) (const_int 5)])))]
27440
"VECTOR_MEM_ALTIVEC_P (V4SImode)"
27442
+ if (BYTES_BIG_ENDIAN)
27443
+ return "vmrghw %0,%1,%2";
27445
+ return "vmrglw %0,%2,%1";
27447
+ [(set_attr "type" "vecperm")])
27449
+(define_insn "altivec_vmrghw_direct"
27450
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
27451
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
27452
+ (match_operand:V4SI 2 "register_operand" "v")]
27453
+ UNSPEC_VMRGH_DIRECT))]
27456
[(set_attr "type" "vecperm")])
27458
@@ -876,10 +1027,47 @@
27459
(parallel [(const_int 0) (const_int 4)
27460
(const_int 1) (const_int 5)])))]
27461
"VECTOR_MEM_ALTIVEC_P (V4SFmode)"
27462
- "vmrghw %0,%1,%2"
27464
+ if (BYTES_BIG_ENDIAN)
27465
+ return "vmrghw %0,%1,%2";
27467
+ return "vmrglw %0,%2,%1";
27469
[(set_attr "type" "vecperm")])
27471
-(define_insn "altivec_vmrglb"
27472
+(define_expand "altivec_vmrglb"
27473
+ [(use (match_operand:V16QI 0 "register_operand" ""))
27474
+ (use (match_operand:V16QI 1 "register_operand" ""))
27475
+ (use (match_operand:V16QI 2 "register_operand" ""))]
27481
+ /* Special handling for LE with -maltivec=be. */
27482
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
27484
+ v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
27485
+ GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
27486
+ GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
27487
+ GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
27488
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
27492
+ v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
27493
+ GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
27494
+ GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
27495
+ GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
27496
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
27499
+ x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
27500
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
27504
+(define_insn "*altivec_vmrglb_internal"
27505
[(set (match_operand:V16QI 0 "register_operand" "=v")
27508
@@ -894,10 +1082,52 @@
27509
(const_int 14) (const_int 30)
27510
(const_int 15) (const_int 31)])))]
27513
+ if (BYTES_BIG_ENDIAN)
27514
+ return "vmrglb %0,%1,%2";
27516
+ return "vmrghb %0,%2,%1";
27518
+ [(set_attr "type" "vecperm")])
27520
+(define_insn "altivec_vmrglb_direct"
27521
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
27522
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
27523
+ (match_operand:V16QI 2 "register_operand" "v")]
27524
+ UNSPEC_VMRGL_DIRECT))]
27527
[(set_attr "type" "vecperm")])
27529
-(define_insn "altivec_vmrglh"
27530
+(define_expand "altivec_vmrglh"
27531
+ [(use (match_operand:V8HI 0 "register_operand" ""))
27532
+ (use (match_operand:V8HI 1 "register_operand" ""))
27533
+ (use (match_operand:V8HI 2 "register_operand" ""))]
27539
+ /* Special handling for LE with -maltivec=be. */
27540
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
27542
+ v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
27543
+ GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
27544
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
27548
+ v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
27549
+ GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
27550
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
27553
+ x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
27554
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
27558
+(define_insn "*altivec_vmrglh_internal"
27559
[(set (match_operand:V8HI 0 "register_operand" "=v")
27562
@@ -908,10 +1138,50 @@
27563
(const_int 6) (const_int 14)
27564
(const_int 7) (const_int 15)])))]
27567
+ if (BYTES_BIG_ENDIAN)
27568
+ return "vmrglh %0,%1,%2";
27570
+ return "vmrghh %0,%2,%1";
27572
+ [(set_attr "type" "vecperm")])
27574
+(define_insn "altivec_vmrglh_direct"
27575
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
27576
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
27577
+ (match_operand:V8HI 2 "register_operand" "v")]
27578
+ UNSPEC_VMRGL_DIRECT))]
27581
[(set_attr "type" "vecperm")])
27583
-(define_insn "altivec_vmrglw"
27584
+(define_expand "altivec_vmrglw"
27585
+ [(use (match_operand:V4SI 0 "register_operand" ""))
27586
+ (use (match_operand:V4SI 1 "register_operand" ""))
27587
+ (use (match_operand:V4SI 2 "register_operand" ""))]
27588
+ "VECTOR_MEM_ALTIVEC_P (V4SImode)"
27593
+ /* Special handling for LE with -maltivec=be. */
27594
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
27596
+ v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
27597
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
27601
+ v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
27602
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
27605
+ x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
27606
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
27610
+(define_insn "*altivec_vmrglw_internal"
27611
[(set (match_operand:V4SI 0 "register_operand" "=v")
27614
@@ -920,6 +1190,20 @@
27615
(parallel [(const_int 2) (const_int 6)
27616
(const_int 3) (const_int 7)])))]
27617
"VECTOR_MEM_ALTIVEC_P (V4SImode)"
27619
+ if (BYTES_BIG_ENDIAN)
27620
+ return "vmrglw %0,%1,%2";
27622
+ return "vmrghw %0,%2,%1";
27624
+ [(set_attr "type" "vecperm")])
27626
+(define_insn "altivec_vmrglw_direct"
27627
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
27628
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
27629
+ (match_operand:V4SI 2 "register_operand" "v")]
27630
+ UNSPEC_VMRGL_DIRECT))]
27633
[(set_attr "type" "vecperm")])
27635
@@ -932,10 +1216,154 @@
27636
(parallel [(const_int 2) (const_int 6)
27637
(const_int 3) (const_int 7)])))]
27638
"VECTOR_MEM_ALTIVEC_P (V4SFmode)"
27639
- "vmrglw %0,%1,%2"
27641
+ if (BYTES_BIG_ENDIAN)
27642
+ return "vmrglw %0,%1,%2";
27644
+ return "vmrghw %0,%2,%1";
27646
[(set_attr "type" "vecperm")])
27648
-(define_insn "vec_widen_umult_even_v16qi"
27649
+;; Power8 vector merge even/odd
27650
+(define_insn "p8_vmrgew"
27651
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
27654
+ (match_operand:V4SI 1 "register_operand" "v")
27655
+ (match_operand:V4SI 2 "register_operand" "v"))
27656
+ (parallel [(const_int 0) (const_int 4)
27657
+ (const_int 2) (const_int 6)])))]
27658
+ "TARGET_P8_VECTOR"
27660
+ if (BYTES_BIG_ENDIAN)
27661
+ return "vmrgew %0,%1,%2";
27663
+ return "vmrgow %0,%2,%1";
27665
+ [(set_attr "type" "vecperm")])
27667
+(define_insn "p8_vmrgow"
27668
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
27671
+ (match_operand:V4SI 1 "register_operand" "v")
27672
+ (match_operand:V4SI 2 "register_operand" "v"))
27673
+ (parallel [(const_int 1) (const_int 5)
27674
+ (const_int 3) (const_int 7)])))]
27675
+ "TARGET_P8_VECTOR"
27677
+ if (BYTES_BIG_ENDIAN)
27678
+ return "vmrgow %0,%1,%2";
27680
+ return "vmrgew %0,%2,%1";
27682
+ [(set_attr "type" "vecperm")])
27684
+(define_expand "vec_widen_umult_even_v16qi"
27685
+ [(use (match_operand:V8HI 0 "register_operand" ""))
27686
+ (use (match_operand:V16QI 1 "register_operand" ""))
27687
+ (use (match_operand:V16QI 2 "register_operand" ""))]
27690
+ if (VECTOR_ELT_ORDER_BIG)
27691
+ emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
27693
+ emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
27697
+(define_expand "vec_widen_smult_even_v16qi"
27698
+ [(use (match_operand:V8HI 0 "register_operand" ""))
27699
+ (use (match_operand:V16QI 1 "register_operand" ""))
27700
+ (use (match_operand:V16QI 2 "register_operand" ""))]
27703
+ if (VECTOR_ELT_ORDER_BIG)
27704
+ emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
27706
+ emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
27710
+(define_expand "vec_widen_umult_even_v8hi"
27711
+ [(use (match_operand:V4SI 0 "register_operand" ""))
27712
+ (use (match_operand:V8HI 1 "register_operand" ""))
27713
+ (use (match_operand:V8HI 2 "register_operand" ""))]
27716
+ if (VECTOR_ELT_ORDER_BIG)
27717
+ emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
27719
+ emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
27723
+(define_expand "vec_widen_smult_even_v8hi"
27724
+ [(use (match_operand:V4SI 0 "register_operand" ""))
27725
+ (use (match_operand:V8HI 1 "register_operand" ""))
27726
+ (use (match_operand:V8HI 2 "register_operand" ""))]
27729
+ if (VECTOR_ELT_ORDER_BIG)
27730
+ emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
27732
+ emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
27736
+(define_expand "vec_widen_umult_odd_v16qi"
27737
+ [(use (match_operand:V8HI 0 "register_operand" ""))
27738
+ (use (match_operand:V16QI 1 "register_operand" ""))
27739
+ (use (match_operand:V16QI 2 "register_operand" ""))]
27742
+ if (VECTOR_ELT_ORDER_BIG)
27743
+ emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
27745
+ emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
27749
+(define_expand "vec_widen_smult_odd_v16qi"
27750
+ [(use (match_operand:V8HI 0 "register_operand" ""))
27751
+ (use (match_operand:V16QI 1 "register_operand" ""))
27752
+ (use (match_operand:V16QI 2 "register_operand" ""))]
27755
+ if (VECTOR_ELT_ORDER_BIG)
27756
+ emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
27758
+ emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
27762
+(define_expand "vec_widen_umult_odd_v8hi"
27763
+ [(use (match_operand:V4SI 0 "register_operand" ""))
27764
+ (use (match_operand:V8HI 1 "register_operand" ""))
27765
+ (use (match_operand:V8HI 2 "register_operand" ""))]
27768
+ if (VECTOR_ELT_ORDER_BIG)
27769
+ emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
27771
+ emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
27775
+(define_expand "vec_widen_smult_odd_v8hi"
27776
+ [(use (match_operand:V4SI 0 "register_operand" ""))
27777
+ (use (match_operand:V8HI 1 "register_operand" ""))
27778
+ (use (match_operand:V8HI 2 "register_operand" ""))]
27781
+ if (VECTOR_ELT_ORDER_BIG)
27782
+ emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
27784
+ emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
27788
+(define_insn "altivec_vmuleub"
27789
[(set (match_operand:V8HI 0 "register_operand" "=v")
27790
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
27791
(match_operand:V16QI 2 "register_operand" "v")]
27792
@@ -944,43 +1372,25 @@
27794
[(set_attr "type" "veccomplex")])
27796
-(define_insn "vec_widen_smult_even_v16qi"
27797
+(define_insn "altivec_vmuloub"
27798
[(set (match_operand:V8HI 0 "register_operand" "=v")
27799
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
27800
(match_operand:V16QI 2 "register_operand" "v")]
27801
- UNSPEC_VMULESB))]
27802
+ UNSPEC_VMULOUB))]
27804
- "vmulesb %0,%1,%2"
27805
+ "vmuloub %0,%1,%2"
27806
[(set_attr "type" "veccomplex")])
27808
-(define_insn "vec_widen_umult_even_v8hi"
27809
- [(set (match_operand:V4SI 0 "register_operand" "=v")
27810
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
27811
- (match_operand:V8HI 2 "register_operand" "v")]
27812
- UNSPEC_VMULEUH))]
27814
- "vmuleuh %0,%1,%2"
27815
- [(set_attr "type" "veccomplex")])
27817
-(define_insn "vec_widen_smult_even_v8hi"
27818
- [(set (match_operand:V4SI 0 "register_operand" "=v")
27819
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
27820
- (match_operand:V8HI 2 "register_operand" "v")]
27821
- UNSPEC_VMULESH))]
27823
- "vmulesh %0,%1,%2"
27824
- [(set_attr "type" "veccomplex")])
27826
-(define_insn "vec_widen_umult_odd_v16qi"
27827
+(define_insn "altivec_vmulesb"
27828
[(set (match_operand:V8HI 0 "register_operand" "=v")
27829
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
27830
(match_operand:V16QI 2 "register_operand" "v")]
27831
- UNSPEC_VMULOUB))]
27832
+ UNSPEC_VMULESB))]
27834
- "vmuloub %0,%1,%2"
27835
+ "vmulesb %0,%1,%2"
27836
[(set_attr "type" "veccomplex")])
27838
-(define_insn "vec_widen_smult_odd_v16qi"
27839
+(define_insn "altivec_vmulosb"
27840
[(set (match_operand:V8HI 0 "register_operand" "=v")
27841
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
27842
(match_operand:V16QI 2 "register_operand" "v")]
27843
@@ -989,19 +1399,37 @@
27845
[(set_attr "type" "veccomplex")])
27847
-(define_insn "vec_widen_umult_odd_v8hi"
27848
+(define_insn "altivec_vmuleuh"
27849
[(set (match_operand:V4SI 0 "register_operand" "=v")
27850
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
27851
(match_operand:V8HI 2 "register_operand" "v")]
27852
+ UNSPEC_VMULEUH))]
27854
+ "vmuleuh %0,%1,%2"
27855
+ [(set_attr "type" "veccomplex")])
27857
+(define_insn "altivec_vmulouh"
27858
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
27859
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
27860
+ (match_operand:V8HI 2 "register_operand" "v")]
27864
[(set_attr "type" "veccomplex")])
27866
-(define_insn "vec_widen_smult_odd_v8hi"
27867
+(define_insn "altivec_vmulesh"
27868
[(set (match_operand:V4SI 0 "register_operand" "=v")
27869
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
27870
(match_operand:V8HI 2 "register_operand" "v")]
27871
+ UNSPEC_VMULESH))]
27873
+ "vmulesh %0,%1,%2"
27874
+ [(set_attr "type" "veccomplex")])
27876
+(define_insn "altivec_vmulosh"
27877
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
27878
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
27879
+ (match_operand:V8HI 2 "register_operand" "v")]
27883
@@ -1008,74 +1436,7 @@
27884
[(set_attr "type" "veccomplex")])
27887
-;; logical ops. Have the logical ops follow the memory ops in
27888
-;; terms of whether to prefer VSX or Altivec
27890
-(define_insn "*altivec_and<mode>3"
27891
- [(set (match_operand:VM 0 "register_operand" "=v")
27892
- (and:VM (match_operand:VM 1 "register_operand" "v")
27893
- (match_operand:VM 2 "register_operand" "v")))]
27894
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
27896
- [(set_attr "type" "vecsimple")])
27898
-(define_insn "*altivec_ior<mode>3"
27899
- [(set (match_operand:VM 0 "register_operand" "=v")
27900
- (ior:VM (match_operand:VM 1 "register_operand" "v")
27901
- (match_operand:VM 2 "register_operand" "v")))]
27902
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
27904
- [(set_attr "type" "vecsimple")])
27906
-(define_insn "*altivec_xor<mode>3"
27907
- [(set (match_operand:VM 0 "register_operand" "=v")
27908
- (xor:VM (match_operand:VM 1 "register_operand" "v")
27909
- (match_operand:VM 2 "register_operand" "v")))]
27910
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
27912
- [(set_attr "type" "vecsimple")])
27914
-(define_insn "*altivec_one_cmpl<mode>2"
27915
- [(set (match_operand:VM 0 "register_operand" "=v")
27916
- (not:VM (match_operand:VM 1 "register_operand" "v")))]
27917
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
27919
- [(set_attr "type" "vecsimple")])
27921
-(define_insn "*altivec_nor<mode>3"
27922
- [(set (match_operand:VM 0 "register_operand" "=v")
27923
- (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
27924
- (match_operand:VM 2 "register_operand" "v"))))]
27925
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
27927
- [(set_attr "type" "vecsimple")])
27929
-(define_insn "*altivec_andc<mode>3"
27930
- [(set (match_operand:VM 0 "register_operand" "=v")
27931
- (and:VM (not:VM (match_operand:VM 2 "register_operand" "v"))
27932
- (match_operand:VM 1 "register_operand" "v")))]
27933
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
27935
- [(set_attr "type" "vecsimple")])
27937
-(define_insn "altivec_vpkuhum"
27938
- [(set (match_operand:V16QI 0 "register_operand" "=v")
27939
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
27940
- (match_operand:V8HI 2 "register_operand" "v")]
27941
- UNSPEC_VPKUHUM))]
27943
- "vpkuhum %0,%1,%2"
27944
- [(set_attr "type" "vecperm")])
27946
-(define_insn "altivec_vpkuwum"
27947
- [(set (match_operand:V8HI 0 "register_operand" "=v")
27948
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
27949
- (match_operand:V4SI 2 "register_operand" "v")]
27950
- UNSPEC_VPKUWUM))]
27952
- "vpkuwum %0,%1,%2"
27953
- [(set_attr "type" "vecperm")])
27955
+;; Vector pack/unpack
27956
(define_insn "altivec_vpkpx"
27957
[(set (match_operand:V8HI 0 "register_operand" "=v")
27958
(unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
27959
@@ -1082,74 +1443,95 @@
27960
(match_operand:V4SI 2 "register_operand" "v")]
27966
+ if (VECTOR_ELT_ORDER_BIG)
27967
+ return \"vpkpx %0,%1,%2\";
27969
+ return \"vpkpx %0,%2,%1\";
27971
[(set_attr "type" "vecperm")])
27973
-(define_insn "altivec_vpkshss"
27974
- [(set (match_operand:V16QI 0 "register_operand" "=v")
27975
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
27976
- (match_operand:V8HI 2 "register_operand" "v")]
27978
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
27980
- "vpkshss %0,%1,%2"
27981
+(define_insn "altivec_vpks<VI_char>ss"
27982
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
27983
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
27984
+ (match_operand:VP 2 "register_operand" "v")]
27985
+ UNSPEC_VPACK_SIGN_SIGN_SAT))]
27989
+ if (VECTOR_ELT_ORDER_BIG)
27990
+ return \"vpks<VI_char>ss %0,%1,%2\";
27992
+ return \"vpks<VI_char>ss %0,%2,%1\";
27994
[(set_attr "type" "vecperm")])
27996
-(define_insn "altivec_vpkswss"
27997
- [(set (match_operand:V8HI 0 "register_operand" "=v")
27998
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
27999
- (match_operand:V4SI 2 "register_operand" "v")]
28001
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28003
- "vpkswss %0,%1,%2"
28004
+(define_insn "altivec_vpks<VI_char>us"
28005
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
28006
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
28007
+ (match_operand:VP 2 "register_operand" "v")]
28008
+ UNSPEC_VPACK_SIGN_UNS_SAT))]
28012
+ if (VECTOR_ELT_ORDER_BIG)
28013
+ return \"vpks<VI_char>us %0,%1,%2\";
28015
+ return \"vpks<VI_char>us %0,%2,%1\";
28017
[(set_attr "type" "vecperm")])
28019
-(define_insn "altivec_vpkuhus"
28020
- [(set (match_operand:V16QI 0 "register_operand" "=v")
28021
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
28022
- (match_operand:V8HI 2 "register_operand" "v")]
28024
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28026
- "vpkuhus %0,%1,%2"
28027
+(define_insn "altivec_vpku<VI_char>us"
28028
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
28029
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
28030
+ (match_operand:VP 2 "register_operand" "v")]
28031
+ UNSPEC_VPACK_UNS_UNS_SAT))]
28035
+ if (VECTOR_ELT_ORDER_BIG)
28036
+ return \"vpku<VI_char>us %0,%1,%2\";
28038
+ return \"vpku<VI_char>us %0,%2,%1\";
28040
[(set_attr "type" "vecperm")])
28042
-(define_insn "altivec_vpkshus"
28043
- [(set (match_operand:V16QI 0 "register_operand" "=v")
28044
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
28045
- (match_operand:V8HI 2 "register_operand" "v")]
28047
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28049
- "vpkshus %0,%1,%2"
28050
+(define_insn "altivec_vpku<VI_char>um"
28051
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
28052
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
28053
+ (match_operand:VP 2 "register_operand" "v")]
28054
+ UNSPEC_VPACK_UNS_UNS_MOD))]
28058
+ if (VECTOR_ELT_ORDER_BIG)
28059
+ return \"vpku<VI_char>um %0,%1,%2\";
28061
+ return \"vpku<VI_char>um %0,%2,%1\";
28063
[(set_attr "type" "vecperm")])
28065
-(define_insn "altivec_vpkuwus"
28066
- [(set (match_operand:V8HI 0 "register_operand" "=v")
28067
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
28068
- (match_operand:V4SI 2 "register_operand" "v")]
28070
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28072
- "vpkuwus %0,%1,%2"
28073
+(define_insn "altivec_vpku<VI_char>um_direct"
28074
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
28075
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
28076
+ (match_operand:VP 2 "register_operand" "v")]
28077
+ UNSPEC_VPACK_UNS_UNS_MOD_DIRECT))]
28081
+ if (BYTES_BIG_ENDIAN)
28082
+ return \"vpku<VI_char>um %0,%1,%2\";
28084
+ return \"vpku<VI_char>um %0,%2,%1\";
28086
[(set_attr "type" "vecperm")])
28088
-(define_insn "altivec_vpkswus"
28089
- [(set (match_operand:V8HI 0 "register_operand" "=v")
28090
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
28091
- (match_operand:V4SI 2 "register_operand" "v")]
28093
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28095
- "vpkswus %0,%1,%2"
28096
- [(set_attr "type" "vecperm")])
28098
(define_insn "*altivec_vrl<VI_char>"
28099
- [(set (match_operand:VI 0 "register_operand" "=v")
28100
- (rotate:VI (match_operand:VI 1 "register_operand" "v")
28101
- (match_operand:VI 2 "register_operand" "v")))]
28103
+ [(set (match_operand:VI2 0 "register_operand" "=v")
28104
+ (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
28105
+ (match_operand:VI2 2 "register_operand" "v")))]
28107
"vrl<VI_char> %0,%1,%2"
28108
[(set_attr "type" "vecsimple")])
28110
@@ -1172,26 +1554,26 @@
28111
[(set_attr "type" "vecperm")])
28113
(define_insn "*altivec_vsl<VI_char>"
28114
- [(set (match_operand:VI 0 "register_operand" "=v")
28115
- (ashift:VI (match_operand:VI 1 "register_operand" "v")
28116
- (match_operand:VI 2 "register_operand" "v")))]
28118
+ [(set (match_operand:VI2 0 "register_operand" "=v")
28119
+ (ashift:VI2 (match_operand:VI2 1 "register_operand" "v")
28120
+ (match_operand:VI2 2 "register_operand" "v")))]
28122
"vsl<VI_char> %0,%1,%2"
28123
[(set_attr "type" "vecsimple")])
28125
(define_insn "*altivec_vsr<VI_char>"
28126
- [(set (match_operand:VI 0 "register_operand" "=v")
28127
- (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
28128
- (match_operand:VI 2 "register_operand" "v")))]
28130
+ [(set (match_operand:VI2 0 "register_operand" "=v")
28131
+ (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
28132
+ (match_operand:VI2 2 "register_operand" "v")))]
28134
"vsr<VI_char> %0,%1,%2"
28135
[(set_attr "type" "vecsimple")])
28137
(define_insn "*altivec_vsra<VI_char>"
28138
- [(set (match_operand:VI 0 "register_operand" "=v")
28139
- (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
28140
- (match_operand:VI 2 "register_operand" "v")))]
28142
+ [(set (match_operand:VI2 0 "register_operand" "=v")
28143
+ (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
28144
+ (match_operand:VI2 2 "register_operand" "v")))]
28146
"vsra<VI_char> %0,%1,%2"
28147
[(set_attr "type" "vecsimple")])
28149
@@ -1233,15 +1615,29 @@
28150
"vsum4s<VI_char>s %0,%1,%2"
28151
[(set_attr "type" "veccomplex")])
28153
+;; FIXME: For the following two patterns, the scratch should only be
28154
+;; allocated for !VECTOR_ELT_ORDER_BIG, and the instructions should
28155
+;; be emitted separately.
28156
(define_insn "altivec_vsum2sws"
28157
[(set (match_operand:V4SI 0 "register_operand" "=v")
28158
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
28159
(match_operand:V4SI 2 "register_operand" "v")]
28161
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28162
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
28163
+ (clobber (match_scratch:V4SI 3 "=v"))]
28165
- "vsum2sws %0,%1,%2"
28166
- [(set_attr "type" "veccomplex")])
28168
+ if (VECTOR_ELT_ORDER_BIG)
28169
+ return "vsum2sws %0,%1,%2";
28171
+ return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4";
28173
+ [(set_attr "type" "veccomplex")
28174
+ (set (attr "length")
28176
+ (match_test "VECTOR_ELT_ORDER_BIG")
28177
+ (const_string "4")
28178
+ (const_string "12")))])
28180
(define_insn "altivec_vsumsws"
28181
[(set (match_operand:V4SI 0 "register_operand" "=v")
28182
@@ -1248,12 +1644,54 @@
28183
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
28184
(match_operand:V4SI 2 "register_operand" "v")]
28186
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
28187
+ (clobber (match_scratch:V4SI 3 "=v"))]
28190
+ if (VECTOR_ELT_ORDER_BIG)
28191
+ return "vsumsws %0,%1,%2";
28193
+ return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvsldoi %0,%3,%3,12";
28195
+ [(set_attr "type" "veccomplex")
28196
+ (set (attr "length")
28198
+ (match_test "(VECTOR_ELT_ORDER_BIG)")
28199
+ (const_string "4")
28200
+ (const_string "12")))])
28202
+(define_insn "altivec_vsumsws_direct"
28203
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
28204
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
28205
+ (match_operand:V4SI 2 "register_operand" "v")]
28206
+ UNSPEC_VSUMSWS_DIRECT))
28207
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
28210
[(set_attr "type" "veccomplex")])
28212
-(define_insn "altivec_vspltb"
28213
+(define_expand "altivec_vspltb"
28214
+ [(use (match_operand:V16QI 0 "register_operand" ""))
28215
+ (use (match_operand:V16QI 1 "register_operand" ""))
28216
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
28222
+ /* Special handling for LE with -maltivec=be. We have to reflect
28223
+ the actual selected index for the splat in the RTL. */
28224
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28225
+ operands[2] = GEN_INT (15 - INTVAL (operands[2]));
28227
+ v = gen_rtvec (1, operands[2]);
28228
+ x = gen_rtx_VEC_SELECT (QImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
28229
+ x = gen_rtx_VEC_DUPLICATE (V16QImode, x);
28230
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
28234
+(define_insn "*altivec_vspltb_internal"
28235
[(set (match_operand:V16QI 0 "register_operand" "=v")
28236
(vec_duplicate:V16QI
28237
(vec_select:QI (match_operand:V16QI 1 "register_operand" "v")
28238
@@ -1260,10 +1698,48 @@
28240
[(match_operand:QI 2 "u5bit_cint_operand" "")]))))]
28243
+ /* For true LE, this adjusts the selected index. For LE with
28244
+ -maltivec=be, this reverses what was done in the define_expand
28245
+ because the instruction already has big-endian bias. */
28246
+ if (!BYTES_BIG_ENDIAN)
28247
+ operands[2] = GEN_INT (15 - INTVAL (operands[2]));
28249
+ return "vspltb %0,%1,%2";
28251
+ [(set_attr "type" "vecperm")])
28253
+(define_insn "altivec_vspltb_direct"
28254
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
28255
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
28256
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
28257
+ UNSPEC_VSPLT_DIRECT))]
28260
[(set_attr "type" "vecperm")])
28262
-(define_insn "altivec_vsplth"
28263
+(define_expand "altivec_vsplth"
28264
+ [(use (match_operand:V8HI 0 "register_operand" ""))
28265
+ (use (match_operand:V8HI 1 "register_operand" ""))
28266
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
28272
+ /* Special handling for LE with -maltivec=be. We have to reflect
28273
+ the actual selected index for the splat in the RTL. */
28274
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28275
+ operands[2] = GEN_INT (7 - INTVAL (operands[2]));
28277
+ v = gen_rtvec (1, operands[2]);
28278
+ x = gen_rtx_VEC_SELECT (HImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
28279
+ x = gen_rtx_VEC_DUPLICATE (V8HImode, x);
28280
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
28284
+(define_insn "*altivec_vsplth_internal"
28285
[(set (match_operand:V8HI 0 "register_operand" "=v")
28286
(vec_duplicate:V8HI
28287
(vec_select:HI (match_operand:V8HI 1 "register_operand" "v")
28288
@@ -1270,10 +1746,48 @@
28290
[(match_operand:QI 2 "u5bit_cint_operand" "")]))))]
28293
+ /* For true LE, this adjusts the selected index. For LE with
28294
+ -maltivec=be, this reverses what was done in the define_expand
28295
+ because the instruction already has big-endian bias. */
28296
+ if (!BYTES_BIG_ENDIAN)
28297
+ operands[2] = GEN_INT (7 - INTVAL (operands[2]));
28299
+ return "vsplth %0,%1,%2";
28301
+ [(set_attr "type" "vecperm")])
28303
+(define_insn "altivec_vsplth_direct"
28304
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
28305
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
28306
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
28307
+ UNSPEC_VSPLT_DIRECT))]
28310
[(set_attr "type" "vecperm")])
28312
-(define_insn "altivec_vspltw"
28313
+(define_expand "altivec_vspltw"
28314
+ [(use (match_operand:V4SI 0 "register_operand" ""))
28315
+ (use (match_operand:V4SI 1 "register_operand" ""))
28316
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
28322
+ /* Special handling for LE with -maltivec=be. We have to reflect
28323
+ the actual selected index for the splat in the RTL. */
28324
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28325
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
28327
+ v = gen_rtvec (1, operands[2]);
28328
+ x = gen_rtx_VEC_SELECT (SImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
28329
+ x = gen_rtx_VEC_DUPLICATE (V4SImode, x);
28330
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
28334
+(define_insn "*altivec_vspltw_internal"
28335
[(set (match_operand:V4SI 0 "register_operand" "=v")
28336
(vec_duplicate:V4SI
28337
(vec_select:SI (match_operand:V4SI 1 "register_operand" "v")
28338
@@ -1280,10 +1794,48 @@
28340
[(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
28343
+ /* For true LE, this adjusts the selected index. For LE with
28344
+ -maltivec=be, this reverses what was done in the define_expand
28345
+ because the instruction already has big-endian bias. */
28346
+ if (!BYTES_BIG_ENDIAN)
28347
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
28349
+ return "vspltw %0,%1,%2";
28351
+ [(set_attr "type" "vecperm")])
28353
+(define_insn "altivec_vspltw_direct"
28354
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
28355
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
28356
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
28357
+ UNSPEC_VSPLT_DIRECT))]
28360
[(set_attr "type" "vecperm")])
28362
-(define_insn "altivec_vspltsf"
28363
+(define_expand "altivec_vspltsf"
28364
+ [(use (match_operand:V4SF 0 "register_operand" ""))
28365
+ (use (match_operand:V4SF 1 "register_operand" ""))
28366
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
28372
+ /* Special handling for LE with -maltivec=be. We have to reflect
28373
+ the actual selected index for the splat in the RTL. */
28374
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28375
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
28377
+ v = gen_rtvec (1, operands[2]);
28378
+ x = gen_rtx_VEC_SELECT (SFmode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
28379
+ x = gen_rtx_VEC_DUPLICATE (V4SFmode, x);
28380
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
28384
+(define_insn "*altivec_vspltsf_internal"
28385
[(set (match_operand:V4SF 0 "register_operand" "=v")
28386
(vec_duplicate:V4SF
28387
(vec_select:SF (match_operand:V4SF 1 "register_operand" "v")
28388
@@ -1290,7 +1842,15 @@
28390
[(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
28391
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
28392
- "vspltw %0,%1,%2"
28394
+ /* For true LE, this adjusts the selected index. For LE with
28395
+ -maltivec=be, this reverses what was done in the define_expand
28396
+ because the instruction already has big-endian bias. */
28397
+ if (!BYTES_BIG_ENDIAN)
28398
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
28400
+ return "vspltw %0,%1,%2";
28402
[(set_attr "type" "vecperm")])
28404
(define_insn "altivec_vspltis<VI_char>"
28405
@@ -1308,7 +1868,7 @@
28407
[(set_attr "type" "vecfloat")])
28409
-(define_insn "altivec_vperm_<mode>"
28410
+(define_expand "altivec_vperm_<mode>"
28411
[(set (match_operand:VM 0 "register_operand" "=v")
28412
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
28413
(match_operand:VM 2 "register_operand" "v")
28414
@@ -1315,10 +1875,25 @@
28415
(match_operand:V16QI 3 "register_operand" "v")]
28419
+ if (!VECTOR_ELT_ORDER_BIG)
28421
+ altivec_expand_vec_perm_le (operands);
28426
+(define_insn "*altivec_vperm_<mode>_internal"
28427
+ [(set (match_operand:VM 0 "register_operand" "=v")
28428
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
28429
+ (match_operand:VM 2 "register_operand" "v")
28430
+ (match_operand:V16QI 3 "register_operand" "v")]
28433
"vperm %0,%1,%2,%3"
28434
[(set_attr "type" "vecperm")])
28436
-(define_insn "altivec_vperm_<mode>_uns"
28437
+(define_expand "altivec_vperm_<mode>_uns"
28438
[(set (match_operand:VM 0 "register_operand" "=v")
28439
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
28440
(match_operand:VM 2 "register_operand" "v")
28441
@@ -1325,6 +1900,21 @@
28442
(match_operand:V16QI 3 "register_operand" "v")]
28443
UNSPEC_VPERM_UNS))]
28446
+ if (!VECTOR_ELT_ORDER_BIG)
28448
+ altivec_expand_vec_perm_le (operands);
28453
+(define_insn "*altivec_vperm_<mode>_uns_internal"
28454
+ [(set (match_operand:VM 0 "register_operand" "=v")
28455
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
28456
+ (match_operand:VM 2 "register_operand" "v")
28457
+ (match_operand:V16QI 3 "register_operand" "v")]
28458
+ UNSPEC_VPERM_UNS))]
28460
"vperm %0,%1,%2,%3"
28461
[(set_attr "type" "vecperm")])
28463
@@ -1335,7 +1925,12 @@
28464
(match_operand:V16QI 3 "register_operand" "")]
28469
+ if (!BYTES_BIG_ENDIAN) {
28470
+ altivec_expand_vec_perm_le (operands);
28475
(define_expand "vec_perm_constv16qi"
28476
[(match_operand:V16QI 0 "register_operand" "")
28477
@@ -1476,52 +2071,72 @@
28478
"vsldoi %0,%1,%2,%3"
28479
[(set_attr "type" "vecperm")])
28481
-(define_insn "altivec_vupkhsb"
28482
- [(set (match_operand:V8HI 0 "register_operand" "=v")
28483
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
28484
- UNSPEC_VUPKHSB))]
28487
+(define_insn "altivec_vupkhs<VU_char>"
28488
+ [(set (match_operand:VP 0 "register_operand" "=v")
28489
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
28490
+ UNSPEC_VUNPACK_HI_SIGN))]
28493
+ if (VECTOR_ELT_ORDER_BIG)
28494
+ return "vupkhs<VU_char> %0,%1";
28496
+ return "vupkls<VU_char> %0,%1";
28498
[(set_attr "type" "vecperm")])
28500
-(define_insn "altivec_vupkhpx"
28501
- [(set (match_operand:V4SI 0 "register_operand" "=v")
28502
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
28503
- UNSPEC_VUPKHPX))]
28506
+(define_insn "*altivec_vupkhs<VU_char>_direct"
28507
+ [(set (match_operand:VP 0 "register_operand" "=v")
28508
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
28509
+ UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
28511
+ "vupkhs<VU_char> %0,%1"
28512
[(set_attr "type" "vecperm")])
28514
-(define_insn "altivec_vupkhsh"
28515
- [(set (match_operand:V4SI 0 "register_operand" "=v")
28516
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
28517
- UNSPEC_VUPKHSH))]
28520
+(define_insn "altivec_vupkls<VU_char>"
28521
+ [(set (match_operand:VP 0 "register_operand" "=v")
28522
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
28523
+ UNSPEC_VUNPACK_LO_SIGN))]
28526
+ if (VECTOR_ELT_ORDER_BIG)
28527
+ return "vupkls<VU_char> %0,%1";
28529
+ return "vupkhs<VU_char> %0,%1";
28531
[(set_attr "type" "vecperm")])
28533
-(define_insn "altivec_vupklsb"
28534
- [(set (match_operand:V8HI 0 "register_operand" "=v")
28535
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
28536
- UNSPEC_VUPKLSB))]
28539
+(define_insn "*altivec_vupkls<VU_char>_direct"
28540
+ [(set (match_operand:VP 0 "register_operand" "=v")
28541
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
28542
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT))]
28544
+ "vupkls<VU_char> %0,%1"
28545
[(set_attr "type" "vecperm")])
28547
-(define_insn "altivec_vupklpx"
28548
+(define_insn "altivec_vupkhpx"
28549
[(set (match_operand:V4SI 0 "register_operand" "=v")
28550
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
28551
- UNSPEC_VUPKLPX))]
28552
+ UNSPEC_VUPKHPX))]
28556
+ if (VECTOR_ELT_ORDER_BIG)
28557
+ return "vupkhpx %0,%1";
28559
+ return "vupklpx %0,%1";
28561
[(set_attr "type" "vecperm")])
28563
-(define_insn "altivec_vupklsh"
28564
+(define_insn "altivec_vupklpx"
28565
[(set (match_operand:V4SI 0 "register_operand" "=v")
28566
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
28567
- UNSPEC_VUPKLSH))]
28568
+ UNSPEC_VUPKLPX))]
28572
+ if (VECTOR_ELT_ORDER_BIG)
28573
+ return "vupklpx %0,%1";
28575
+ return "vupkhpx %0,%1";
28577
[(set_attr "type" "vecperm")])
28579
;; Compare vectors producing a vector result and a predicate, setting CR6 to
28580
@@ -1528,37 +2143,37 @@
28581
;; indicate a combined status
28582
(define_insn "*altivec_vcmpequ<VI_char>_p"
28584
- (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v")
28585
- (match_operand:VI 2 "register_operand" "v"))]
28586
+ (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
28587
+ (match_operand:VI2 2 "register_operand" "v"))]
28589
- (set (match_operand:VI 0 "register_operand" "=v")
28590
- (eq:VI (match_dup 1)
28592
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
28593
+ (set (match_operand:VI2 0 "register_operand" "=v")
28594
+ (eq:VI2 (match_dup 1)
28597
"vcmpequ<VI_char>. %0,%1,%2"
28598
[(set_attr "type" "veccmp")])
28600
(define_insn "*altivec_vcmpgts<VI_char>_p"
28602
- (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v")
28603
- (match_operand:VI 2 "register_operand" "v"))]
28604
+ (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v")
28605
+ (match_operand:VI2 2 "register_operand" "v"))]
28607
- (set (match_operand:VI 0 "register_operand" "=v")
28608
- (gt:VI (match_dup 1)
28610
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
28611
+ (set (match_operand:VI2 0 "register_operand" "=v")
28612
+ (gt:VI2 (match_dup 1)
28615
"vcmpgts<VI_char>. %0,%1,%2"
28616
[(set_attr "type" "veccmp")])
28618
(define_insn "*altivec_vcmpgtu<VI_char>_p"
28620
- (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v")
28621
- (match_operand:VI 2 "register_operand" "v"))]
28622
+ (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v")
28623
+ (match_operand:VI2 2 "register_operand" "v"))]
28625
- (set (match_operand:VI 0 "register_operand" "=v")
28626
- (gtu:VI (match_dup 1)
28628
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
28629
+ (set (match_operand:VI2 0 "register_operand" "=v")
28630
+ (gtu:VI2 (match_dup 1)
28633
"vcmpgtu<VI_char>. %0,%1,%2"
28634
[(set_attr "type" "veccmp")])
28636
@@ -1710,12 +2325,26 @@
28637
;; Parallel some of the LVE* and STV*'s with unspecs because some have
28638
;; identical rtl but different instructions-- and gcc gets confused.
28640
-(define_insn "altivec_lve<VI_char>x"
28641
+(define_expand "altivec_lve<VI_char>x"
28643
[(set (match_operand:VI 0 "register_operand" "=v")
28644
(match_operand:VI 1 "memory_operand" "Z"))
28645
(unspec [(const_int 0)] UNSPEC_LVE)])]
28648
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28650
+ altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVE);
28655
+(define_insn "*altivec_lve<VI_char>x_internal"
28657
+ [(set (match_operand:VI 0 "register_operand" "=v")
28658
+ (match_operand:VI 1 "memory_operand" "Z"))
28659
+ (unspec [(const_int 0)] UNSPEC_LVE)])]
28661
"lve<VI_char>x %0,%y1"
28662
[(set_attr "type" "vecload")])
28664
@@ -1728,46 +2357,114 @@
28666
[(set_attr "type" "vecload")])
28668
-(define_insn "altivec_lvxl"
28669
+(define_expand "altivec_lvxl_<mode>"
28671
- [(set (match_operand:V4SI 0 "register_operand" "=v")
28672
- (match_operand:V4SI 1 "memory_operand" "Z"))
28673
+ [(set (match_operand:VM2 0 "register_operand" "=v")
28674
+ (match_operand:VM2 1 "memory_operand" "Z"))
28675
(unspec [(const_int 0)] UNSPEC_SET_VSCR)])]
28679
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28681
+ altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_SET_VSCR);
28686
+(define_insn "*altivec_lvxl_<mode>_internal"
28688
+ [(set (match_operand:VM2 0 "register_operand" "=v")
28689
+ (match_operand:VM2 1 "memory_operand" "Z"))
28690
+ (unspec [(const_int 0)] UNSPEC_SET_VSCR)])]
28693
[(set_attr "type" "vecload")])
28695
-(define_insn "altivec_lvx_<mode>"
28696
+(define_expand "altivec_lvx_<mode>"
28698
[(set (match_operand:VM2 0 "register_operand" "=v")
28699
(match_operand:VM2 1 "memory_operand" "Z"))
28700
(unspec [(const_int 0)] UNSPEC_LVX)])]
28703
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28705
+ altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVX);
28710
+(define_insn "*altivec_lvx_<mode>_internal"
28712
+ [(set (match_operand:VM2 0 "register_operand" "=v")
28713
+ (match_operand:VM2 1 "memory_operand" "Z"))
28714
+ (unspec [(const_int 0)] UNSPEC_LVX)])]
28717
[(set_attr "type" "vecload")])
28719
-(define_insn "altivec_stvx_<mode>"
28720
+(define_expand "altivec_stvx_<mode>"
28722
[(set (match_operand:VM2 0 "memory_operand" "=Z")
28723
(match_operand:VM2 1 "register_operand" "v"))
28724
(unspec [(const_int 0)] UNSPEC_STVX)])]
28727
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28729
+ altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVX);
28734
+(define_insn "*altivec_stvx_<mode>_internal"
28736
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
28737
+ (match_operand:VM2 1 "register_operand" "v"))
28738
+ (unspec [(const_int 0)] UNSPEC_STVX)])]
28741
[(set_attr "type" "vecstore")])
28743
-(define_insn "altivec_stvxl"
28744
+(define_expand "altivec_stvxl_<mode>"
28746
- [(set (match_operand:V4SI 0 "memory_operand" "=Z")
28747
- (match_operand:V4SI 1 "register_operand" "v"))
28748
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
28749
+ (match_operand:VM2 1 "register_operand" "v"))
28750
(unspec [(const_int 0)] UNSPEC_STVXL)])]
28753
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28755
+ altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVXL);
28760
+(define_insn "*altivec_stvxl_<mode>_internal"
28762
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
28763
+ (match_operand:VM2 1 "register_operand" "v"))
28764
+ (unspec [(const_int 0)] UNSPEC_STVXL)])]
28767
[(set_attr "type" "vecstore")])
28769
-(define_insn "altivec_stve<VI_char>x"
28770
+(define_expand "altivec_stve<VI_char>x"
28771
[(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z")
28772
(unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))]
28775
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
28777
+ altivec_expand_stvex_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVE);
28782
+(define_insn "*altivec_stve<VI_char>x_internal"
28783
+ [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z")
28784
+ (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))]
28786
"stve<VI_char>x %1,%y0"
28787
[(set_attr "type" "vecstore")])
28789
@@ -1779,20 +2476,28 @@
28790
[(set_attr "type" "vecstore")])
28793
-;; vspltis? SCRATCH0,0
28794
+;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
28795
;; vsubu?m SCRATCH2,SCRATCH1,%1
28796
;; vmaxs? %0,%1,SCRATCH2"
28797
(define_expand "abs<mode>2"
28798
- [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
28799
- (set (match_dup 3)
28800
- (minus:VI (match_dup 2)
28801
- (match_operand:VI 1 "register_operand" "v")))
28802
- (set (match_operand:VI 0 "register_operand" "=v")
28803
- (smax:VI (match_dup 1) (match_dup 3)))]
28805
+ [(set (match_dup 2) (match_dup 3))
28806
+ (set (match_dup 4)
28807
+ (minus:VI2 (match_dup 2)
28808
+ (match_operand:VI2 1 "register_operand" "v")))
28809
+ (set (match_operand:VI2 0 "register_operand" "=v")
28810
+ (smax:VI2 (match_dup 1) (match_dup 4)))]
28813
- operands[2] = gen_reg_rtx (GET_MODE (operands[0]));
28814
- operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
28815
+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
28816
+ rtvec v = rtvec_alloc (n_elt);
28818
+ /* Create an all 0 constant. */
28819
+ for (i = 0; i < n_elt; ++i)
28820
+ RTVEC_ELT (v, i) = const0_rtx;
28822
+ operands[2] = gen_reg_rtx (<MODE>mode);
28823
+ operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
28824
+ operands[4] = gen_reg_rtx (<MODE>mode);
28828
@@ -1844,7 +2549,7 @@
28830
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
28831
emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
28832
- emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero));
28833
+ emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero));
28837
@@ -1860,7 +2565,7 @@
28839
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
28840
emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
28841
- emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero));
28842
+ emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero));
28846
@@ -1950,50 +2655,20 @@
28850
-(define_expand "vec_unpacks_hi_v16qi"
28851
- [(set (match_operand:V8HI 0 "register_operand" "=v")
28852
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
28853
- UNSPEC_VUPKHSB))]
28857
- emit_insn (gen_altivec_vupkhsb (operands[0], operands[1]));
28860
+(define_expand "vec_unpacks_hi_<VP_small_lc>"
28861
+ [(set (match_operand:VP 0 "register_operand" "=v")
28862
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
28863
+ UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
28867
-(define_expand "vec_unpacks_hi_v8hi"
28868
- [(set (match_operand:V4SI 0 "register_operand" "=v")
28869
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
28870
- UNSPEC_VUPKHSH))]
28874
- emit_insn (gen_altivec_vupkhsh (operands[0], operands[1]));
28877
+(define_expand "vec_unpacks_lo_<VP_small_lc>"
28878
+ [(set (match_operand:VP 0 "register_operand" "=v")
28879
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
28880
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT))]
28884
-(define_expand "vec_unpacks_lo_v16qi"
28885
- [(set (match_operand:V8HI 0 "register_operand" "=v")
28886
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
28887
- UNSPEC_VUPKLSB))]
28891
- emit_insn (gen_altivec_vupklsb (operands[0], operands[1]));
28895
-(define_expand "vec_unpacks_lo_v8hi"
28896
- [(set (match_operand:V4SI 0 "register_operand" "=v")
28897
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
28898
- UNSPEC_VUPKLSH))]
28902
- emit_insn (gen_altivec_vupklsh (operands[0], operands[1]));
28906
(define_insn "vperm_v8hiv4si"
28907
[(set (match_operand:V4SI 0 "register_operand" "=v")
28908
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
28909
@@ -2025,25 +2700,26 @@
28910
rtx vzero = gen_reg_rtx (V8HImode);
28911
rtx mask = gen_reg_rtx (V16QImode);
28912
rtvec v = rtvec_alloc (16);
28913
+ bool be = BYTES_BIG_ENDIAN;
28915
emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
28917
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
28918
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 0);
28919
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
28920
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
28921
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
28922
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 2);
28923
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
28924
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
28925
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
28926
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 4);
28927
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
28928
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
28929
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
28930
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 6);
28931
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
28932
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
28933
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
28934
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16);
28935
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6);
28936
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
28937
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
28938
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16);
28939
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 4);
28940
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
28941
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
28942
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16);
28943
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2);
28944
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
28945
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
28946
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16);
28947
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0);
28948
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
28950
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
28951
emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
28952
@@ -2060,25 +2736,26 @@
28953
rtx vzero = gen_reg_rtx (V4SImode);
28954
rtx mask = gen_reg_rtx (V16QImode);
28955
rtvec v = rtvec_alloc (16);
28956
+ bool be = BYTES_BIG_ENDIAN;
28958
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
28960
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
28961
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
28962
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 0);
28963
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
28964
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
28965
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
28966
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 2);
28967
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
28968
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
28969
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
28970
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 4);
28971
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
28972
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
28973
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
28974
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 6);
28975
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
28976
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
28977
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6);
28978
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17);
28979
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
28980
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
28981
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4);
28982
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17);
28983
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
28984
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
28985
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2);
28986
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 4 : 17);
28987
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
28988
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
28989
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0);
28990
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17);
28991
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
28993
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
28994
emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
28995
@@ -2095,25 +2772,26 @@
28996
rtx vzero = gen_reg_rtx (V8HImode);
28997
rtx mask = gen_reg_rtx (V16QImode);
28998
rtvec v = rtvec_alloc (16);
28999
+ bool be = BYTES_BIG_ENDIAN;
29001
emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
29003
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
29004
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 8);
29005
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
29006
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
29007
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
29008
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10);
29009
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
29010
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
29011
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
29012
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 12);
29013
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
29014
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
29015
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
29016
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 14);
29017
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
29018
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
29019
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
29020
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16);
29021
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14);
29022
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
29023
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
29024
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16);
29025
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12);
29026
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
29027
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
29028
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16);
29029
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10);
29030
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
29031
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
29032
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16);
29033
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8);
29034
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
29036
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
29037
emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
29038
@@ -2130,25 +2808,26 @@
29039
rtx vzero = gen_reg_rtx (V4SImode);
29040
rtx mask = gen_reg_rtx (V16QImode);
29041
rtvec v = rtvec_alloc (16);
29042
+ bool be = BYTES_BIG_ENDIAN;
29044
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
29046
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
29047
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
29048
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 8);
29049
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
29050
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
29051
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
29052
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10);
29053
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
29054
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
29055
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
29056
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 12);
29057
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
29058
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
29059
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
29060
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 14);
29061
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
29062
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
29063
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14);
29064
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17);
29065
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
29066
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
29067
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12);
29068
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17);
29069
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
29070
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
29071
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10);
29072
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17);
29073
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
29074
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
29075
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8);
29076
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
29077
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
29079
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
29080
emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
29081
@@ -2166,9 +2845,18 @@
29082
rtx ve = gen_reg_rtx (V8HImode);
29083
rtx vo = gen_reg_rtx (V8HImode);
29085
- emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
29086
- emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
29087
- emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
29088
+ if (BYTES_BIG_ENDIAN)
29090
+ emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
29091
+ emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
29092
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
29096
+ emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
29097
+ emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
29098
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
29103
@@ -2183,9 +2871,18 @@
29104
rtx ve = gen_reg_rtx (V8HImode);
29105
rtx vo = gen_reg_rtx (V8HImode);
29107
- emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
29108
- emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
29109
- emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
29110
+ if (BYTES_BIG_ENDIAN)
29112
+ emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
29113
+ emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
29114
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
29118
+ emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
29119
+ emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
29120
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
29125
@@ -2200,9 +2897,18 @@
29126
rtx ve = gen_reg_rtx (V8HImode);
29127
rtx vo = gen_reg_rtx (V8HImode);
29129
- emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
29130
- emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
29131
- emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
29132
+ if (BYTES_BIG_ENDIAN)
29134
+ emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
29135
+ emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
29136
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
29140
+ emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
29141
+ emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
29142
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
29147
@@ -2217,9 +2923,18 @@
29148
rtx ve = gen_reg_rtx (V8HImode);
29149
rtx vo = gen_reg_rtx (V8HImode);
29151
- emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
29152
- emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
29153
- emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
29154
+ if (BYTES_BIG_ENDIAN)
29156
+ emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
29157
+ emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
29158
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
29162
+ emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
29163
+ emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
29164
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
29169
@@ -2234,9 +2949,18 @@
29170
rtx ve = gen_reg_rtx (V4SImode);
29171
rtx vo = gen_reg_rtx (V4SImode);
29173
- emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
29174
- emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
29175
- emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
29176
+ if (BYTES_BIG_ENDIAN)
29178
+ emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
29179
+ emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
29180
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo));
29184
+ emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
29185
+ emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
29186
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve));
29191
@@ -2251,9 +2975,18 @@
29192
rtx ve = gen_reg_rtx (V4SImode);
29193
rtx vo = gen_reg_rtx (V4SImode);
29195
- emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
29196
- emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
29197
- emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
29198
+ if (BYTES_BIG_ENDIAN)
29200
+ emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
29201
+ emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
29202
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo));
29206
+ emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
29207
+ emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
29208
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve));
29213
@@ -2268,9 +3001,18 @@
29214
rtx ve = gen_reg_rtx (V4SImode);
29215
rtx vo = gen_reg_rtx (V4SImode);
29217
- emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
29218
- emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
29219
- emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
29220
+ if (BYTES_BIG_ENDIAN)
29222
+ emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
29223
+ emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
29224
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo));
29228
+ emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
29229
+ emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
29230
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve));
29235
@@ -2285,35 +3027,28 @@
29236
rtx ve = gen_reg_rtx (V4SImode);
29237
rtx vo = gen_reg_rtx (V4SImode);
29239
- emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
29240
- emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
29241
- emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
29242
+ if (BYTES_BIG_ENDIAN)
29244
+ emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
29245
+ emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
29246
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo));
29250
+ emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
29251
+ emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
29252
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve));
29257
-(define_expand "vec_pack_trunc_v8hi"
29258
- [(set (match_operand:V16QI 0 "register_operand" "=v")
29259
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
29260
- (match_operand:V8HI 2 "register_operand" "v")]
29261
- UNSPEC_VPKUHUM))]
29265
- emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2]));
29269
-(define_expand "vec_pack_trunc_v4si"
29270
- [(set (match_operand:V8HI 0 "register_operand" "=v")
29271
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
29272
- (match_operand:V4SI 2 "register_operand" "v")]
29273
- UNSPEC_VPKUWUM))]
29277
- emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2]));
29280
+(define_expand "vec_pack_trunc_<mode>"
29281
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
29282
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
29283
+ (match_operand:VP 2 "register_operand" "v")]
29284
+ UNSPEC_VPACK_UNS_UNS_MOD))]
29288
(define_expand "altivec_negv4sf2"
29289
[(use (match_operand:V4SF 0 "register_operand" ""))
29290
@@ -2460,3 +3195,34 @@
29291
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
29296
+;; Power8 vector instructions encoded as Altivec instructions
29298
+;; Vector count leading zeros
29299
+(define_insn "*p8v_clz<mode>2"
29300
+ [(set (match_operand:VI2 0 "register_operand" "=v")
29301
+ (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
29302
+ "TARGET_P8_VECTOR"
29304
+ [(set_attr "length" "4")
29305
+ (set_attr "type" "vecsimple")])
29307
+;; Vector population count
29308
+(define_insn "*p8v_popcount<mode>2"
29309
+ [(set (match_operand:VI2 0 "register_operand" "=v")
29310
+ (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
29311
+ "TARGET_P8_VECTOR"
29312
+ "vpopcnt<wd> %0,%1"
29313
+ [(set_attr "length" "4")
29314
+ (set_attr "type" "vecsimple")])
29316
+;; Vector Gather Bits by Bytes by Doubleword
29317
+(define_insn "p8v_vgbbd"
29318
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
29319
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
29321
+ "TARGET_P8_VECTOR"
29323
+ [(set_attr "length" "4")
29324
+ (set_attr "type" "vecsimple")])
29325
--- a/src/gcc/config/rs6000/sysv4le.h
29326
+++ b/src/gcc/config/rs6000/sysv4le.h
29328
#undef TARGET_DEFAULT
29329
#define TARGET_DEFAULT MASK_LITTLE_ENDIAN
29331
-#undef CC1_ENDIAN_DEFAULT_SPEC
29332
-#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_little)"
29334
#undef DEFAULT_ASM_ENDIAN
29335
#define DEFAULT_ASM_ENDIAN " -mlittle"
29339
#undef MULTILIB_DEFAULTS
29340
#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
29342
+/* Little-endian PowerPC64 Linux uses the ELF v2 ABI by default. */
29343
+#define LINUX64_DEFAULT_ABI_ELFv2
29345
--- a/src/gcc/config/rs6000/dfp.md
29346
+++ b/src/gcc/config/rs6000/dfp.md
29351
-(define_expand "movsd"
29352
- [(set (match_operand:SD 0 "nonimmediate_operand" "")
29353
- (match_operand:SD 1 "any_operand" ""))]
29354
- "TARGET_HARD_FLOAT && TARGET_FPRS"
29355
- "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }")
29358
- [(set (match_operand:SD 0 "gpc_reg_operand" "")
29359
- (match_operand:SD 1 "const_double_operand" ""))]
29360
- "reload_completed
29361
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
29362
- || (GET_CODE (operands[0]) == SUBREG
29363
- && GET_CODE (SUBREG_REG (operands[0])) == REG
29364
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
29365
- [(set (match_dup 2) (match_dup 3))]
29369
- REAL_VALUE_TYPE rv;
29371
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
29372
- REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
29374
- if (! TARGET_POWERPC64)
29375
- operands[2] = operand_subword (operands[0], 0, 0, SDmode);
29377
- operands[2] = gen_lowpart (SImode, operands[0]);
29379
- operands[3] = gen_int_mode (l, SImode);
29382
-(define_insn "movsd_hardfloat"
29383
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r")
29384
- (match_operand:SD 1 "input_operand" "r,m,r,f,r,h,0,G,Fn"))]
29385
- "(gpc_reg_operand (operands[0], SDmode)
29386
- || gpc_reg_operand (operands[1], SDmode))
29387
- && (TARGET_HARD_FLOAT && TARGET_FPRS)"
29398
- [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*")
29399
- (set_attr "length" "4,4,4,4,4,4,4,4,8")])
29401
-(define_insn "movsd_softfloat"
29402
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h")
29403
- (match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))]
29404
- "(gpc_reg_operand (operands[0], SDmode)
29405
- || gpc_reg_operand (operands[1], SDmode))
29406
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
29419
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*")
29420
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")])
29422
(define_insn "movsd_store"
29423
[(set (match_operand:DD 0 "nonimmediate_operand" "=m")
29424
(unspec:DD [(match_operand:SD 1 "input_operand" "d")]
29425
@@ -108,7 +37,14 @@
29426
|| gpc_reg_operand (operands[1], SDmode))
29427
&& TARGET_HARD_FLOAT && TARGET_FPRS"
29429
- [(set_attr "type" "fpstore")
29430
+ [(set (attr "type")
29432
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
29433
+ (const_string "fpstore_ux")
29435
+ (match_test "update_address_mem (operands[0], VOIDmode)")
29436
+ (const_string "fpstore_u")
29437
+ (const_string "fpstore"))))
29438
(set_attr "length" "4")])
29440
(define_insn "movsd_load"
29441
@@ -119,7 +55,14 @@
29442
|| gpc_reg_operand (operands[1], DDmode))
29443
&& TARGET_HARD_FLOAT && TARGET_FPRS"
29445
- [(set_attr "type" "fpload")
29446
+ [(set (attr "type")
29448
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
29449
+ (const_string "fpload_ux")
29451
+ (match_test "update_address_mem (operands[1], VOIDmode)")
29452
+ (const_string "fpload_u")
29453
+ (const_string "fpload"))))
29454
(set_attr "length" "4")])
29456
;; Hardware support for decimal floating point operations.
29457
@@ -182,211 +125,6 @@
29459
[(set_attr "type" "fp")])
29461
-(define_expand "movdd"
29462
- [(set (match_operand:DD 0 "nonimmediate_operand" "")
29463
- (match_operand:DD 1 "any_operand" ""))]
29465
- "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }")
29468
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
29469
- (match_operand:DD 1 "const_int_operand" ""))]
29470
- "! TARGET_POWERPC64 && reload_completed
29471
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
29472
- || (GET_CODE (operands[0]) == SUBREG
29473
- && GET_CODE (SUBREG_REG (operands[0])) == REG
29474
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
29475
- [(set (match_dup 2) (match_dup 4))
29476
- (set (match_dup 3) (match_dup 1))]
29479
- int endian = (WORDS_BIG_ENDIAN == 0);
29480
- HOST_WIDE_INT value = INTVAL (operands[1]);
29482
- operands[2] = operand_subword (operands[0], endian, 0, DDmode);
29483
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
29484
-#if HOST_BITS_PER_WIDE_INT == 32
29485
- operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
29487
- operands[4] = GEN_INT (value >> 32);
29488
- operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
29493
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
29494
- (match_operand:DD 1 "const_double_operand" ""))]
29495
- "! TARGET_POWERPC64 && reload_completed
29496
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
29497
- || (GET_CODE (operands[0]) == SUBREG
29498
- && GET_CODE (SUBREG_REG (operands[0])) == REG
29499
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
29500
- [(set (match_dup 2) (match_dup 4))
29501
- (set (match_dup 3) (match_dup 5))]
29504
- int endian = (WORDS_BIG_ENDIAN == 0);
29506
- REAL_VALUE_TYPE rv;
29508
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
29509
- REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
29511
- operands[2] = operand_subword (operands[0], endian, 0, DDmode);
29512
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
29513
- operands[4] = gen_int_mode (l[endian], SImode);
29514
- operands[5] = gen_int_mode (l[1 - endian], SImode);
29518
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
29519
- (match_operand:DD 1 "const_double_operand" ""))]
29520
- "TARGET_POWERPC64 && reload_completed
29521
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
29522
- || (GET_CODE (operands[0]) == SUBREG
29523
- && GET_CODE (SUBREG_REG (operands[0])) == REG
29524
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
29525
- [(set (match_dup 2) (match_dup 3))]
29528
- int endian = (WORDS_BIG_ENDIAN == 0);
29530
- REAL_VALUE_TYPE rv;
29531
-#if HOST_BITS_PER_WIDE_INT >= 64
29532
- HOST_WIDE_INT val;
29535
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
29536
- REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
29538
- operands[2] = gen_lowpart (DImode, operands[0]);
29539
- /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
29540
-#if HOST_BITS_PER_WIDE_INT >= 64
29541
- val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32
29542
- | ((HOST_WIDE_INT)(unsigned long)l[1 - endian]));
29544
- operands[3] = gen_int_mode (val, DImode);
29546
- operands[3] = immed_double_const (l[1 - endian], l[endian], DImode);
29550
-;; Don't have reload use general registers to load a constant. First,
29551
-;; it might not work if the output operand is the equivalent of
29552
-;; a non-offsettable memref, but also it is less efficient than loading
29553
-;; the constant into an FP register, since it will probably be used there.
29554
-;; The "??" is a kludge until we can figure out a more reasonable way
29555
-;; of handling these non-offsettable values.
29556
-(define_insn "*movdd_hardfloat32"
29557
- [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r")
29558
- (match_operand:DD 1 "input_operand" "r,m,r,d,m,d,G,H,F"))]
29559
- "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS
29560
- && (gpc_reg_operand (operands[0], DDmode)
29561
- || gpc_reg_operand (operands[1], DDmode))"
29564
- switch (which_alternative)
29567
- gcc_unreachable ();
29573
- return \"fmr %0,%1\";
29575
- return \"lfd%U1%X1 %0,%1\";
29577
- return \"stfd%U0%X0 %1,%0\";
29584
- [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*")
29585
- (set_attr "length" "8,16,16,4,4,4,8,12,16")])
29587
-(define_insn "*movdd_softfloat32"
29588
- [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r")
29589
- (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))]
29590
- "! TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
29591
- && (gpc_reg_operand (operands[0], DDmode)
29592
- || gpc_reg_operand (operands[1], DDmode))"
29594
- [(set_attr "type" "two,load,store,*,*,*")
29595
- (set_attr "length" "8,8,8,8,12,16")])
29597
-; ld/std require word-aligned displacements -> 'Y' constraint.
29598
-; List Y->r and r->Y before r->r for reload.
29599
-(define_insn "*movdd_hardfloat64_mfpgpr"
29600
- [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d")
29601
- (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))]
29602
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
29603
- && (gpc_reg_operand (operands[0], DDmode)
29604
- || gpc_reg_operand (operands[1], DDmode))"
29620
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
29621
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
29623
-; ld/std require word-aligned displacements -> 'Y' constraint.
29624
-; List Y->r and r->Y before r->r for reload.
29625
-(define_insn "*movdd_hardfloat64"
29626
- [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r")
29627
- (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))]
29628
- "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
29629
- && (gpc_reg_operand (operands[0], DDmode)
29630
- || gpc_reg_operand (operands[1], DDmode))"
29644
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*")
29645
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")])
29647
-(define_insn "*movdd_softfloat64"
29648
- [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
29649
- (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
29650
- "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
29651
- && (gpc_reg_operand (operands[0], DDmode)
29652
- || gpc_reg_operand (operands[1], DDmode))"
29663
- [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
29664
- (set_attr "length" "4,4,4,4,4,8,12,16,4")])
29666
(define_expand "negtd2"
29667
[(set (match_operand:TD 0 "gpc_reg_operand" "")
29668
(neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))]
29669
@@ -410,40 +148,25 @@
29672
(define_insn "*abstd2_fpr"
29673
- [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
29674
- (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
29675
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d")
29676
+ (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))]
29677
"TARGET_HARD_FLOAT && TARGET_FPRS"
29679
- [(set_attr "type" "fp")])
29682
+ fabs %0,%1\;fmr %L0,%L1"
29683
+ [(set_attr "type" "fp")
29684
+ (set_attr "length" "4,8")])
29686
(define_insn "*nabstd2_fpr"
29687
- [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
29688
- (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d"))))]
29689
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d")
29690
+ (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d"))))]
29691
"TARGET_HARD_FLOAT && TARGET_FPRS"
29693
- [(set_attr "type" "fp")])
29696
+ fnabs %0,%1\;fmr %L0,%L1"
29697
+ [(set_attr "type" "fp")
29698
+ (set_attr "length" "4,8")])
29700
-(define_expand "movtd"
29701
- [(set (match_operand:TD 0 "general_operand" "")
29702
- (match_operand:TD 1 "any_operand" ""))]
29703
- "TARGET_HARD_FLOAT && TARGET_FPRS"
29704
- "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }")
29706
-; It's important to list the Y->r and r->Y moves before r->r because
29707
-; otherwise reload, given m->r, will try to pick r->r and reload it,
29708
-; which doesn't make progress.
29709
-(define_insn_and_split "*movtd_internal"
29710
- [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
29711
- (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
29712
- "TARGET_HARD_FLOAT && TARGET_FPRS
29713
- && (gpc_reg_operand (operands[0], TDmode)
29714
- || gpc_reg_operand (operands[1], TDmode))"
29716
- "&& reload_completed"
29718
-{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
29719
- [(set_attr "length" "8,8,8,20,20,16")])
29721
;; Hardware support for decimal floating point operations.
29723
(define_insn "extendddtd2"
29724
--- a/src/gcc/config/rs6000/t-linux64
29725
+++ b/src/gcc/config/rs6000/t-linux64
29727
# it doesn't tell anything about the 32bit libraries on those systems. Set
29728
# MULTILIB_OSDIRNAMES according to what is found on the target.
29730
-MULTILIB_OPTIONS = m64/m32
29731
-MULTILIB_DIRNAMES = 64 32
29732
-MULTILIB_EXTRA_OPTS = fPIC
29733
-MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu)
29734
-MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu)
29735
+MULTILIB_OPTIONS := m64/m32
29736
+MULTILIB_DIRNAMES := 64 32
29737
+MULTILIB_EXTRA_OPTS :=
29738
+MULTILIB_OSDIRNAMES := m64=../lib64$(call if_multiarch,:powerpc64-linux-gnu)
29739
+MULTILIB_OSDIRNAMES += m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu)
29740
--- a/src/gcc/config/rs6000/sync.md
29741
+++ b/src/gcc/config/rs6000/sync.md
29743
;; Machine description for PowerPC synchronization instructions.
29744
-;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
29745
+;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
29746
;; Contributed by Geoffrey Keating.
29748
;; This file is part of GCC.
29749
@@ -18,14 +18,23 @@
29750
;; along with GCC; see the file COPYING3. If not see
29751
;; <http://www.gnu.org/licenses/>.
29753
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
29754
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
29755
+(define_mode_attr larx [(QI "lbarx")
29761
+(define_mode_attr stcx [(QI "stbcx.")
29767
(define_code_iterator FETCHOP [plus minus ior xor and])
29768
(define_code_attr fetchop_name
29769
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
29770
(define_code_attr fetchop_pred
29771
- [(plus "add_operand") (minus "gpc_reg_operand")
29772
+ [(plus "add_operand") (minus "int_reg_operand")
29773
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
29775
(define_expand "mem_thread_fence"
29776
@@ -129,16 +138,7 @@
29777
case MEMMODEL_CONSUME:
29778
case MEMMODEL_ACQUIRE:
29779
case MEMMODEL_SEQ_CST:
29780
- if (GET_MODE (operands[0]) == QImode)
29781
- emit_insn (gen_loadsync_qi (operands[0]));
29782
- else if (GET_MODE (operands[0]) == HImode)
29783
- emit_insn (gen_loadsync_hi (operands[0]));
29784
- else if (GET_MODE (operands[0]) == SImode)
29785
- emit_insn (gen_loadsync_si (operands[0]));
29786
- else if (GET_MODE (operands[0]) == DImode)
29787
- emit_insn (gen_loadsync_di (operands[0]));
29789
- gcc_unreachable ();
29790
+ emit_insn (gen_loadsync_<mode> (operands[0]));
29793
gcc_unreachable ();
29794
@@ -170,14 +170,26 @@
29798
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
29799
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
29800
-;; but let's prepare the macros anyway.
29801
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instrucitons
29802
+;; other than the quad memory operations, which have special restrictions.
29803
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
29804
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
29805
+;; special handling due to even/odd register requirements.
29806
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
29807
+ (HI "TARGET_SYNC_HI_QI")
29809
+ (DI "TARGET_POWERPC64")])
29811
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
29812
+;; Types that we should provide atomic instructions for.
29814
+(define_mode_iterator AINT [QI
29817
+ (DI "TARGET_POWERPC64")
29818
+ (TI "TARGET_SYNC_TI")])
29820
(define_insn "load_locked<mode>"
29821
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
29822
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
29823
(unspec_volatile:ATOMIC
29824
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
29826
@@ -184,21 +196,159 @@
29828
[(set_attr "type" "load_l")])
29830
+(define_insn "load_locked<QHI:mode>_si"
29831
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
29832
+ (unspec_volatile:SI
29833
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
29834
+ "TARGET_SYNC_HI_QI"
29835
+ "<QHI:larx> %0,%y1"
29836
+ [(set_attr "type" "load_l")])
29838
+;; Use PTImode to get even/odd register pairs.
29840
+;; Use a temporary register to force getting an even register for the
29841
+;; lqarx/stqcrx. instructions. Under AT 7.0, we need use an explicit copy,
29842
+;; even in big endian mode, unless we are using the LRA register allocator. In
29843
+;; GCC 4.9, the register allocator is smart enough to assign a even/odd
29846
+;; On little endian systems where non-atomic quad word load/store instructions
29847
+;; are not used, the address can be register+offset, so make sure the address
29848
+;; is indexed or indirect before register allocation.
29850
+(define_expand "load_lockedti"
29851
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
29852
+ (use (match_operand:TI 1 "memory_operand" ""))]
29855
+ rtx op0 = operands[0];
29856
+ rtx op1 = operands[1];
29857
+ rtx pti = gen_reg_rtx (PTImode);
29859
+ if (!indexed_or_indirect_operand (op1, TImode))
29861
+ rtx old_addr = XEXP (op1, 0);
29862
+ rtx new_addr = force_reg (Pmode, old_addr);
29863
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
29866
+ emit_insn (gen_load_lockedpti (pti, op1));
29867
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
29868
+ emit_move_insn (op0, gen_lowpart (TImode, pti));
29871
+ rtx op0_lo = gen_lowpart (DImode, op0);
29872
+ rtx op0_hi = gen_highpart (DImode, op0);
29873
+ rtx pti_lo = gen_lowpart (DImode, pti);
29874
+ rtx pti_hi = gen_highpart (DImode, pti);
29876
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
29877
+ if (WORDS_BIG_ENDIAN)
29879
+ emit_move_insn (op0_hi, pti_hi);
29880
+ emit_move_insn (op0_lo, pti_lo);
29884
+ emit_move_insn (op0_hi, pti_lo);
29885
+ emit_move_insn (op0_lo, pti_hi);
29891
+(define_insn "load_lockedpti"
29892
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
29893
+ (unspec_volatile:PTI
29894
+ [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
29896
+ && !reg_mentioned_p (operands[0], operands[1])
29897
+ && quad_int_reg_operand (operands[0], PTImode)"
29899
+ [(set_attr "type" "load_l")])
29901
(define_insn "store_conditional<mode>"
29902
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
29903
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
29904
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
29905
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
29906
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
29909
[(set_attr "type" "store_c")])
29911
+;; Use a temporary register to force getting an even register for the
29912
+;; lqarx/stqcrx. instructions. Under AT 7.0, we need use an explicit copy,
29913
+;; even in big endian mode. In GCC 4.9, the register allocator is smart enough
29914
+;; to assign a even/odd register pair.
29916
+;; On little endian systems where non-atomic quad word load/store instructions
29917
+;; are not used, the address can be register+offset, so make sure the address
29918
+;; is indexed or indirect before register allocation.
29920
+(define_expand "store_conditionalti"
29921
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
29922
+ (use (match_operand:TI 1 "memory_operand" ""))
29923
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
29926
+ rtx op0 = operands[0];
29927
+ rtx op1 = operands[1];
29928
+ rtx op2 = operands[2];
29929
+ rtx addr = XEXP (op1, 0);
29933
+ if (!indexed_or_indirect_operand (op1, TImode))
29935
+ rtx new_addr = force_reg (Pmode, addr);
29936
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
29940
+ pti_mem = change_address (op1, PTImode, addr);
29941
+ pti_reg = gen_reg_rtx (PTImode);
29943
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
29944
+ emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
29947
+ rtx op2_lo = gen_lowpart (DImode, op2);
29948
+ rtx op2_hi = gen_highpart (DImode, op2);
29949
+ rtx pti_lo = gen_lowpart (DImode, pti_reg);
29950
+ rtx pti_hi = gen_highpart (DImode, pti_reg);
29952
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
29953
+ if (WORDS_BIG_ENDIAN)
29955
+ emit_move_insn (pti_hi, op2_hi);
29956
+ emit_move_insn (pti_lo, op2_lo);
29960
+ emit_move_insn (pti_hi, op2_lo);
29961
+ emit_move_insn (pti_lo, op2_hi);
29965
+ emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));
29969
+(define_insn "store_conditionalpti"
29970
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
29971
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
29972
+ (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z")
29973
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
29974
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
29976
+ [(set_attr "type" "store_c")])
29978
(define_expand "atomic_compare_and_swap<mode>"
29979
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
29980
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
29981
- (match_operand:INT1 2 "memory_operand" "") ;; memory
29982
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
29983
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
29984
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
29985
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
29986
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
29987
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
29988
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
29989
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
29990
(match_operand:SI 6 "const_int_operand" "") ;; model succ
29991
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
29992
@@ -209,9 +359,9 @@
29995
(define_expand "atomic_exchange<mode>"
29996
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
29997
- (match_operand:INT1 1 "memory_operand" "") ;; memory
29998
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
29999
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
30000
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
30001
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
30002
(match_operand:SI 3 "const_int_operand" "")] ;; model
30005
@@ -220,9 +370,9 @@
30008
(define_expand "atomic_<fetchop_name><mode>"
30009
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
30010
- (FETCHOP:INT1 (match_dup 0)
30011
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
30012
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
30013
+ (FETCHOP:AINT (match_dup 0)
30014
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
30015
(match_operand:SI 2 "const_int_operand" "")] ;; model
30018
@@ -232,8 +382,8 @@
30021
(define_expand "atomic_nand<mode>"
30022
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
30023
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
30024
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
30025
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
30026
(match_operand:SI 2 "const_int_operand" "")] ;; model
30029
@@ -243,10 +393,10 @@
30032
(define_expand "atomic_fetch_<fetchop_name><mode>"
30033
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
30034
- (match_operand:INT1 1 "memory_operand" "") ;; memory
30035
- (FETCHOP:INT1 (match_dup 1)
30036
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
30037
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
30038
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
30039
+ (FETCHOP:AINT (match_dup 1)
30040
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
30041
(match_operand:SI 3 "const_int_operand" "")] ;; model
30044
@@ -256,9 +406,9 @@
30047
(define_expand "atomic_fetch_nand<mode>"
30048
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
30049
- (match_operand:INT1 1 "memory_operand" "") ;; memory
30050
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
30051
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
30052
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
30053
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
30054
(match_operand:SI 3 "const_int_operand" "")] ;; model
30057
@@ -268,10 +418,10 @@
30060
(define_expand "atomic_<fetchop_name>_fetch<mode>"
30061
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
30062
- (match_operand:INT1 1 "memory_operand" "") ;; memory
30063
- (FETCHOP:INT1 (match_dup 1)
30064
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
30065
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
30066
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
30067
+ (FETCHOP:AINT (match_dup 1)
30068
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
30069
(match_operand:SI 3 "const_int_operand" "")] ;; model
30072
@@ -281,9 +431,9 @@
30075
(define_expand "atomic_nand_fetch<mode>"
30076
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
30077
- (match_operand:INT1 1 "memory_operand" "") ;; memory
30078
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
30079
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
30080
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
30081
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
30082
(match_operand:SI 3 "const_int_operand" "")] ;; model
30085
--- a/src/gcc/config/rs6000/crypto.md
30086
+++ b/src/gcc/config/rs6000/crypto.md
30088
+;; Cryptographic instructions added in ISA 2.07
30089
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
30090
+;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
30092
+;; This file is part of GCC.
30094
+;; GCC is free software; you can redistribute it and/or modify it
30095
+;; under the terms of the GNU General Public License as published
30096
+;; by the Free Software Foundation; either version 3, or (at your
30097
+;; option) any later version.
30099
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
30100
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30101
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
30102
+;; License for more details.
30104
+;; You should have received a copy of the GNU General Public License
30105
+;; along with GCC; see the file COPYING3. If not see
30106
+;; <http://www.gnu.org/licenses/>.
30108
+(define_c_enum "unspec"
30111
+ UNSPEC_VCIPHERLAST
30112
+ UNSPEC_VNCIPHERLAST
30118
+;; Iterator for VPMSUM/VPERMXOR
30119
+(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI])
30121
+(define_mode_attr CR_char [(V16QI "b")
30126
+;; Iterator for VSHASIGMAD/VSHASIGMAW
30127
+(define_mode_iterator CR_hash [V4SI V2DI])
30129
+;; Iterator for the other crypto functions
30130
+(define_int_iterator CR_code [UNSPEC_VCIPHER
30132
+ UNSPEC_VCIPHERLAST
30133
+ UNSPEC_VNCIPHERLAST])
30135
+(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher")
30136
+ (UNSPEC_VNCIPHER "vncipher")
30137
+ (UNSPEC_VCIPHERLAST "vcipherlast")
30138
+ (UNSPEC_VNCIPHERLAST "vncipherlast")])
30140
+;; 2 operand crypto instructions
30141
+(define_insn "crypto_<CR_insn>"
30142
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
30143
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
30144
+ (match_operand:V2DI 2 "register_operand" "v")]
30147
+ "<CR_insn> %0,%1,%2"
30148
+ [(set_attr "type" "crypto")])
30150
+(define_insn "crypto_vpmsum<CR_char>"
30151
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
30152
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
30153
+ (match_operand:CR_mode 2 "register_operand" "v")]
30156
+ "vpmsum<CR_char> %0,%1,%2"
30157
+ [(set_attr "type" "crypto")])
30159
+;; 3 operand crypto instructions
30160
+(define_insn "crypto_vpermxor_<mode>"
30161
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
30162
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
30163
+ (match_operand:CR_mode 2 "register_operand" "v")
30164
+ (match_operand:CR_mode 3 "register_operand" "v")]
30165
+ UNSPEC_VPERMXOR))]
30167
+ "vpermxor %0,%1,%2,%3"
30168
+ [(set_attr "type" "crypto")])
30170
+;; 1 operand crypto instruction
30171
+(define_insn "crypto_vsbox"
30172
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
30173
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")]
30177
+ [(set_attr "type" "crypto")])
30179
+;; Hash crypto instructions
30180
+(define_insn "crypto_vshasigma<CR_char>"
30181
+ [(set (match_operand:CR_hash 0 "register_operand" "=v")
30182
+ (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v")
30183
+ (match_operand:SI 2 "const_0_to_1_operand" "n")
30184
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
30185
+ UNSPEC_VSHASIGMA))]
30187
+ "vshasigma<CR_char> %0,%1,%2,%3"
30188
+ [(set_attr "type" "crypto")])
30189
--- a/src/gcc/config/rs6000/rs6000.md
30190
+++ b/src/gcc/config/rs6000/rs6000.md
30191
@@ -25,10 +25,14 @@
30195
- [(STACK_POINTER_REGNUM 1)
30196
+ [(FIRST_GPR_REGNO 0)
30197
+ (STACK_POINTER_REGNUM 1)
30199
(STATIC_CHAIN_REGNUM 11)
30200
(HARD_FRAME_POINTER_REGNUM 31)
30201
+ (LAST_GPR_REGNO 31)
30202
+ (FIRST_FPR_REGNO 32)
30203
+ (LAST_FPR_REGNO 63)
30206
(ARG_POINTER_REGNUM 67)
30208
(SPE_ACC_REGNO 111)
30209
(SPEFSCR_REGNO 112)
30210
(FRAME_POINTER_REGNUM 113)
30212
- ; ABI defined stack offsets for storing the TOC pointer with AIX calls.
30213
- (TOC_SAVE_OFFSET_32BIT 20)
30214
- (TOC_SAVE_OFFSET_64BIT 40)
30216
- ; Function TOC offset in the AIX function descriptor.
30217
- (AIX_FUNC_DESC_TOC_32BIT 4)
30218
- (AIX_FUNC_DESC_TOC_64BIT 8)
30220
- ; Static chain offset in the AIX function descriptor.
30221
- (AIX_FUNC_DESC_SC_32BIT 8)
30222
- (AIX_FUNC_DESC_SC_64BIT 16)
30223
+ (TFHAR_REGNO 114)
30224
+ (TFIAR_REGNO 115)
30225
+ (TEXASR_REGNO 116)
30229
@@ -123,6 +118,12 @@
30233
+ UNSPEC_P8V_FMRGOW
30234
+ UNSPEC_P8V_MTVSRWZ
30235
+ UNSPEC_P8V_RELOAD_FROM_GPR
30236
+ UNSPEC_P8V_MTVSRD
30237
+ UNSPEC_P8V_XXPERMDI
30238
+ UNSPEC_P8V_RELOAD_FROM_VSX
30242
@@ -142,7 +143,7 @@
30244
;; Define an insn type attribute. This is used in function unit delay
30246
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt"
30247
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto,htm"
30248
(const_string "integer"))
30250
;; Define floating point instruction sub-types for use with Xfpu.md
30251
@@ -164,7 +165,7 @@
30252
;; Processor type -- this attribute must exactly match the processor_type
30253
;; enumeration in rs6000.h.
30255
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan"
30256
+(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8"
30257
(const (symbol_ref "rs6000_cpu_attr")))
30260
@@ -197,6 +198,7 @@
30261
(include "power5.md")
30262
(include "power6.md")
30263
(include "power7.md")
30264
+(include "power8.md")
30265
(include "cell.md")
30266
(include "xfpu.md")
30268
@@ -215,7 +217,7 @@
30269
(define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")])
30271
; Any supported integer mode.
30272
-(define_mode_iterator INT [QI HI SI DI TI])
30273
+(define_mode_iterator INT [QI HI SI DI TI PTI])
30275
; Any supported integer mode that fits in one register.
30276
(define_mode_iterator INT1 [QI HI SI (DI "TARGET_POWERPC64")])
30277
@@ -223,6 +225,12 @@
30278
; extend modes for DImode
30279
(define_mode_iterator QHSI [QI HI SI])
30281
+; QImode or HImode for small atomic ops
30282
+(define_mode_iterator QHI [QI HI])
30284
+; HImode or SImode for sign extended fusion ops
30285
+(define_mode_iterator HSI [HI SI])
30287
; SImode or DImode, even if DImode doesn't fit in GPRs.
30288
(define_mode_iterator SDI [SI DI])
30290
@@ -230,6 +238,10 @@
30291
; (one with a '.') will compare; and the size used for arithmetic carries.
30292
(define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
30294
+; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
30295
+; PTImode is GPR only)
30296
+(define_mode_iterator TI2 [TI PTI])
30298
; Any hardware-supported floating-point mode
30299
(define_mode_iterator FP [
30300
(SF "TARGET_HARD_FLOAT
30301
@@ -253,6 +265,49 @@
30302
(V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
30305
+; Floating point move iterators to combine binary and decimal moves
30306
+(define_mode_iterator FMOVE32 [SF SD])
30307
+(define_mode_iterator FMOVE64 [DF DD])
30308
+(define_mode_iterator FMOVE64X [DI DF DD])
30309
+(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
30310
+ (TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
30312
+; Iterators for 128 bit types for direct move
30313
+(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
30321
+; Whether a floating point move is ok, don't allow SD without hardware FP
30322
+(define_mode_attr fmove_ok [(SF "")
30324
+ (SD "TARGET_HARD_FLOAT && TARGET_FPRS")
30327
+; Convert REAL_VALUE to the appropriate bits
30328
+(define_mode_attr real_value_to_target [(SF "REAL_VALUE_TO_TARGET_SINGLE")
30329
+ (DF "REAL_VALUE_TO_TARGET_DOUBLE")
30330
+ (SD "REAL_VALUE_TO_TARGET_DECIMAL32")
30331
+ (DD "REAL_VALUE_TO_TARGET_DECIMAL64")])
30333
+; Definitions for load to 32-bit fpr register
30334
+(define_mode_attr f32_lr [(SF "f") (SD "wz")])
30335
+(define_mode_attr f32_lm [(SF "m") (SD "Z")])
30336
+(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
30337
+(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1") (SD "lxsiwzx %x0,%y1")])
30339
+; Definitions for store from 32-bit fpr register
30340
+(define_mode_attr f32_sr [(SF "f") (SD "wx")])
30341
+(define_mode_attr f32_sm [(SF "m") (SD "Z")])
30342
+(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
30343
+(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")])
30345
+; Definitions for 32-bit fpr direct move
30346
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
30348
; These modes do not fit in integer registers in 32-bit mode.
30349
; but on e500v2, the gpr are 64 bit registers
30350
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
30351
@@ -263,6 +318,25 @@
30352
; Iterator for just SF/DF
30353
(define_mode_iterator SFDF [SF DF])
30355
+; SF/DF suffix for traditional floating instructions
30356
+(define_mode_attr Ftrad [(SF "s") (DF "")])
30358
+; SF/DF suffix for VSX instructions
30359
+(define_mode_attr Fvsx [(SF "sp") (DF "dp")])
30361
+; SF/DF constraint for arithmetic on traditional floating point registers
30362
+(define_mode_attr Ff [(SF "f") (DF "d")])
30364
+; SF/DF constraint for arithmetic on VSX registers
30365
+(define_mode_attr Fv [(SF "wy") (DF "ws")])
30367
+; s/d suffix for things like fp_addsub_s/fp_addsub_d
30368
+(define_mode_attr Fs [(SF "s") (DF "d")])
30370
+; FRE/FRES support
30371
+(define_mode_attr Ffre [(SF "fres") (DF "fre")])
30372
+(define_mode_attr FFRE [(SF "FRES") (DF "FRE")])
30374
; Conditional returns.
30375
(define_code_iterator any_return [return simple_return])
30376
(define_code_attr return_pred [(return "direct_return ()")
30377
@@ -271,7 +345,14 @@
30379
; Various instructions that come in SI and DI forms.
30380
; A generic w/d attribute, for things like cmpw/cmpd.
30381
-(define_mode_attr wd [(QI "b") (HI "h") (SI "w") (DI "d")])
30382
+(define_mode_attr wd [(QI "b")
30392
(define_mode_attr dbits [(QI "56") (HI "48") (SI "32")])
30393
@@ -297,6 +378,8 @@
30395
(define_mode_attr rreg [(SF "f")
30402
@@ -311,6 +394,77 @@
30404
(define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT")
30405
(DF "TARGET_DOUBLE_FLOAT")])
30407
+;; Mode iterator for logical operations on 128-bit types
30408
+(define_mode_iterator BOOL_128 [TI
30410
+ (V16QI "TARGET_ALTIVEC")
30411
+ (V8HI "TARGET_ALTIVEC")
30412
+ (V4SI "TARGET_ALTIVEC")
30413
+ (V4SF "TARGET_ALTIVEC")
30414
+ (V2DI "TARGET_ALTIVEC")
30415
+ (V2DF "TARGET_ALTIVEC")])
30417
+;; For the GPRs we use 3 constraints for register outputs, two that are the
30418
+;; same as the output register, and a third where the output register is an
30419
+;; early clobber, so we don't have to deal with register overlaps. For the
30420
+;; vector types, we prefer to use the vector registers. For TI mode, allow
30423
+;; Mode attribute for boolean operation register constraints for output
30424
+(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wa,v")
30426
+ (V16QI "wa,v,&?r,?r,?r")
30427
+ (V8HI "wa,v,&?r,?r,?r")
30428
+ (V4SI "wa,v,&?r,?r,?r")
30429
+ (V4SF "wa,v,&?r,?r,?r")
30430
+ (V2DI "wa,v,&?r,?r,?r")
30431
+ (V2DF "wa,v,&?r,?r,?r")])
30433
+;; Mode attribute for boolean operation register constraints for operand1
30434
+(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v")
30436
+ (V16QI "wa,v,r,0,r")
30437
+ (V8HI "wa,v,r,0,r")
30438
+ (V4SI "wa,v,r,0,r")
30439
+ (V4SF "wa,v,r,0,r")
30440
+ (V2DI "wa,v,r,0,r")
30441
+ (V2DF "wa,v,r,0,r")])
30443
+;; Mode attribute for boolean operation register constraints for operand2
30444
+(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v")
30446
+ (V16QI "wa,v,r,r,0")
30447
+ (V8HI "wa,v,r,r,0")
30448
+ (V4SI "wa,v,r,r,0")
30449
+ (V4SF "wa,v,r,r,0")
30450
+ (V2DI "wa,v,r,r,0")
30451
+ (V2DF "wa,v,r,r,0")])
30453
+;; Mode attribute for boolean operation register constraints for operand1
30454
+;; for one_cmpl. To simplify things, we repeat the constraint where 0
30455
+;; is used for operand1 or operand2
30456
+(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v")
30458
+ (V16QI "wa,v,r,0,0")
30459
+ (V8HI "wa,v,r,0,0")
30460
+ (V4SI "wa,v,r,0,0")
30461
+ (V4SF "wa,v,r,0,0")
30462
+ (V2DI "wa,v,r,0,0")
30463
+ (V2DF "wa,v,r,0,0")])
30465
+;; Mode attribute for the clobber of CC0 for AND expansion.
30466
+;; For the 128-bit types, we never do AND immediate, but we need to
30467
+;; get the correct number of X's for the number of operands.
30468
+(define_mode_attr BOOL_REGS_AND_CR0 [(TI "X,X,X,X,X")
30470
+ (V16QI "X,X,X,X,X")
30471
+ (V8HI "X,X,X,X,X")
30472
+ (V4SI "X,X,X,X,X")
30473
+ (V4SF "X,X,X,X,X")
30474
+ (V2DI "X,X,X,X,X")
30475
+ (V2DF "X,X,X,X,X")])
30478
;; Start with fixed-point load and store insns. Here we put only the more
30479
;; complex forms. Basic data transfer is done later.
30480
@@ -324,11 +478,19 @@
30481
(define_insn "*zero_extend<mode>di2_internal1"
30482
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
30483
(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
30484
- "TARGET_POWERPC64"
30485
+ "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
30488
rldicl %0,%1,0,<dbits>"
30489
- [(set_attr "type" "load,*")])
30490
+ [(set_attr_alternative "type"
30492
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30493
+ (const_string "load_ux")
30495
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30496
+ (const_string "load_u")
30497
+ (const_string "load")))
30498
+ (const_string "*")])])
30500
(define_insn "*zero_extend<mode>di2_internal2"
30501
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
30502
@@ -382,6 +544,29 @@
30506
+(define_insn "*zero_extendsidi2_lfiwzx"
30507
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wu")
30508
+ (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
30509
+ "TARGET_POWERPC64 && TARGET_LFIWZX"
30512
+ rldicl %0,%1,0,32
30516
+ [(set_attr_alternative "type"
30518
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30519
+ (const_string "load_ux")
30521
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30522
+ (const_string "load_u")
30523
+ (const_string "load")))
30524
+ (const_string "*")
30525
+ (const_string "mffgpr")
30526
+ (const_string "fpload")
30527
+ (const_string "fpload")])])
30529
(define_insn "extendqidi2"
30530
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
30531
(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
30532
@@ -454,7 +639,15 @@
30536
- [(set_attr "type" "load_ext,exts")])
30537
+ [(set_attr_alternative "type"
30539
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30540
+ (const_string "load_ext_ux")
30542
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30543
+ (const_string "load_ext_u")
30544
+ (const_string "load_ext")))
30545
+ (const_string "exts")])])
30548
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
30549
@@ -521,16 +714,47 @@
30554
+(define_insn "*extendsidi2_lfiwax"
30555
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu")
30556
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
30557
+ "TARGET_POWERPC64 && TARGET_LFIWAX"
30564
+ [(set_attr_alternative "type"
30566
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30567
+ (const_string "load_ext_ux")
30569
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30570
+ (const_string "load_ext_u")
30571
+ (const_string "load_ext")))
30572
+ (const_string "exts")
30573
+ (const_string "mffgpr")
30574
+ (const_string "fpload")
30575
+ (const_string "fpload")])])
30577
+(define_insn "*extendsidi2_nocell"
30578
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
30579
(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
30580
- "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
30581
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
30585
- [(set_attr "type" "load_ext,exts")])
30586
+ [(set_attr_alternative "type"
30588
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30589
+ (const_string "load_ext_ux")
30591
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30592
+ (const_string "load_ext_u")
30593
+ (const_string "load_ext")))
30594
+ (const_string "exts")])])
30597
+(define_insn "*extendsidi2_nocell"
30598
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
30599
(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
30600
"TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
30601
@@ -602,7 +826,15 @@
30604
rlwinm %0,%1,0,0xff"
30605
- [(set_attr "type" "load,*")])
30606
+ [(set_attr_alternative "type"
30608
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30609
+ (const_string "load_ux")
30611
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30612
+ (const_string "load_u")
30613
+ (const_string "load")))
30614
+ (const_string "*")])])
30617
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
30618
@@ -722,7 +954,15 @@
30621
rlwinm %0,%1,0,0xff"
30622
- [(set_attr "type" "load,*")])
30623
+ [(set_attr_alternative "type"
30625
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30626
+ (const_string "load_ux")
30628
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30629
+ (const_string "load_u")
30630
+ (const_string "load")))
30631
+ (const_string "*")])])
30634
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
30635
@@ -848,7 +1088,15 @@
30638
rlwinm %0,%1,0,0xffff"
30639
- [(set_attr "type" "load,*")])
30640
+ [(set_attr_alternative "type"
30642
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30643
+ (const_string "load_ux")
30645
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30646
+ (const_string "load_u")
30647
+ (const_string "load")))
30648
+ (const_string "*")])])
30651
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
30652
@@ -915,7 +1163,15 @@
30656
- [(set_attr "type" "load_ext,exts")])
30657
+ [(set_attr_alternative "type"
30659
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
30660
+ (const_string "load_ext_ux")
30662
+ (match_test "update_address_mem (operands[1], VOIDmode)")
30663
+ (const_string "load_ext_u")
30664
+ (const_string "load_ext")))
30665
+ (const_string "exts")])])
30668
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
30669
@@ -1658,7 +1914,19 @@
30673
-(define_insn "one_cmpl<mode>2"
30674
+(define_expand "one_cmpl<mode>2"
30675
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "")
30676
+ (not:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))]
30679
+ if (<MODE>mode == DImode && !TARGET_POWERPC64)
30681
+ rs6000_split_logical (operands, NOT, false, false, false, NULL_RTX);
30686
+(define_insn "*one_cmpl<mode>2"
30687
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
30688
(not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
30690
@@ -1935,7 +2203,9 @@
30691
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
30692
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
30693
"TARGET_CMPB && TARGET_POPCNTB"
30694
- "prty<wd> %0,%1")
30696
+ [(set_attr "length" "4")
30697
+ (set_attr "type" "popcnt")])
30699
(define_expand "parity<mode>2"
30700
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
30701
@@ -4054,7 +4324,7 @@
30705
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
30706
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
30707
(set_attr "length" "4,4,4,8,8,8")])
30710
@@ -4086,7 +4356,7 @@
30714
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
30715
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
30716
(set_attr "length" "4,4,4,8,8,8")])
30719
@@ -4455,224 +4725,226 @@
30723
-;; Floating-point insns, excluding normal data motion.
30725
-;; PowerPC has a full set of single-precision floating point instructions.
30727
-;; For the POWER architecture, we pretend that we have both SFmode and
30728
-;; DFmode insns, while, in fact, all fp insns are actually done in double.
30729
-;; The only conversions we will do will be when storing to memory. In that
30730
-;; case, we will use the "frsp" instruction before storing.
30732
-;; Note that when we store into a single-precision memory location, we need to
30733
-;; use the frsp insn first. If the register being stored isn't dead, we
30734
-;; need a scratch register for the frsp. But this is difficult when the store
30735
-;; is done by reload. It is not incorrect to do the frsp on the register in
30736
-;; this case, we just lose precision that we would have otherwise gotten but
30737
-;; is not guaranteed. Perhaps this should be tightened up at some point.
30739
+;; Floating-point insns, excluding normal data motion. We combine the SF/DF
30740
+;; modes here, and also add in conditional vsx/power8-vector support to access
30741
+;; values in the traditional Altivec registers if the appropriate
30742
+;; -mupper-regs-{df,sf} option is enabled.
30744
-(define_expand "extendsfdf2"
30745
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
30746
- (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
30747
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
30748
+(define_expand "abs<mode>2"
30749
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
30750
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
30751
+ "TARGET_<MODE>_INSN"
30754
-(define_insn_and_split "*extendsfdf2_fpr"
30755
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d")
30756
- (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m")))]
30757
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
30758
+(define_insn "*abs<mode>2_fpr"
30759
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30760
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
30761
+ "TARGET_<MODE>_FPR"
30766
- "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
30769
- emit_note (NOTE_INSN_DELETED);
30772
- [(set_attr "type" "fp,fp,fpload")])
30775
+ [(set_attr "type" "fp")
30776
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
30778
-(define_expand "truncdfsf2"
30779
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30780
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
30781
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
30783
+(define_insn "*nabs<mode>2_fpr"
30784
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30787
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))))]
30788
+ "TARGET_<MODE>_FPR"
30791
+ xsnabsdp %x0,%x1"
30792
+ [(set_attr "type" "fp")
30793
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
30795
-(define_insn "*truncdfsf2_fpr"
30796
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30797
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
30798
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
30800
- [(set_attr "type" "fp")])
30802
-(define_expand "negsf2"
30803
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30804
- (neg:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
30805
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
30806
+(define_expand "neg<mode>2"
30807
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
30808
+ (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
30809
+ "TARGET_<MODE>_INSN"
30812
-(define_insn "*negsf2"
30813
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30814
- (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
30815
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
30817
- [(set_attr "type" "fp")])
30818
+(define_insn "*neg<mode>2_fpr"
30819
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30820
+ (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
30821
+ "TARGET_<MODE>_FPR"
30825
+ [(set_attr "type" "fp")
30826
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
30828
-(define_expand "abssf2"
30829
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30830
- (abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
30831
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
30832
+(define_expand "add<mode>3"
30833
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
30834
+ (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
30835
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
30836
+ "TARGET_<MODE>_INSN"
30839
-(define_insn "*abssf2"
30840
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30841
- (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
30842
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
30844
- [(set_attr "type" "fp")])
30845
+(define_insn "*add<mode>3_fpr"
30846
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30847
+ (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
30848
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
30849
+ "TARGET_<MODE>_FPR"
30851
+ fadd<Ftrad> %0,%1,%2
30852
+ xsadd<Fvsx> %x0,%x1,%x2"
30853
+ [(set_attr "type" "fp")
30854
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
30857
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30858
- (neg:SF (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f"))))]
30859
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
30861
- [(set_attr "type" "fp")])
30863
-(define_expand "addsf3"
30864
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30865
- (plus:SF (match_operand:SF 1 "gpc_reg_operand" "")
30866
- (match_operand:SF 2 "gpc_reg_operand" "")))]
30867
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
30868
+(define_expand "sub<mode>3"
30869
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
30870
+ (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
30871
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
30872
+ "TARGET_<MODE>_INSN"
30876
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30877
- (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
30878
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
30879
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
30881
+(define_insn "*sub<mode>3_fpr"
30882
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30883
+ (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
30884
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
30885
+ "TARGET_<MODE>_FPR"
30887
+ fsub<Ftrad> %0,%1,%2
30888
+ xssub<Fvsx> %x0,%x1,%x2"
30889
[(set_attr "type" "fp")
30890
- (set_attr "fp_type" "fp_addsub_s")])
30891
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
30893
-(define_expand "subsf3"
30894
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30895
- (minus:SF (match_operand:SF 1 "gpc_reg_operand" "")
30896
- (match_operand:SF 2 "gpc_reg_operand" "")))]
30897
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
30898
+(define_expand "mul<mode>3"
30899
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
30900
+ (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
30901
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
30902
+ "TARGET_<MODE>_INSN"
30906
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30907
- (minus:SF (match_operand:SF 1 "gpc_reg_operand" "f")
30908
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
30909
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
30911
- [(set_attr "type" "fp")
30912
- (set_attr "fp_type" "fp_addsub_s")])
30913
+(define_insn "*mul<mode>3_fpr"
30914
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30915
+ (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
30916
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
30917
+ "TARGET_<MODE>_FPR"
30919
+ fmul<Ftrad> %0,%1,%2
30920
+ xsmul<Fvsx> %x0,%x1,%x2"
30921
+ [(set_attr "type" "dmul")
30922
+ (set_attr "fp_type" "fp_mul_<Fs>")])
30924
-(define_expand "mulsf3"
30925
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30926
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "")
30927
- (match_operand:SF 2 "gpc_reg_operand" "")))]
30928
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
30929
+(define_expand "div<mode>3"
30930
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
30931
+ (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
30932
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
30933
+ "TARGET_<MODE>_INSN && !TARGET_SIMPLE_FPU"
30937
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30938
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
30939
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
30940
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
30942
- [(set_attr "type" "fp")
30943
- (set_attr "fp_type" "fp_mul_s")])
30944
+(define_insn "*div<mode>3_fpr"
30945
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30946
+ (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
30947
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
30948
+ "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU"
30950
+ fdiv<Ftrad> %0,%1,%2
30951
+ xsdiv<Fvsx> %x0,%x1,%x2"
30952
+ [(set_attr "type" "<Fs>div")
30953
+ (set_attr "fp_type" "fp_div_<Fs>")])
30955
-(define_expand "divsf3"
30956
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
30957
- (div:SF (match_operand:SF 1 "gpc_reg_operand" "")
30958
- (match_operand:SF 2 "gpc_reg_operand" "")))]
30959
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
30961
+(define_insn "sqrt<mode>2"
30962
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30963
+ (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
30964
+ "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU
30965
+ && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))"
30967
+ fsqrt<Ftrad> %0,%1
30968
+ xssqrt<Fvsx> %x0,%x1"
30969
+ [(set_attr "type" "<Fs>sqrt")
30970
+ (set_attr "fp_type" "fp_sqrt_<Fs>")])
30973
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30974
- (div:SF (match_operand:SF 1 "gpc_reg_operand" "f")
30975
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
30976
- "TARGET_HARD_FLOAT && TARGET_FPRS
30977
- && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
30979
- [(set_attr "type" "sdiv")])
30980
+;; Floating point reciprocal approximation
30981
+(define_insn "fre<Fs>"
30982
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30983
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
30988
+ xsre<Fvsx> %x0,%x1"
30989
+ [(set_attr "type" "fp")])
30991
-(define_insn "fres"
30992
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
30993
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
30996
+(define_insn "*rsqrt<mode>2"
30997
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
30998
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
31000
+ "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)"
31002
+ frsqrte<Ftrad> %0,%1
31003
+ xsrsqrte<Fvsx> %x0,%x1"
31004
[(set_attr "type" "fp")])
31006
-; builtin fmaf support
31007
-(define_insn "*fmasf4_fpr"
31008
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
31009
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
31010
- (match_operand:SF 2 "gpc_reg_operand" "f")
31011
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
31012
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
31013
- "fmadds %0,%1,%2,%3"
31014
- [(set_attr "type" "fp")
31015
- (set_attr "fp_type" "fp_maddsub_s")])
31016
+;; Floating point comparisons
31017
+(define_insn "*cmp<mode>_fpr"
31018
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y")
31019
+ (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
31020
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
31021
+ "TARGET_<MODE>_FPR"
31024
+ xscmpudp %0,%x1,%x2"
31025
+ [(set_attr "type" "fpcompare")])
31027
-(define_insn "*fmssf4_fpr"
31028
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
31029
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
31030
- (match_operand:SF 2 "gpc_reg_operand" "f")
31031
- (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
31032
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
31033
- "fmsubs %0,%1,%2,%3"
31034
- [(set_attr "type" "fp")
31035
- (set_attr "fp_type" "fp_maddsub_s")])
31036
+;; Floating point conversions
31037
+(define_expand "extendsfdf2"
31038
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
31039
+ (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
31040
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31043
-(define_insn "*nfmasf4_fpr"
31044
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
31045
- (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
31046
- (match_operand:SF 2 "gpc_reg_operand" "f")
31047
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
31048
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
31049
- "fnmadds %0,%1,%2,%3"
31050
- [(set_attr "type" "fp")
31051
- (set_attr "fp_type" "fp_maddsub_s")])
31052
+(define_insn_and_split "*extendsfdf2_fpr"
31053
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wv")
31054
+ (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))]
31055
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
31061
+ xxlor %x0,%x1,%x1
31063
+ "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
31066
+ emit_note (NOTE_INSN_DELETED);
31069
+ [(set_attr_alternative "type"
31070
+ [(const_string "fp")
31071
+ (const_string "fp")
31073
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
31074
+ (const_string "fpload_ux")
31076
+ (match_test "update_address_mem (operands[1], VOIDmode)")
31077
+ (const_string "fpload_u")
31078
+ (const_string "fpload")))
31079
+ (const_string "fp")
31080
+ (const_string "vecsimple")
31082
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
31083
+ (const_string "fpload_ux")
31085
+ (match_test "update_address_mem (operands[1], VOIDmode)")
31086
+ (const_string "fpload_u")
31087
+ (const_string "fpload")))])])
31089
-(define_insn "*nfmssf4_fpr"
31090
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
31091
- (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
31092
- (match_operand:SF 2 "gpc_reg_operand" "f")
31093
- (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
31094
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
31095
- "fnmsubs %0,%1,%2,%3"
31096
- [(set_attr "type" "fp")
31097
- (set_attr "fp_type" "fp_maddsub_s")])
31099
-(define_expand "sqrtsf2"
31100
+(define_expand "truncdfsf2"
31101
[(set (match_operand:SF 0 "gpc_reg_operand" "")
31102
- (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
31103
- "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU)
31104
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
31105
- && !TARGET_SIMPLE_FPU"
31106
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
31107
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31111
+(define_insn "*truncdfsf2_fpr"
31112
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
31113
- (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
31114
- "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT
31115
- && TARGET_FPRS && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
31117
- [(set_attr "type" "ssqrt")])
31119
-(define_insn "*rsqrtsf_internal1"
31120
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
31121
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
31123
- "TARGET_FRSQRTES"
31125
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
31126
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
31128
[(set_attr "type" "fp")])
31130
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
31131
@@ -4742,12 +5014,14 @@
31132
;; Use an unspec rather providing an if-then-else in RTL, to prevent the
31133
;; compiler from optimizing -0.0
31134
(define_insn "copysign<mode>3_fcpsgn"
31135
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
31136
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")
31137
- (match_operand:SFDF 2 "gpc_reg_operand" "<rreg2>")]
31138
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
31139
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
31140
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")]
31142
- "TARGET_CMPB && !VECTOR_UNIT_VSX_P (<MODE>mode)"
31143
- "fcpsgn %0,%2,%1"
31144
+ "TARGET_<MODE>_FPR && TARGET_CMPB"
31147
+ xscpsgn<Fvsx> %x0,%x2,%x1"
31148
[(set_attr "type" "fp")])
31150
;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a
31151
@@ -4754,27 +5028,70 @@
31152
;; fsel instruction and some auxiliary computations. Then we just have a
31153
;; single DEFINE_INSN for fsel and the define_splits to make them if made by
31155
-(define_expand "smaxsf3"
31156
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
31157
- (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
31158
- (match_operand:SF 2 "gpc_reg_operand" ""))
31161
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
31162
- && TARGET_SINGLE_FLOAT && !flag_trapping_math"
31163
- "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
31164
+;; For MIN, MAX on non-VSX machines, and conditional move all of the time, we
31165
+;; use DEFINE_EXPAND's that involve a fsel instruction and some auxiliary
31166
+;; computations. Then we just have a single DEFINE_INSN for fsel and the
31167
+;; define_splits to make them if made by combine. On VSX machines we have the
31168
+;; min/max instructions.
31170
+;; On VSX, we only check for TARGET_VSX instead of checking for a vsx/p8 vector
31171
+;; to allow either DF/SF to use only traditional registers.
31173
-(define_expand "sminsf3"
31174
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
31175
- (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
31176
- (match_operand:SF 2 "gpc_reg_operand" ""))
31179
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
31180
- && TARGET_SINGLE_FLOAT && !flag_trapping_math"
31181
- "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
31182
+(define_expand "smax<mode>3"
31183
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
31184
+ (if_then_else:SFDF (ge (match_operand:SFDF 1 "gpc_reg_operand" "")
31185
+ (match_operand:SFDF 2 "gpc_reg_operand" ""))
31188
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math"
31190
+ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]);
31194
+(define_insn "*smax<mode>3_vsx"
31195
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
31196
+ (smax:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
31197
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
31198
+ "TARGET_<MODE>_FPR && TARGET_VSX"
31199
+ "xsmaxdp %x0,%x1,%x2"
31200
+ [(set_attr "type" "fp")])
31202
+(define_expand "smin<mode>3"
31203
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
31204
+ (if_then_else:SFDF (ge (match_operand:SFDF 1 "gpc_reg_operand" "")
31205
+ (match_operand:SFDF 2 "gpc_reg_operand" ""))
31208
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math"
31210
+ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]);
31214
+(define_insn "*smin<mode>3_vsx"
31215
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
31216
+ (smin:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
31217
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
31218
+ "TARGET_<MODE>_FPR && TARGET_VSX"
31219
+ "xsmindp %x0,%x1,%x2"
31220
+ [(set_attr "type" "fp")])
31223
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
31224
+ (match_operator:SFDF 3 "min_max_operator"
31225
+ [(match_operand:SFDF 1 "gpc_reg_operand" "")
31226
+ (match_operand:SFDF 2 "gpc_reg_operand" "")]))]
31227
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math
31231
+ rs6000_emit_minmax (operands[0], GET_CODE (operands[3]), operands[1],
31237
[(set (match_operand:SF 0 "gpc_reg_operand" "")
31238
(match_operator:SF 3 "min_max_operator"
31239
[(match_operand:SF 1 "gpc_reg_operand" "")
31240
@@ -4904,208 +5221,9 @@
31242
[(set_attr "type" "fp")])
31244
-(define_expand "negdf2"
31245
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31246
- (neg:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
31247
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31250
-(define_insn "*negdf2_fpr"
31251
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31252
- (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
31253
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31254
- && !VECTOR_UNIT_VSX_P (DFmode)"
31256
- [(set_attr "type" "fp")])
31258
-(define_expand "absdf2"
31259
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31260
- (abs:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
31261
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31264
-(define_insn "*absdf2_fpr"
31265
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31266
- (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
31267
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31268
- && !VECTOR_UNIT_VSX_P (DFmode)"
31270
- [(set_attr "type" "fp")])
31272
-(define_insn "*nabsdf2_fpr"
31273
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31274
- (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d"))))]
31275
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31276
- && !VECTOR_UNIT_VSX_P (DFmode)"
31278
- [(set_attr "type" "fp")])
31280
-(define_expand "adddf3"
31281
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31282
- (plus:DF (match_operand:DF 1 "gpc_reg_operand" "")
31283
- (match_operand:DF 2 "gpc_reg_operand" "")))]
31284
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31287
-(define_insn "*adddf3_fpr"
31288
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31289
- (plus:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
31290
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
31291
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31292
- && !VECTOR_UNIT_VSX_P (DFmode)"
31294
- [(set_attr "type" "fp")
31295
- (set_attr "fp_type" "fp_addsub_d")])
31297
-(define_expand "subdf3"
31298
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31299
- (minus:DF (match_operand:DF 1 "gpc_reg_operand" "")
31300
- (match_operand:DF 2 "gpc_reg_operand" "")))]
31301
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31304
-(define_insn "*subdf3_fpr"
31305
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31306
- (minus:DF (match_operand:DF 1 "gpc_reg_operand" "d")
31307
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
31308
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31309
- && !VECTOR_UNIT_VSX_P (DFmode)"
31311
- [(set_attr "type" "fp")
31312
- (set_attr "fp_type" "fp_addsub_d")])
31314
-(define_expand "muldf3"
31315
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31316
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "")
31317
- (match_operand:DF 2 "gpc_reg_operand" "")))]
31318
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
31321
-(define_insn "*muldf3_fpr"
31322
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31323
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
31324
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
31325
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31326
- && !VECTOR_UNIT_VSX_P (DFmode)"
31328
- [(set_attr "type" "dmul")
31329
- (set_attr "fp_type" "fp_mul_d")])
31331
-(define_expand "divdf3"
31332
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31333
- (div:DF (match_operand:DF 1 "gpc_reg_operand" "")
31334
- (match_operand:DF 2 "gpc_reg_operand" "")))]
31335
- "TARGET_HARD_FLOAT
31336
- && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)
31337
- && !TARGET_SIMPLE_FPU"
31340
-(define_insn "*divdf3_fpr"
31341
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31342
- (div:DF (match_operand:DF 1 "gpc_reg_operand" "d")
31343
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
31344
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU
31345
- && !VECTOR_UNIT_VSX_P (DFmode)"
31347
- [(set_attr "type" "ddiv")])
31349
-(define_insn "*fred_fpr"
31350
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
31351
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
31352
- "TARGET_FRE && !VECTOR_UNIT_VSX_P (DFmode)"
31354
- [(set_attr "type" "fp")])
31356
-(define_insn "*rsqrtdf_internal1"
31357
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31358
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
31360
- "TARGET_FRSQRTE && !VECTOR_UNIT_VSX_P (DFmode)"
31362
- [(set_attr "type" "fp")])
31364
-; builtin fma support
31365
-(define_insn "*fmadf4_fpr"
31366
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
31367
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
31368
- (match_operand:DF 2 "gpc_reg_operand" "f")
31369
- (match_operand:DF 3 "gpc_reg_operand" "f")))]
31370
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31371
- && VECTOR_UNIT_NONE_P (DFmode)"
31372
- "fmadd %0,%1,%2,%3"
31373
- [(set_attr "type" "fp")
31374
- (set_attr "fp_type" "fp_maddsub_d")])
31376
-(define_insn "*fmsdf4_fpr"
31377
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
31378
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
31379
- (match_operand:DF 2 "gpc_reg_operand" "f")
31380
- (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
31381
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31382
- && VECTOR_UNIT_NONE_P (DFmode)"
31383
- "fmsub %0,%1,%2,%3"
31384
- [(set_attr "type" "fp")
31385
- (set_attr "fp_type" "fp_maddsub_d")])
31387
-(define_insn "*nfmadf4_fpr"
31388
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
31389
- (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
31390
- (match_operand:DF 2 "gpc_reg_operand" "f")
31391
- (match_operand:DF 3 "gpc_reg_operand" "f"))))]
31392
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31393
- && VECTOR_UNIT_NONE_P (DFmode)"
31394
- "fnmadd %0,%1,%2,%3"
31395
- [(set_attr "type" "fp")
31396
- (set_attr "fp_type" "fp_maddsub_d")])
31398
-(define_insn "*nfmsdf4_fpr"
31399
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
31400
- (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
31401
- (match_operand:DF 2 "gpc_reg_operand" "f")
31402
- (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
31403
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31404
- && VECTOR_UNIT_NONE_P (DFmode)"
31405
- "fnmsub %0,%1,%2,%3"
31406
- [(set_attr "type" "fp")
31407
- (set_attr "fp_type" "fp_maddsub_d")])
31409
-(define_expand "sqrtdf2"
31410
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31411
- (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
31412
- "TARGET_PPC_GPOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
31415
-(define_insn "*sqrtdf2_fpr"
31416
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
31417
- (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
31418
- "TARGET_PPC_GPOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31419
- && !VECTOR_UNIT_VSX_P (DFmode)"
31421
- [(set_attr "type" "dsqrt")])
31423
;; The conditional move instructions allow us to perform max and min
31424
;; operations even when
31426
-(define_expand "smaxdf3"
31427
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31428
- (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
31429
- (match_operand:DF 2 "gpc_reg_operand" ""))
31432
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31433
- && !flag_trapping_math"
31434
- "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
31436
-(define_expand "smindf3"
31437
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
31438
- (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
31439
- (match_operand:DF 2 "gpc_reg_operand" ""))
31442
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
31443
- && !flag_trapping_math"
31444
- "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
31447
[(set (match_operand:DF 0 "gpc_reg_operand" "")
31448
(match_operator:DF 3 "min_max_operator"
31449
@@ -5159,12 +5277,15 @@
31450
; We don't define lfiwax/lfiwzx with the normal definition, because we
31451
; don't want to support putting SImode in FPR registers.
31452
(define_insn "lfiwax"
31453
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
31454
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
31455
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
31456
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
31458
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
31460
- [(set_attr "type" "fpload")])
31465
+ [(set_attr "type" "fpload,fpload,mffgpr")])
31467
; This split must be run before register allocation because it allocates the
31468
; memory slot that is needed to move values to/from the FPR. We don't allocate
31469
@@ -5186,7 +5307,8 @@
31470
rtx src = operands[1];
31473
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
31474
+ if (!MEM_P (src) && TARGET_POWERPC64
31475
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
31476
tmp = convert_to_mode (DImode, src, false);
31479
@@ -5235,12 +5357,15 @@
31480
(set_attr "type" "fpload")])
31482
(define_insn "lfiwzx"
31483
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
31484
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
31485
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
31486
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
31488
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
31490
- [(set_attr "type" "fpload")])
31495
+ [(set_attr "type" "fpload,fpload,mftgpr")])
31497
(define_insn_and_split "floatunssi<mode>2_lfiwzx"
31498
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
31499
@@ -5257,7 +5382,8 @@
31500
rtx src = operands[1];
31503
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
31504
+ if (!MEM_P (src) && TARGET_POWERPC64
31505
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
31506
tmp = convert_to_mode (DImode, src, true);
31509
@@ -5548,7 +5674,7 @@
31510
emit_insn (gen_stfiwx (dest, tmp));
31513
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
31514
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
31516
dest = gen_lowpart (DImode, dest);
31517
emit_move_insn (dest, tmp);
31518
@@ -5642,7 +5768,7 @@
31519
emit_insn (gen_stfiwx (dest, tmp));
31522
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
31523
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
31525
dest = gen_lowpart (DImode, dest);
31526
emit_move_insn (dest, tmp);
31527
@@ -5781,66 +5907,52 @@
31528
[(set (match_operand:DI 0 "gpc_reg_operand" "=d")
31529
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
31531
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
31532
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
31534
[(set_attr "type" "fp")])
31536
-(define_expand "btrunc<mode>2"
31537
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
31538
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
31539
+(define_insn "btrunc<mode>2"
31540
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
31541
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
31543
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
31545
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
31549
+ [(set_attr "type" "fp")
31550
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
31552
-(define_insn "*btrunc<mode>2_fpr"
31553
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
31554
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
31556
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
31557
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
31559
- [(set_attr "type" "fp")])
31561
-(define_expand "ceil<mode>2"
31562
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
31563
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
31564
+(define_insn "ceil<mode>2"
31565
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
31566
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
31568
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
31570
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
31574
+ [(set_attr "type" "fp")
31575
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
31577
-(define_insn "*ceil<mode>2_fpr"
31578
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
31579
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
31581
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
31582
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
31584
- [(set_attr "type" "fp")])
31586
-(define_expand "floor<mode>2"
31587
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
31588
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
31589
+(define_insn "floor<mode>2"
31590
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
31591
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
31593
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
31595
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
31599
+ [(set_attr "type" "fp")
31600
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
31602
-(define_insn "*floor<mode>2_fpr"
31603
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
31604
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
31606
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
31607
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
31609
- [(set_attr "type" "fp")])
31611
;; No VSX equivalent to frin
31612
(define_insn "round<mode>2"
31613
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
31614
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
31616
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
31617
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
31619
- [(set_attr "type" "fp")])
31620
+ [(set_attr "type" "fp")
31621
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
31623
; An UNSPEC is used so we don't have to support SImode in FP registers.
31624
(define_insn "stfiwx"
31625
@@ -7195,10 +7307,19 @@
31627
[(set (match_operand:DI 0 "gpc_reg_operand" "")
31628
(and:DI (match_operand:DI 1 "gpc_reg_operand" "")
31629
- (match_operand:DI 2 "and64_2_operand" "")))
31630
+ (match_operand:DI 2 "reg_or_cint_operand" "")))
31631
(clobber (match_scratch:CC 3 ""))])]
31632
- "TARGET_POWERPC64"
31636
+ if (!TARGET_POWERPC64)
31638
+ rtx cc = gen_rtx_SCRATCH (CCmode);
31639
+ rs6000_split_logical (operands, AND, false, false, false, cc);
31642
+ else if (!and64_2_operand (operands[2], DImode))
31643
+ operands[2] = force_reg (DImode, operands[2]);
31646
(define_insn "anddi3_mc"
31647
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r,r,r")
31648
@@ -7379,12 +7500,18 @@
31649
(define_expand "iordi3"
31650
[(set (match_operand:DI 0 "gpc_reg_operand" "")
31651
(ior:DI (match_operand:DI 1 "gpc_reg_operand" "")
31652
- (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
31653
- "TARGET_POWERPC64"
31655
+ (match_operand:DI 2 "reg_or_cint_operand" "")))]
31658
- if (non_logical_cint_operand (operands[2], DImode))
31659
+ if (!TARGET_POWERPC64)
31661
+ rs6000_split_logical (operands, IOR, false, false, false, NULL_RTX);
31664
+ else if (!reg_or_logical_cint_operand (operands[2], DImode))
31665
+ operands[2] = force_reg (DImode, operands[2]);
31666
+ else if (non_logical_cint_operand (operands[2], DImode))
31668
HOST_WIDE_INT value;
31669
rtx tmp = ((!can_create_pseudo_p ()
31670
|| rtx_equal_p (operands[0], operands[1]))
31671
@@ -7408,15 +7535,21 @@
31672
emit_insn (gen_iordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
31678
(define_expand "xordi3"
31679
[(set (match_operand:DI 0 "gpc_reg_operand" "")
31680
(xor:DI (match_operand:DI 1 "gpc_reg_operand" "")
31681
- (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
31682
- "TARGET_POWERPC64"
31684
+ (match_operand:DI 2 "reg_or_cint_operand" "")))]
31687
+ if (!TARGET_POWERPC64)
31689
+ rs6000_split_logical (operands, XOR, false, false, false, NULL_RTX);
31692
+ else if (!reg_or_logical_cint_operand (operands[2], DImode))
31693
+ operands[2] = force_reg (DImode, operands[2]);
31694
if (non_logical_cint_operand (operands[2], DImode))
31696
HOST_WIDE_INT value;
31697
@@ -7442,7 +7575,7 @@
31698
emit_insn (gen_xordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
31704
(define_insn "*booldi3_internal1"
31705
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r")
31706
@@ -7678,7 +7811,385 @@
31707
(compare:CC (match_dup 0)
31712
+(define_insn "*eqv<mode>3"
31713
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
31715
+ (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
31716
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
31719
+ [(set_attr "type" "integer")
31720
+ (set_attr "length" "4")])
31723
+;; 128-bit logical operations expanders
31725
+(define_expand "and<mode>3"
31726
+ [(parallel [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31728
+ (match_operand:BOOL_128 1 "vlogical_operand" "")
31729
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))
31730
+ (clobber (match_scratch:CC 3 ""))])]
31734
+(define_expand "ior<mode>3"
31735
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31736
+ (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
31737
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))]
31741
+(define_expand "xor<mode>3"
31742
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31743
+ (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
31744
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))]
31748
+(define_expand "one_cmpl<mode>2"
31749
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31750
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")))]
31754
+(define_expand "nor<mode>3"
31755
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31757
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" ""))
31758
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
31762
+(define_expand "andc<mode>3"
31763
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31765
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))
31766
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
31770
+;; Power8 vector logical instructions.
31771
+(define_expand "eqv<mode>3"
31772
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31774
+ (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
31775
+ (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
31776
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
31779
+;; Rewrite nand into canonical form
31780
+(define_expand "nand<mode>3"
31781
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31783
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" ""))
31784
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
31785
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
31788
+;; The canonical form is to have the negated element first, so we need to
31789
+;; reverse arguments.
31790
+(define_expand "orc<mode>3"
31791
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
31793
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))
31794
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
31795
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
31798
+;; 128-bit logical operations insns and split operations
31799
+(define_insn_and_split "*and<mode>3_internal"
31800
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
31802
+ (match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>")
31803
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")))
31804
+ (clobber (match_scratch:CC 3 "<BOOL_REGS_AND_CR0>"))]
31807
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
31808
+ return "xxland %x0,%x1,%x2";
31810
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
31811
+ return "vand %0,%1,%2";
31815
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
31818
+ rs6000_split_logical (operands, AND, false, false, false, operands[3]);
31821
+ [(set (attr "type")
31823
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31824
+ (const_string "vecsimple")
31825
+ (const_string "integer")))
31826
+ (set (attr "length")
31828
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31829
+ (const_string "4")
31831
+ (match_test "TARGET_POWERPC64")
31832
+ (const_string "8")
31833
+ (const_string "16"))))])
31835
+;; 128-bit IOR/XOR
31836
+(define_insn_and_split "*bool<mode>3_internal"
31837
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
31838
+ (match_operator:BOOL_128 3 "boolean_or_operator"
31839
+ [(match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>")
31840
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
31843
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
31844
+ return "xxl%q3 %x0,%x1,%x2";
31846
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
31847
+ return "v%q3 %0,%1,%2";
31851
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
31854
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, false,
31858
+ [(set (attr "type")
31860
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31861
+ (const_string "vecsimple")
31862
+ (const_string "integer")))
31863
+ (set (attr "length")
31865
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31866
+ (const_string "4")
31868
+ (match_test "TARGET_POWERPC64")
31869
+ (const_string "8")
31870
+ (const_string "16"))))])
31872
+;; 128-bit ANDC/ORC
31873
+(define_insn_and_split "*boolc<mode>3_internal1"
31874
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
31875
+ (match_operator:BOOL_128 3 "boolean_operator"
31877
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP1>"))
31878
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
31879
+ "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
31881
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
31882
+ return "xxl%q3 %x0,%x1,%x2";
31884
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
31885
+ return "v%q3 %0,%1,%2";
31889
+ "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND))
31890
+ && reload_completed && int_reg_operand (operands[0], <MODE>mode)"
31893
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
31897
+ [(set (attr "type")
31899
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31900
+ (const_string "vecsimple")
31901
+ (const_string "integer")))
31902
+ (set (attr "length")
31904
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31905
+ (const_string "4")
31907
+ (match_test "TARGET_POWERPC64")
31908
+ (const_string "8")
31909
+ (const_string "16"))))])
31911
+(define_insn_and_split "*boolc<mode>3_internal2"
31912
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
31913
+ (match_operator:TI2 3 "boolean_operator"
31915
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
31916
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0")]))]
31917
+ "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
31919
+ "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
31922
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
31926
+ [(set_attr "type" "integer")
31927
+ (set (attr "length")
31929
+ (match_test "TARGET_POWERPC64")
31930
+ (const_string "8")
31931
+ (const_string "16")))])
31933
+;; 128-bit NAND/NOR
31934
+(define_insn_and_split "*boolcc<mode>3_internal1"
31935
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
31936
+ (match_operator:BOOL_128 3 "boolean_operator"
31938
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>"))
31940
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))]))]
31941
+ "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
31943
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
31944
+ return "xxl%q3 %x0,%x1,%x2";
31946
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
31947
+ return "v%q3 %0,%1,%2";
31951
+ "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND))
31952
+ && reload_completed && int_reg_operand (operands[0], <MODE>mode)"
31955
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true,
31959
+ [(set (attr "type")
31961
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31962
+ (const_string "vecsimple")
31963
+ (const_string "integer")))
31964
+ (set (attr "length")
31966
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
31967
+ (const_string "4")
31969
+ (match_test "TARGET_POWERPC64")
31970
+ (const_string "8")
31971
+ (const_string "16"))))])
31973
+(define_insn_and_split "*boolcc<mode>3_internal2"
31974
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
31975
+ (match_operator:TI2 3 "boolean_operator"
31977
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
31979
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0"))]))]
31980
+ "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
31982
+ "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
31985
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true,
31989
+ [(set_attr "type" "integer")
31990
+ (set (attr "length")
31992
+ (match_test "TARGET_POWERPC64")
31993
+ (const_string "8")
31994
+ (const_string "16")))])
31998
+(define_insn_and_split "*eqv<mode>3_internal1"
31999
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
32002
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>")
32003
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))))]
32004
+ "TARGET_P8_VECTOR"
32006
+ if (vsx_register_operand (operands[0], <MODE>mode))
32007
+ return "xxleqv %x0,%x1,%x2";
32011
+ "TARGET_P8_VECTOR && reload_completed
32012
+ && int_reg_operand (operands[0], <MODE>mode)"
32015
+ rs6000_split_logical (operands, XOR, true, false, false, NULL_RTX);
32018
+ [(set (attr "type")
32020
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
32021
+ (const_string "vecsimple")
32022
+ (const_string "integer")))
32023
+ (set (attr "length")
32025
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
32026
+ (const_string "4")
32028
+ (match_test "TARGET_POWERPC64")
32029
+ (const_string "8")
32030
+ (const_string "16"))))])
32032
+(define_insn_and_split "*eqv<mode>3_internal2"
32033
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
32036
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r")
32037
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0"))))]
32038
+ "!TARGET_P8_VECTOR"
32040
+ "reload_completed && !TARGET_P8_VECTOR"
32043
+ rs6000_split_logical (operands, XOR, true, false, false, NULL_RTX);
32046
+ [(set_attr "type" "integer")
32047
+ (set (attr "length")
32049
+ (match_test "TARGET_POWERPC64")
32050
+ (const_string "8")
32051
+ (const_string "16")))])
32053
+;; 128-bit one's complement
32054
+(define_insn_and_split "*one_cmpl<mode>3_internal"
32055
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
32057
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))]
32060
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
32061
+ return "xxlnor %x0,%x1,%x1";
32063
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
32064
+ return "vnor %0,%1,%1";
32068
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
32071
+ rs6000_split_logical (operands, NOT, false, false, false, NULL_RTX);
32074
+ [(set (attr "type")
32076
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
32077
+ (const_string "vecsimple")
32078
+ (const_string "integer")))
32079
+ (set (attr "length")
32081
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
32082
+ (const_string "4")
32084
+ (match_test "TARGET_POWERPC64")
32085
+ (const_string "8")
32086
+ (const_string "16"))))])
32089
;; Now define ways of moving data around.
32091
;; Set up a register with a value from the GOT table
32092
@@ -7765,7 +8276,31 @@
32096
- [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*")
32097
+ [(set_attr_alternative "type"
32098
+ [(const_string "*")
32099
+ (const_string "*")
32101
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32102
+ (const_string "load_ux")
32104
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32105
+ (const_string "load_u")
32106
+ (const_string "load")))
32108
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32109
+ (const_string "store_ux")
32111
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32112
+ (const_string "store_u")
32113
+ (const_string "store")))
32114
+ (const_string "*")
32115
+ (const_string "*")
32116
+ (const_string "*")
32117
+ (const_string "mfjmpr")
32118
+ (const_string "mtjmpr")
32119
+ (const_string "*")
32120
+ (const_string "*")])
32122
(set_attr "length" "4,4,4,4,4,4,8,4,4,4,4")])
32124
(define_insn "*movsi_internal1_single"
32125
@@ -7787,7 +8322,44 @@
32129
- [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*,*,*")
32130
+ [(set_attr_alternative "type"
32131
+ [(const_string "*")
32132
+ (const_string "*")
32134
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32135
+ (const_string "load_ux")
32137
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32138
+ (const_string "load_u")
32139
+ (const_string "load")))
32141
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32142
+ (const_string "store_ux")
32144
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32145
+ (const_string "store_u")
32146
+ (const_string "store")))
32147
+ (const_string "*")
32148
+ (const_string "*")
32149
+ (const_string "*")
32150
+ (const_string "mfjmpr")
32151
+ (const_string "mtjmpr")
32152
+ (const_string "*")
32153
+ (const_string "*")
32155
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32156
+ (const_string "fpstore_ux")
32158
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32159
+ (const_string "fpstore_u")
32160
+ (const_string "fpstore")))
32162
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32163
+ (const_string "fpload_ux")
32165
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32166
+ (const_string "fpload_u")
32167
+ (const_string "fpload")))])
32168
(set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4")])
32170
;; Split a load of a large constant into the appropriate two-insn
32171
@@ -7822,7 +8394,7 @@
32175
- [(set_attr "type" "cmp,compare,cmp")
32176
+ [(set_attr "type" "cmp,fast_compare,cmp")
32177
(set_attr "length" "4,4,8")])
32180
@@ -7850,7 +8422,26 @@
32184
- [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")])
32185
+ [(set_attr_alternative "type"
32186
+ [(const_string "*")
32188
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32189
+ (const_string "load_ux")
32191
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32192
+ (const_string "load_u")
32193
+ (const_string "load")))
32195
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32196
+ (const_string "store_ux")
32198
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32199
+ (const_string "store_u")
32200
+ (const_string "store")))
32201
+ (const_string "*")
32202
+ (const_string "mfjmpr")
32203
+ (const_string "mtjmpr")
32204
+ (const_string "*")])])
32206
(define_expand "mov<mode>"
32207
[(set (match_operand:INT 0 "general_operand" "")
32208
@@ -7871,7 +8462,26 @@
32212
- [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")])
32213
+ [(set_attr_alternative "type"
32214
+ [(const_string "*")
32216
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32217
+ (const_string "load_ux")
32219
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32220
+ (const_string "load_u")
32221
+ (const_string "load")))
32223
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32224
+ (const_string "store_ux")
32226
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32227
+ (const_string "store_u")
32228
+ (const_string "store")))
32229
+ (const_string "*")
32230
+ (const_string "mfjmpr")
32231
+ (const_string "mtjmpr")
32232
+ (const_string "*")])])
32234
;; Here is how to move condition codes around. When we store CC data in
32235
;; an integer register or memory, we store just the high-order 4 bits.
32236
@@ -7899,7 +8509,7 @@
32242
[(set (attr "type")
32243
(cond [(eq_attr "alternative" "0,3")
32244
(const_string "cr_logical")
32245
@@ -7912,9 +8522,23 @@
32246
(eq_attr "alternative" "9")
32247
(const_string "mtjmpr")
32248
(eq_attr "alternative" "10")
32249
- (const_string "load")
32251
+ (match_test "update_indexed_address_mem (operands[1],
32253
+ (const_string "load_ux")
32255
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32256
+ (const_string "load_u")
32257
+ (const_string "load")))
32258
(eq_attr "alternative" "11")
32259
- (const_string "store")
32261
+ (match_test "update_indexed_address_mem (operands[0],
32263
+ (const_string "store_ux")
32265
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32266
+ (const_string "store_u")
32267
+ (const_string "store")))
32268
(match_test "TARGET_MFCRF")
32269
(const_string "mfcrf")
32271
@@ -7926,15 +8550,17 @@
32272
;; can produce floating-point values in fixed-point registers. Unless the
32273
;; value is a simple constant or already in memory, we deal with this by
32274
;; allocating memory and copying the value explicitly via that memory location.
32275
-(define_expand "movsf"
32276
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
32277
- (match_operand:SF 1 "any_operand" ""))]
32279
- "{ rs6000_emit_move (operands[0], operands[1], SFmode); DONE; }")
32281
+;; Move 32-bit binary/decimal floating point
32282
+(define_expand "mov<mode>"
32283
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "")
32284
+ (match_operand:FMOVE32 1 "any_operand" ""))]
32286
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
32289
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
32290
- (match_operand:SF 1 "const_double_operand" ""))]
32291
+ [(set (match_operand:FMOVE32 0 "gpc_reg_operand" "")
32292
+ (match_operand:FMOVE32 1 "const_double_operand" ""))]
32294
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
32295
|| (GET_CODE (operands[0]) == SUBREG
32296
@@ -7947,10 +8573,10 @@
32297
REAL_VALUE_TYPE rv;
32299
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
32300
- REAL_VALUE_TO_TARGET_SINGLE (rv, l);
32301
+ <real_value_to_target> (rv, l);
32303
if (! TARGET_POWERPC64)
32304
- operands[2] = operand_subword (operands[0], 0, 0, SFmode);
32305
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
32307
operands[2] = gen_lowpart (SImode, operands[0]);
32309
@@ -7957,11 +8583,11 @@
32310
operands[3] = gen_int_mode (l, SImode);
32313
-(define_insn "*movsf_hardfloat"
32314
- [(set (match_operand:SF 0 "nonimmediate_operand" "=!r,!r,m,f,f,m,*c*l,!r,*h,!r,!r")
32315
- (match_operand:SF 1 "input_operand" "r,m,r,f,m,f,r,h,0,G,Fn"))]
32316
- "(gpc_reg_operand (operands[0], SFmode)
32317
- || gpc_reg_operand (operands[1], SFmode))
32318
+(define_insn "mov<mode>_hardfloat"
32319
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wu,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
32320
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wu,r,<f32_dm>,r,h,0,G,Fn"))]
32321
+ "(gpc_reg_operand (operands[0], <MODE>mode)
32322
+ || gpc_reg_operand (operands[1], <MODE>mode))
32323
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
32326
@@ -7968,21 +8594,68 @@
32332
+ xxlor %x0,%x1,%x1
32333
+ xxlxor %x0,%x0,%x0
32345
- [(set_attr "type" "*,load,store,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
32346
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8")])
32347
+ [(set_attr_alternative "type"
32348
+ [(const_string "*")
32350
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32351
+ (const_string "load_ux")
32353
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32354
+ (const_string "load_u")
32355
+ (const_string "load")))
32357
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32358
+ (const_string "store_ux")
32360
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32361
+ (const_string "store_u")
32362
+ (const_string "store")))
32363
+ (const_string "fp")
32364
+ (const_string "vecsimple")
32365
+ (const_string "vecsimple")
32367
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32368
+ (const_string "fpload_ux")
32370
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32371
+ (const_string "fpload_u")
32372
+ (const_string "fpload")))
32374
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32375
+ (const_string "fpstore_ux")
32377
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32378
+ (const_string "fpstore_u")
32379
+ (const_string "fpstore")))
32380
+ (const_string "fpload")
32381
+ (const_string "fpstore")
32382
+ (const_string "mftgpr")
32383
+ (const_string "mffgpr")
32384
+ (const_string "mtjmpr")
32385
+ (const_string "mfjmpr")
32386
+ (const_string "*")
32387
+ (const_string "*")
32388
+ (const_string "*")])
32389
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
32391
-(define_insn "*movsf_softfloat"
32392
- [(set (match_operand:SF 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
32393
- (match_operand:SF 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
32394
- "(gpc_reg_operand (operands[0], SFmode)
32395
- || gpc_reg_operand (operands[1], SFmode))
32396
+(define_insn "*mov<mode>_softfloat"
32397
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
32398
+ (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
32399
+ "(gpc_reg_operand (operands[0], <MODE>mode)
32400
+ || gpc_reg_operand (operands[1], <MODE>mode))
32401
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
32404
@@ -7995,19 +8668,42 @@
32408
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*")
32409
+ [(set_attr_alternative "type"
32410
+ [(const_string "*")
32411
+ (const_string "mtjmpr")
32412
+ (const_string "mfjmpr")
32414
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32415
+ (const_string "load_ux")
32417
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32418
+ (const_string "load_u")
32419
+ (const_string "load")))
32421
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32422
+ (const_string "store_ux")
32424
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32425
+ (const_string "store_u")
32426
+ (const_string "store")))
32427
+ (const_string "*")
32428
+ (const_string "*")
32429
+ (const_string "*")
32430
+ (const_string "*")
32431
+ (const_string "*")])
32432
(set_attr "length" "4,4,4,4,4,4,4,4,8,4")])
32435
-(define_expand "movdf"
32436
- [(set (match_operand:DF 0 "nonimmediate_operand" "")
32437
- (match_operand:DF 1 "any_operand" ""))]
32438
+;; Move 64-bit binary/decimal floating point
32439
+(define_expand "mov<mode>"
32440
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "")
32441
+ (match_operand:FMOVE64 1 "any_operand" ""))]
32443
- "{ rs6000_emit_move (operands[0], operands[1], DFmode); DONE; }")
32444
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
32447
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
32448
- (match_operand:DF 1 "const_int_operand" ""))]
32449
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
32450
+ (match_operand:FMOVE64 1 "const_int_operand" ""))]
32451
"! TARGET_POWERPC64 && reload_completed
32452
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
32453
|| (GET_CODE (operands[0]) == SUBREG
32454
@@ -8020,8 +8716,8 @@
32455
int endian = (WORDS_BIG_ENDIAN == 0);
32456
HOST_WIDE_INT value = INTVAL (operands[1]);
32458
- operands[2] = operand_subword (operands[0], endian, 0, DFmode);
32459
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
32460
+ operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode);
32461
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode);
32462
#if HOST_BITS_PER_WIDE_INT == 32
32463
operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
32465
@@ -8031,8 +8727,8 @@
32469
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
32470
- (match_operand:DF 1 "const_double_operand" ""))]
32471
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
32472
+ (match_operand:FMOVE64 1 "const_double_operand" ""))]
32473
"! TARGET_POWERPC64 && reload_completed
32474
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
32475
|| (GET_CODE (operands[0]) == SUBREG
32476
@@ -8047,17 +8743,17 @@
32477
REAL_VALUE_TYPE rv;
32479
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
32480
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
32481
+ <real_value_to_target> (rv, l);
32483
- operands[2] = operand_subword (operands[0], endian, 0, DFmode);
32484
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
32485
+ operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode);
32486
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode);
32487
operands[4] = gen_int_mode (l[endian], SImode);
32488
operands[5] = gen_int_mode (l[1 - endian], SImode);
32492
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
32493
- (match_operand:DF 1 "const_double_operand" ""))]
32494
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
32495
+ (match_operand:FMOVE64 1 "const_double_operand" ""))]
32496
"TARGET_POWERPC64 && reload_completed
32497
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
32498
|| (GET_CODE (operands[0]) == SUBREG
32499
@@ -8074,7 +8770,7 @@
32502
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
32503
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
32504
+ <real_value_to_target> (rv, l);
32506
operands[2] = gen_lowpart (DImode, operands[0]);
32507
/* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
32508
@@ -8099,22 +8795,19 @@
32509
;; since the D-form version of the memory instructions does not need a GPR for
32512
-(define_insn "*movdf_hardfloat32"
32513
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r")
32514
- (match_operand:DF 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))]
32515
+(define_insn "*mov<mode>_hardfloat32"
32516
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r")
32517
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))]
32518
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
32519
- && (gpc_reg_operand (operands[0], DFmode)
32520
- || gpc_reg_operand (operands[1], DFmode))"
32521
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32522
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32530
- stxsd%U0x %x1,%y0
32532
- xxlor %x0,%x1,%x1
32536
@@ -8122,115 +8815,140 @@
32540
- [(set_attr "type" "fpstore,fpload,fp,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,store,load,two,fp,fp,*")
32541
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")])
32542
+ [(set_attr_alternative "type"
32544
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32545
+ (const_string "fpstore_ux")
32547
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32548
+ (const_string "fpstore_u")
32549
+ (const_string "fpstore")))
32551
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32552
+ (const_string "fpload_ux")
32554
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32555
+ (const_string "fpload_u")
32556
+ (const_string "fpload")))
32557
+ (const_string "fp")
32559
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32560
+ (const_string "fpload_ux")
32561
+ (const_string "fpload"))
32563
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32564
+ (const_string "fpstore_ux")
32565
+ (const_string "fpstore"))
32566
+ (const_string "vecsimple")
32567
+ (const_string "vecsimple")
32568
+ (const_string "store")
32569
+ (const_string "load")
32570
+ (const_string "two")
32571
+ (const_string "fp")
32572
+ (const_string "fp")
32573
+ (const_string "*")])
32574
+ (set_attr "length" "4,4,4,4,4,4,4,8,8,8,8,12,16")])
32576
-(define_insn "*movdf_softfloat32"
32577
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
32578
- (match_operand:DF 1 "input_operand" "r,Y,r,G,H,F"))]
32579
+(define_insn "*mov<mode>_softfloat32"
32580
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
32581
+ (match_operand:FMOVE64 1 "input_operand" "r,Y,r,G,H,F"))]
32582
"! TARGET_POWERPC64
32583
&& ((TARGET_FPRS && TARGET_SINGLE_FLOAT)
32584
|| TARGET_SOFT_FLOAT || TARGET_E500_SINGLE)
32585
- && (gpc_reg_operand (operands[0], DFmode)
32586
- || gpc_reg_operand (operands[1], DFmode))"
32587
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32588
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32590
[(set_attr "type" "store,load,two,*,*,*")
32591
(set_attr "length" "8,8,8,8,12,16")])
32593
-;; Reload patterns to support gpr load/store with misaligned mem.
32594
-;; and multiple gpr load/store at offset >= 0xfffc
32595
-(define_expand "reload_<mode>_store"
32596
- [(parallel [(match_operand 0 "memory_operand" "=m")
32597
- (match_operand 1 "gpc_reg_operand" "r")
32598
- (match_operand:GPR 2 "register_operand" "=&b")])]
32601
- rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
32605
-(define_expand "reload_<mode>_load"
32606
- [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
32607
- (match_operand 1 "memory_operand" "m")
32608
- (match_operand:GPR 2 "register_operand" "=b")])]
32611
- rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
32615
; ld/std require word-aligned displacements -> 'Y' constraint.
32616
; List Y->r and r->Y before r->r for reload.
32617
-(define_insn "*movdf_hardfloat64_mfpgpr"
32618
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r,r,d")
32619
- (match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))]
32620
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
32621
- && TARGET_DOUBLE_FLOAT
32622
- && (gpc_reg_operand (operands[0], DFmode)
32623
- || gpc_reg_operand (operands[1], DFmode))"
32624
+(define_insn "*mov<mode>_hardfloat64"
32625
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
32626
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
32627
+ "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
32628
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32629
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32634
- xxlor %x0,%x1,%x1
32635
- xxlor %x0,%x1,%x1
32638
- stxsd%U0x %x1,%y0
32639
- stxsd%U0x %x1,%y0
32644
+ stxsd%U0x %x1,%y0
32645
+ xxlor %x0,%x1,%x1
32655
- [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
32656
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
32658
-; ld/std require word-aligned displacements -> 'Y' constraint.
32659
-; List Y->r and r->Y before r->r for reload.
32660
-(define_insn "*movdf_hardfloat64"
32661
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,Y,r,!r,ws,?wa,Z,?Z,ws,?wa,wa,*c*l,!r,*h,!r,!r,!r")
32662
- (match_operand:DF 1 "input_operand" "d,m,d,r,Y,r,Z,Z,ws,wa,ws,wa,j,r,h,0,G,H,F"))]
32663
- "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
32664
- && TARGET_DOUBLE_FLOAT
32665
- && (gpc_reg_operand (operands[0], DFmode)
32666
- || gpc_reg_operand (operands[1], DFmode))"
32676
- stxsd%U0x %x1,%y0
32677
- stxsd%U0x %x1,%y0
32678
- xxlor %x0,%x1,%x1
32679
- xxlor %x0,%x1,%x1
32680
- xxlxor %x0,%x0,%x0
32687
- [(set_attr "type" "fpstore,fpload,fp,store,load,*,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
32688
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
32694
+ [(set_attr_alternative "type"
32696
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32697
+ (const_string "fpstore_ux")
32699
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32700
+ (const_string "fpstore_u")
32701
+ (const_string "fpstore")))
32703
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32704
+ (const_string "fpload_ux")
32706
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32707
+ (const_string "fpload_u")
32708
+ (const_string "fpload")))
32709
+ (const_string "fp")
32711
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32712
+ (const_string "fpload_ux")
32713
+ (const_string "fpload"))
32715
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32716
+ (const_string "fpstore_ux")
32717
+ (const_string "fpstore"))
32718
+ (const_string "vecsimple")
32719
+ (const_string "vecsimple")
32721
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32722
+ (const_string "store_ux")
32724
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32725
+ (const_string "store_u")
32726
+ (const_string "store")))
32728
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32729
+ (const_string "load_ux")
32731
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32732
+ (const_string "load_u")
32733
+ (const_string "load")))
32734
+ (const_string "*")
32735
+ (const_string "mtjmpr")
32736
+ (const_string "mfjmpr")
32737
+ (const_string "*")
32738
+ (const_string "*")
32739
+ (const_string "*")
32740
+ (const_string "*")
32741
+ (const_string "mftgpr")
32742
+ (const_string "mffgpr")
32743
+ (const_string "mftgpr")
32744
+ (const_string "mffgpr")])
32745
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
32747
-(define_insn "*movdf_softfloat64"
32748
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
32749
- (match_operand:DF 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
32750
+(define_insn "*mov<mode>_softfloat64"
32751
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
32752
+ (match_operand:FMOVE64 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
32753
"TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
32754
- && (gpc_reg_operand (operands[0], DFmode)
32755
- || gpc_reg_operand (operands[1], DFmode))"
32756
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32757
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32761
@@ -8241,38 +8959,87 @@
32765
- [(set_attr "type" "store,load,*,mtjmpr,mfjmpr,*,*,*,*")
32766
+ [(set_attr_alternative "type"
32768
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
32769
+ (const_string "store_ux")
32771
+ (match_test "update_address_mem (operands[0], VOIDmode)")
32772
+ (const_string "store_u")
32773
+ (const_string "store")))
32775
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
32776
+ (const_string "load_ux")
32778
+ (match_test "update_address_mem (operands[1], VOIDmode)")
32779
+ (const_string "load_u")
32780
+ (const_string "load")))
32781
+ (const_string "*")
32782
+ (const_string "mtjmpr")
32783
+ (const_string "mfjmpr")
32784
+ (const_string "*")
32785
+ (const_string "*")
32786
+ (const_string "*")
32787
+ (const_string "*")])
32788
(set_attr "length" "4,4,4,4,4,8,12,16,4")])
32790
-(define_expand "movtf"
32791
- [(set (match_operand:TF 0 "general_operand" "")
32792
- (match_operand:TF 1 "any_operand" ""))]
32793
- "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
32794
- "{ rs6000_emit_move (operands[0], operands[1], TFmode); DONE; }")
32795
+(define_expand "mov<mode>"
32796
+ [(set (match_operand:FMOVE128 0 "general_operand" "")
32797
+ (match_operand:FMOVE128 1 "any_operand" ""))]
32799
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
32801
;; It's important to list Y->r and r->Y before r->r because otherwise
32802
;; reload, given m->r, will try to pick r->r and reload it, which
32803
;; doesn't make progress.
32804
-(define_insn_and_split "*movtf_internal"
32805
- [(set (match_operand:TF 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
32806
- (match_operand:TF 1 "input_operand" "d,m,d,r,YGHF,r"))]
32807
- "!TARGET_IEEEQUAD
32808
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
32809
- && (gpc_reg_operand (operands[0], TFmode)
32810
- || gpc_reg_operand (operands[1], TFmode))"
32812
+;; We can't split little endian direct moves of TDmode, because the words are
32813
+;; not swapped like they are for TImode or TFmode. Subregs therefore are
32814
+;; problematical. Don't allow direct move for this case.
32816
+(define_insn_and_split "*mov<mode>_64bit_dm"
32817
+ [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r,r,wm")
32818
+ (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r,wm,r"))]
32819
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
32820
+ && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
32821
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32822
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32824
"&& reload_completed"
32826
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
32827
+ [(set_attr "length" "8,8,8,12,12,8,8,8")])
32829
+(define_insn_and_split "*movtd_64bit_nodm"
32830
+ [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
32831
+ (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
32832
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && !WORDS_BIG_ENDIAN
32833
+ && (gpc_reg_operand (operands[0], TDmode)
32834
+ || gpc_reg_operand (operands[1], TDmode))"
32836
+ "&& reload_completed"
32838
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
32839
+ [(set_attr "length" "8,8,8,12,12,8")])
32841
+(define_insn_and_split "*mov<mode>_32bit"
32842
+ [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
32843
+ (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r"))]
32844
+ "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
32845
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32846
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32848
+ "&& reload_completed"
32850
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
32851
[(set_attr "length" "8,8,8,20,20,16")])
32853
-(define_insn_and_split "*movtf_softfloat"
32854
- [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=Y,r,r")
32855
- (match_operand:TF 1 "input_operand" "r,YGHF,r"))]
32856
- "!TARGET_IEEEQUAD
32857
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_LONG_DOUBLE_128
32858
- && (gpc_reg_operand (operands[0], TFmode)
32859
- || gpc_reg_operand (operands[1], TFmode))"
32860
+(define_insn_and_split "*mov<mode>_softfloat"
32861
+ [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r")
32862
+ (match_operand:FMOVE128 1 "input_operand" "r,YGHF,r"))]
32863
+ "(TARGET_SOFT_FLOAT || !TARGET_FPRS)
32864
+ && (gpc_reg_operand (operands[0], <MODE>mode)
32865
+ || gpc_reg_operand (operands[1], <MODE>mode))"
32867
"&& reload_completed"
32869
@@ -8557,6 +9324,252 @@
32870
operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
32873
+;; Reload helper functions used by rs6000_secondary_reload. The patterns all
32874
+;; must have 3 arguments, and scratch register constraint must be a single
32877
+;; Reload patterns to support gpr load/store with misaligned mem.
32878
+;; and multiple gpr load/store at offset >= 0xfffc
32879
+(define_expand "reload_<mode>_store"
32880
+ [(parallel [(match_operand 0 "memory_operand" "=m")
32881
+ (match_operand 1 "gpc_reg_operand" "r")
32882
+ (match_operand:GPR 2 "register_operand" "=&b")])]
32885
+ rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
32889
+(define_expand "reload_<mode>_load"
32890
+ [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
32891
+ (match_operand 1 "memory_operand" "m")
32892
+ (match_operand:GPR 2 "register_operand" "=b")])]
32895
+ rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
32900
+;; Power8 merge instructions to allow direct move to/from floating point
32901
+;; registers in 32-bit mode. We use TF mode to get two registers to move the
32902
+;; individual 32-bit parts across. Subreg doesn't work too well on the TF
32903
+;; value, since it is allocated in reload and not all of the flow information
32904
+;; is setup for it. We have two patterns to do the two moves between gprs and
32905
+;; fprs. There isn't a dependancy between the two, but we could potentially
32906
+;; schedule other instructions between the two instructions. TFmode is
32907
+;; currently limited to traditional FPR registers. If/when this is changed, we
32908
+;; will need to revist %L to make sure it works with VSX registers, or add an
32909
+;; %x version of %L.
32911
+(define_insn "p8_fmrgow_<mode>"
32912
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
32913
+ (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
32914
+ UNSPEC_P8V_FMRGOW))]
32915
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32916
+ "fmrgow %0,%1,%L1"
32917
+ [(set_attr "type" "vecperm")])
32919
+(define_insn "p8_mtvsrwz_1"
32920
+ [(set (match_operand:TF 0 "register_operand" "=d")
32921
+ (unspec:TF [(match_operand:SI 1 "register_operand" "r")]
32922
+ UNSPEC_P8V_MTVSRWZ))]
32923
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32925
+ [(set_attr "type" "mftgpr")])
32927
+(define_insn "p8_mtvsrwz_2"
32928
+ [(set (match_operand:TF 0 "register_operand" "+d")
32929
+ (unspec:TF [(match_dup 0)
32930
+ (match_operand:SI 1 "register_operand" "r")]
32931
+ UNSPEC_P8V_MTVSRWZ))]
32932
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32934
+ [(set_attr "type" "mftgpr")])
32936
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
32937
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
32938
+ (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
32939
+ UNSPEC_P8V_RELOAD_FROM_GPR))
32940
+ (clobber (match_operand:TF 2 "register_operand" "=d"))]
32941
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32943
+ "&& reload_completed"
32946
+ rtx dest = operands[0];
32947
+ rtx src = operands[1];
32948
+ rtx tmp = operands[2];
32949
+ rtx gpr_hi_reg = gen_highpart (SImode, src);
32950
+ rtx gpr_lo_reg = gen_lowpart (SImode, src);
32952
+ emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
32953
+ emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
32954
+ emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
32957
+ [(set_attr "length" "12")
32958
+ (set_attr "type" "three")])
32960
+;; Move 128 bit values from GPRs to VSX registers in 64-bit mode
32961
+(define_insn "p8_mtvsrd_1"
32962
+ [(set (match_operand:TF 0 "register_operand" "=ws")
32963
+ (unspec:TF [(match_operand:DI 1 "register_operand" "r")]
32964
+ UNSPEC_P8V_MTVSRD))]
32965
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32967
+ [(set_attr "type" "mftgpr")])
32969
+(define_insn "p8_mtvsrd_2"
32970
+ [(set (match_operand:TF 0 "register_operand" "+ws")
32971
+ (unspec:TF [(match_dup 0)
32972
+ (match_operand:DI 1 "register_operand" "r")]
32973
+ UNSPEC_P8V_MTVSRD))]
32974
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32976
+ [(set_attr "type" "mftgpr")])
32978
+(define_insn "p8_xxpermdi_<mode>"
32979
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
32980
+ (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
32981
+ UNSPEC_P8V_XXPERMDI))]
32982
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32983
+ "xxpermdi %x0,%1,%L1,0"
32984
+ [(set_attr "type" "vecperm")])
32986
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
32987
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
32988
+ (unspec:FMOVE128_GPR
32989
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
32990
+ UNSPEC_P8V_RELOAD_FROM_GPR))
32991
+ (clobber (match_operand:TF 2 "register_operand" "=ws"))]
32992
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
32994
+ "&& reload_completed"
32997
+ rtx dest = operands[0];
32998
+ rtx src = operands[1];
32999
+ rtx tmp = operands[2];
33000
+ rtx gpr_hi_reg = gen_highpart (DImode, src);
33001
+ rtx gpr_lo_reg = gen_lowpart (DImode, src);
33003
+ emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
33004
+ emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
33005
+ emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
33007
+ [(set_attr "length" "12")
33008
+ (set_attr "type" "three")])
33011
+ [(set (match_operand:FMOVE128_GPR 0 "nonimmediate_operand" "")
33012
+ (match_operand:FMOVE128_GPR 1 "input_operand" ""))]
33013
+ "reload_completed
33014
+ && (int_reg_operand (operands[0], <MODE>mode)
33015
+ || int_reg_operand (operands[1], <MODE>mode))"
33017
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
33019
+;; Move SFmode to a VSX from a GPR register. Because scalar floating point
33020
+;; type is stored internally as double precision in the VSX registers, we have
33021
+;; to convert it from the vector format.
33023
+(define_insn_and_split "reload_vsx_from_gprsf"
33024
+ [(set (match_operand:SF 0 "register_operand" "=wa")
33025
+ (unspec:SF [(match_operand:SF 1 "register_operand" "r")]
33026
+ UNSPEC_P8V_RELOAD_FROM_GPR))
33027
+ (clobber (match_operand:DI 2 "register_operand" "=r"))]
33028
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
33030
+ "&& reload_completed"
33033
+ rtx op0 = operands[0];
33034
+ rtx op1 = operands[1];
33035
+ rtx op2 = operands[2];
33036
+ rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
33037
+ rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
33039
+ /* Move SF value to upper 32-bits for xscvspdpn. */
33040
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
33041
+ emit_move_insn (op0_di, op2);
33042
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
33045
+ [(set_attr "length" "8")
33046
+ (set_attr "type" "two")])
33048
+;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a
33049
+;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value,
33050
+;; and then doing a move of that.
33051
+(define_insn "p8_mfvsrd_3_<mode>"
33052
+ [(set (match_operand:DF 0 "register_operand" "=r")
33053
+ (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
33054
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
33055
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
33057
+ [(set_attr "type" "mftgpr")])
33059
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
33060
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
33061
+ (unspec:FMOVE128_GPR
33062
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
33063
+ UNSPEC_P8V_RELOAD_FROM_VSX))
33064
+ (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
33065
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
33067
+ "&& reload_completed"
33070
+ rtx dest = operands[0];
33071
+ rtx src = operands[1];
33072
+ rtx tmp = operands[2];
33073
+ rtx gpr_hi_reg = gen_highpart (DFmode, dest);
33074
+ rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
33076
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
33077
+ emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
33078
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
33080
+ [(set_attr "length" "12")
33081
+ (set_attr "type" "three")])
33083
+;; Move SFmode to a GPR from a VSX register. Because scalar floating point
33084
+;; type is stored internally as double precision, we have to convert it to the
33087
+(define_insn_and_split "reload_gpr_from_vsxsf"
33088
+ [(set (match_operand:SF 0 "register_operand" "=r")
33089
+ (unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
33090
+ UNSPEC_P8V_RELOAD_FROM_VSX))
33091
+ (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
33092
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
33094
+ "&& reload_completed"
33097
+ rtx op0 = operands[0];
33098
+ rtx op1 = operands[1];
33099
+ rtx op2 = operands[2];
33100
+ rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
33102
+ emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
33103
+ emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
33104
+ emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
33107
+ [(set_attr "length" "12")
33108
+ (set_attr "type" "three")])
33110
+(define_insn "p8_mfvsrd_4_disf"
33111
+ [(set (match_operand:DI 0 "register_operand" "=r")
33112
+ (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
33113
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
33114
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
33116
+ [(set_attr "type" "mftgpr")])
33119
;; Next come the multi-word integer load and store and the load and store
33122
@@ -8565,8 +9578,8 @@
33123
;; Use of fprs is disparaged slightly otherwise reload prefers to reload
33124
;; a gpr into a fpr instead of reloading an invalid 'Y' address
33125
(define_insn "*movdi_internal32"
33126
- [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r,?wa")
33127
- (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF,O"))]
33128
+ [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r")
33129
+ (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF"))]
33130
"! TARGET_POWERPC64
33131
&& (gpc_reg_operand (operands[0], DImode)
33132
|| gpc_reg_operand (operands[1], DImode))"
33133
@@ -8577,15 +9590,34 @@
33138
- xxlxor %x0,%x0,%x0"
33139
- [(set_attr "type" "store,load,*,fpstore,fpload,fp,*,vecsimple")])
33141
+ [(set_attr_alternative "type"
33142
+ [(const_string "store")
33143
+ (const_string "load")
33144
+ (const_string "*")
33146
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
33147
+ (const_string "fpstore_ux")
33149
+ (match_test "update_address_mem (operands[0], VOIDmode)")
33150
+ (const_string "fpstore_u")
33151
+ (const_string "fpstore")))
33153
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
33154
+ (const_string "fpload_ux")
33156
+ (match_test "update_address_mem (operands[1], VOIDmode)")
33157
+ (const_string "fpload_u")
33158
+ (const_string "fpload")))
33159
+ (const_string "fp")
33160
+ (const_string "*")])])
33163
[(set (match_operand:DI 0 "gpc_reg_operand" "")
33164
(match_operand:DI 1 "const_int_operand" ""))]
33165
"! TARGET_POWERPC64 && reload_completed
33166
- && gpr_or_gpr_p (operands[0], operands[1])"
33167
+ && gpr_or_gpr_p (operands[0], operands[1])
33168
+ && !direct_move_p (operands[0], operands[1])"
33169
[(set (match_dup 2) (match_dup 4))
33170
(set (match_dup 3) (match_dup 1))]
33172
@@ -8607,14 +9639,15 @@
33173
[(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
33174
(match_operand:DIFD 1 "input_operand" ""))]
33175
"reload_completed && !TARGET_POWERPC64
33176
- && gpr_or_gpr_p (operands[0], operands[1])"
33177
+ && gpr_or_gpr_p (operands[0], operands[1])
33178
+ && !direct_move_p (operands[0], operands[1])"
33180
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
33182
-(define_insn "*movdi_mfpgpr"
33183
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*d")
33184
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*d,r"))]
33185
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
33186
+(define_insn "*movdi_internal64"
33187
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*wg,r,?*wm")
33188
+ (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*wg,r,*wm,r"))]
33189
+ "TARGET_POWERPC64
33190
&& (gpc_reg_operand (operands[0], DImode)
33191
|| gpc_reg_operand (operands[1], DImode))"
33193
@@ -8631,33 +9664,52 @@
33198
- [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
33199
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4")])
33203
+ [(set_attr_alternative "type"
33205
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
33206
+ (const_string "store_ux")
33208
+ (match_test "update_address_mem (operands[0], VOIDmode)")
33209
+ (const_string "store_u")
33210
+ (const_string "store")))
33212
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
33213
+ (const_string "load_ux")
33215
+ (match_test "update_address_mem (operands[1], VOIDmode)")
33216
+ (const_string "load_u")
33217
+ (const_string "load")))
33218
+ (const_string "*")
33219
+ (const_string "*")
33220
+ (const_string "*")
33221
+ (const_string "*")
33223
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
33224
+ (const_string "fpstore_ux")
33226
+ (match_test "update_address_mem (operands[0], VOIDmode)")
33227
+ (const_string "fpstore_u")
33228
+ (const_string "fpstore")))
33230
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
33231
+ (const_string "fpload_ux")
33233
+ (match_test "update_address_mem (operands[1], VOIDmode)")
33234
+ (const_string "fpload_u")
33235
+ (const_string "fpload")))
33236
+ (const_string "fp")
33237
+ (const_string "mfjmpr")
33238
+ (const_string "mtjmpr")
33239
+ (const_string "*")
33240
+ (const_string "mftgpr")
33241
+ (const_string "mffgpr")
33242
+ (const_string "mftgpr")
33243
+ (const_string "mffgpr")])
33244
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4")])
33246
-(define_insn "*movdi_internal64"
33247
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,?wa")
33248
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,O"))]
33249
- "TARGET_POWERPC64 && (!TARGET_MFPGPR || !TARGET_HARD_FLOAT || !TARGET_FPRS)
33250
- && (gpc_reg_operand (operands[0], DImode)
33251
- || gpc_reg_operand (operands[1], DImode))"
33265
- xxlxor %x0,%x0,%x0"
33266
- [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,vecsimple")
33267
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4")])
33269
;; immediate value valid for a single instruction hiding in a const_double
33271
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
33272
@@ -8719,14 +9771,16 @@
33276
-;; TImode is similar, except that we usually want to compute the address into
33277
-;; a register and use lsi/stsi (the exception is during reload).
33278
+;; TImode/PTImode is similar, except that we usually want to compute the
33279
+;; address into a register and use lsi/stsi (the exception is during reload).
33281
-(define_insn "*movti_string"
33282
- [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
33283
- (match_operand:TI 1 "input_operand" "r,r,Q,Y,r,n"))]
33284
+(define_insn "*mov<mode>_string"
33285
+ [(set (match_operand:TI2 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
33286
+ (match_operand:TI2 1 "input_operand" "r,r,Q,Y,r,n"))]
33287
"! TARGET_POWERPC64
33288
- && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
33289
+ && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
33290
+ && (gpc_reg_operand (operands[0], <MODE>mode)
33291
+ || gpc_reg_operand (operands[1], <MODE>mode))"
33294
switch (which_alternative)
33295
@@ -8756,27 +9810,32 @@
33296
(const_string "always")
33297
(const_string "conditional")))])
33299
-(define_insn "*movti_ppc64"
33300
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Y,r,r")
33301
- (match_operand:TI 1 "input_operand" "r,Y,r"))]
33302
- "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
33303
- || gpc_reg_operand (operands[1], TImode)))
33304
- && VECTOR_MEM_NONE_P (TImode)"
33306
- [(set_attr "type" "store,load,*")])
33307
+(define_insn "*mov<mode>_ppc64"
33308
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
33309
+ (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))]
33310
+ "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
33311
+ && (gpc_reg_operand (operands[0], <MODE>mode)
33312
+ || gpc_reg_operand (operands[1], <MODE>mode)))"
33314
+ return rs6000_output_move_128bit (operands);
33316
+ [(set_attr "type" "store,store,load,load,*,*")
33317
+ (set_attr "length" "8")])
33320
- [(set (match_operand:TI 0 "gpc_reg_operand" "")
33321
- (match_operand:TI 1 "const_double_operand" ""))]
33322
- "TARGET_POWERPC64 && VECTOR_MEM_NONE_P (TImode)"
33323
+ [(set (match_operand:TI2 0 "int_reg_operand" "")
33324
+ (match_operand:TI2 1 "const_double_operand" ""))]
33325
+ "TARGET_POWERPC64
33326
+ && (VECTOR_MEM_NONE_P (<MODE>mode)
33327
+ || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
33328
[(set (match_dup 2) (match_dup 4))
33329
(set (match_dup 3) (match_dup 5))]
33332
operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0,
33335
operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0,
33338
if (GET_CODE (operands[1]) == CONST_DOUBLE)
33340
operands[4] = GEN_INT (CONST_DOUBLE_HIGH (operands[1]));
33341
@@ -8792,10 +9851,12 @@
33345
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
33346
- (match_operand:TI 1 "input_operand" ""))]
33347
- "reload_completed && VECTOR_MEM_NONE_P (TImode)
33348
- && gpr_or_gpr_p (operands[0], operands[1])"
33349
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "")
33350
+ (match_operand:TI2 1 "input_operand" ""))]
33351
+ "reload_completed
33352
+ && gpr_or_gpr_p (operands[0], operands[1])
33353
+ && !direct_move_p (operands[0], operands[1])
33354
+ && !quad_load_store_p (operands[0], operands[1])"
33356
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
33358
@@ -9651,7 +10712,7 @@
33359
(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
33361
(clobber (reg:SI LR_REGNO))]
33362
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
33363
+ "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
33365
if (TARGET_CMODEL != CMODEL_SMALL)
33366
return "addis %0,%1,%2@got@tlsgd@ha\;addi %0,%0,%2@got@tlsgd@l\;"
33367
@@ -9759,7 +10820,8 @@
33368
(unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")]
33370
(clobber (reg:SI LR_REGNO))]
33371
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
33372
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS
33373
+ && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
33374
"bl %z1(%3@tlsgd)\;nop"
33375
[(set_attr "type" "branch")
33376
(set_attr "length" "8")])
33377
@@ -9791,7 +10853,7 @@
33378
(unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")]
33380
(clobber (reg:SI LR_REGNO))]
33381
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
33382
+ "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
33384
if (TARGET_CMODEL != CMODEL_SMALL)
33385
return "addis %0,%1,%&@got@tlsld@ha\;addi %0,%0,%&@got@tlsld@l\;"
33386
@@ -9892,7 +10954,8 @@
33387
(match_operand 2 "" "g")))
33388
(unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
33389
(clobber (reg:SI LR_REGNO))]
33390
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
33391
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS
33392
+ && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
33393
"bl %z1(%&@tlsld)\;nop"
33394
[(set_attr "type" "branch")
33395
(set_attr "length" "8")])
33396
@@ -10261,7 +11324,7 @@
33397
[(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
33398
(unspec:SI [(const_int 0)] UNSPEC_TOC))
33399
(use (reg:SI 2))])]
33400
- "DEFAULT_ABI == ABI_AIX && TARGET_32BIT"
33401
+ "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_32BIT"
33405
@@ -10276,7 +11339,7 @@
33406
[(parallel [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
33407
(unspec:DI [(const_int 0)] UNSPEC_TOC))
33408
(use (reg:DI 2))])]
33409
- "DEFAULT_ABI == ABI_AIX && TARGET_64BIT"
33410
+ "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_64BIT"
33414
@@ -10306,7 +11369,7 @@
33415
[(parallel [(set (reg:SI LR_REGNO)
33416
(match_operand:SI 0 "immediate_operand" "s"))
33417
(use (unspec [(match_dup 0)] UNSPEC_TOC))])]
33418
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX
33419
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4
33420
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
33423
@@ -10314,7 +11377,7 @@
33424
[(set (reg:SI LR_REGNO)
33425
(match_operand:SI 0 "immediate_operand" "s"))
33426
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
33427
- "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
33428
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4
33429
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
33430
"bcl 20,31,%0\\n%0:"
33431
[(set_attr "type" "branch")
33432
@@ -10324,7 +11387,7 @@
33433
[(set (reg:SI LR_REGNO)
33434
(match_operand:SI 0 "immediate_operand" "s"))
33435
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
33436
- "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
33437
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4
33438
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
33441
@@ -10344,7 +11407,7 @@
33442
(label_ref (match_operand 1 "" ""))]
33445
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
33446
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
33449
(define_insn "load_toc_v4_PIC_1b_normal"
33450
@@ -10353,7 +11416,7 @@
33451
(label_ref (match_operand 1 "" ""))]
33454
- "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
33455
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
33456
"bcl 20,31,$+8\;.long %0-$"
33457
[(set_attr "type" "branch")
33458
(set_attr "length" "8")])
33459
@@ -10364,7 +11427,7 @@
33460
(label_ref (match_operand 1 "" ""))]
33463
- "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
33464
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
33468
@@ -10382,7 +11445,7 @@
33469
(mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
33470
(minus:SI (match_operand:SI 2 "immediate_operand" "s")
33471
(match_operand:SI 3 "immediate_operand" "s")))))]
33472
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
33473
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
33475
[(set_attr "type" "load")])
33477
@@ -10392,7 +11455,7 @@
33479
(minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
33480
(match_operand:SI 3 "symbol_ref_operand" "s")))))]
33481
- "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
33482
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic"
33483
"addis %0,%1,%2-%3@ha")
33485
(define_insn "load_toc_v4_PIC_3c"
33486
@@ -10400,7 +11463,7 @@
33487
(lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
33488
(minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
33489
(match_operand:SI 3 "symbol_ref_operand" "s"))))]
33490
- "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
33491
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic"
33492
"addi %0,%1,%2-%3@l")
33494
;; If the TOC is shared over a translation unit, as happens with all
33495
@@ -10542,8 +11605,13 @@
33497
operands[0] = XEXP (operands[0], 0);
33499
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33501
+ rs6000_call_aix (NULL_RTX, operands[0], operands[1], operands[2]);
33505
if (GET_CODE (operands[0]) != SYMBOL_REF
33506
- || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[0]))
33507
|| (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[2]) & CALL_LONG) != 0))
33509
if (INTVAL (operands[2]) & CALL_LONG)
33510
@@ -10556,12 +11624,6 @@
33511
operands[0] = force_reg (Pmode, operands[0]);
33515
- /* AIX function pointers are really pointers to a three word
33517
- rs6000_call_indirect_aix (NULL_RTX, operands[0], operands[1]);
33521
gcc_unreachable ();
33523
@@ -10587,8 +11649,13 @@
33525
operands[1] = XEXP (operands[1], 0);
33527
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33529
+ rs6000_call_aix (operands[0], operands[1], operands[2], operands[3]);
33533
if (GET_CODE (operands[1]) != SYMBOL_REF
33534
- || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[1]))
33535
|| (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[3]) & CALL_LONG) != 0))
33537
if (INTVAL (operands[3]) & CALL_LONG)
33538
@@ -10601,12 +11668,6 @@
33539
operands[1] = force_reg (Pmode, operands[1]);
33543
- /* AIX function pointers are really pointers to a three word
33545
- rs6000_call_indirect_aix (operands[0], operands[1], operands[2]);
33549
gcc_unreachable ();
33551
@@ -10698,136 +11759,7 @@
33552
[(set_attr "type" "branch")
33553
(set_attr "length" "4,8")])
33555
-;; Call to indirect functions with the AIX abi using a 3 word descriptor.
33556
-;; Operand0 is the addresss of the function to call
33557
-;; Operand1 is the flag for System V.4 for unprototyped or FP registers
33558
-;; Operand2 is the location in the function descriptor to load r2 from
33559
-;; Operand3 is the stack location to hold the current TOC pointer
33561
-(define_insn "call_indirect_aix<ptrsize>"
33562
- [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
33563
- (match_operand 1 "" "g,g"))
33564
- (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
33565
- (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33566
- (use (reg:P STATIC_CHAIN_REGNUM))
33567
- (clobber (reg:P LR_REGNO))]
33568
- "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS"
33569
- "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
33570
- [(set_attr "type" "jmpreg")
33571
- (set_attr "length" "12")])
33573
-;; Like call_indirect_aix<ptrsize>, but no use of the static chain
33574
-;; Operand0 is the addresss of the function to call
33575
-;; Operand1 is the flag for System V.4 for unprototyped or FP registers
33576
-;; Operand2 is the location in the function descriptor to load r2 from
33577
-;; Operand3 is the stack location to hold the current TOC pointer
33579
-(define_insn "call_indirect_aix<ptrsize>_nor11"
33580
- [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
33581
- (match_operand 1 "" "g,g"))
33582
- (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
33583
- (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33584
- (clobber (reg:P LR_REGNO))]
33585
- "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS"
33586
- "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
33587
- [(set_attr "type" "jmpreg")
33588
- (set_attr "length" "12")])
33590
-;; Operand0 is the return result of the function
33591
-;; Operand1 is the addresss of the function to call
33592
-;; Operand2 is the flag for System V.4 for unprototyped or FP registers
33593
-;; Operand3 is the location in the function descriptor to load r2 from
33594
-;; Operand4 is the stack location to hold the current TOC pointer
33596
-(define_insn "call_value_indirect_aix<ptrsize>"
33597
- [(set (match_operand 0 "" "")
33598
- (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
33599
- (match_operand 2 "" "g,g")))
33600
- (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33601
- (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
33602
- (use (reg:P STATIC_CHAIN_REGNUM))
33603
- (clobber (reg:P LR_REGNO))]
33604
- "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS"
33605
- "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
33606
- [(set_attr "type" "jmpreg")
33607
- (set_attr "length" "12")])
33609
-;; Like call_value_indirect_aix<ptrsize>, but no use of the static chain
33610
-;; Operand0 is the return result of the function
33611
-;; Operand1 is the addresss of the function to call
33612
-;; Operand2 is the flag for System V.4 for unprototyped or FP registers
33613
-;; Operand3 is the location in the function descriptor to load r2 from
33614
-;; Operand4 is the stack location to hold the current TOC pointer
33616
-(define_insn "call_value_indirect_aix<ptrsize>_nor11"
33617
- [(set (match_operand 0 "" "")
33618
- (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
33619
- (match_operand 2 "" "g,g")))
33620
- (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33621
- (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
33622
- (clobber (reg:P LR_REGNO))]
33623
- "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS"
33624
- "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
33625
- [(set_attr "type" "jmpreg")
33626
- (set_attr "length" "12")])
33628
-;; Call to function which may be in another module. Restore the TOC
33629
-;; pointer (r2) after the call unless this is System V.
33630
-;; Operand2 is nonzero if we are using the V.4 calling sequence and
33631
-;; either the function was not prototyped, or it was prototyped as a
33632
-;; variable argument function. It is > 0 if FP registers were passed
33633
-;; and < 0 if they were not.
33635
-(define_insn "*call_nonlocal_aix32"
33636
- [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "s"))
33637
- (match_operand 1 "" "g"))
33638
- (use (match_operand:SI 2 "immediate_operand" "O"))
33639
- (clobber (reg:SI LR_REGNO))]
33641
- && DEFAULT_ABI == ABI_AIX
33642
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
33644
- [(set_attr "type" "branch")
33645
- (set_attr "length" "8")])
33647
-(define_insn "*call_nonlocal_aix64"
33648
- [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s"))
33649
- (match_operand 1 "" "g"))
33650
- (use (match_operand:SI 2 "immediate_operand" "O"))
33651
- (clobber (reg:SI LR_REGNO))]
33653
- && DEFAULT_ABI == ABI_AIX
33654
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
33656
- [(set_attr "type" "branch")
33657
- (set_attr "length" "8")])
33659
-(define_insn "*call_value_nonlocal_aix32"
33660
- [(set (match_operand 0 "" "")
33661
- (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "s"))
33662
- (match_operand 2 "" "g")))
33663
- (use (match_operand:SI 3 "immediate_operand" "O"))
33664
- (clobber (reg:SI LR_REGNO))]
33666
- && DEFAULT_ABI == ABI_AIX
33667
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
33669
- [(set_attr "type" "branch")
33670
- (set_attr "length" "8")])
33672
-(define_insn "*call_value_nonlocal_aix64"
33673
- [(set (match_operand 0 "" "")
33674
- (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s"))
33675
- (match_operand 2 "" "g")))
33676
- (use (match_operand:SI 3 "immediate_operand" "O"))
33677
- (clobber (reg:SI LR_REGNO))]
33679
- && DEFAULT_ABI == ABI_AIX
33680
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
33682
- [(set_attr "type" "branch")
33683
- (set_attr "length" "8")])
33685
;; A function pointer under System V is just a normal pointer
33686
;; operands[0] is the function pointer
33687
;; operands[1] is the stack size to clean up
33688
@@ -11009,6 +11941,104 @@
33689
[(set_attr "type" "branch,branch")
33690
(set_attr "length" "4,8")])
33693
+;; Call to AIX abi function in the same module.
33695
+(define_insn "*call_local_aix<mode>"
33696
+ [(call (mem:SI (match_operand:P 0 "current_file_function_operand" "s"))
33697
+ (match_operand 1 "" "g"))
33698
+ (clobber (reg:P LR_REGNO))]
33699
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
33701
+ [(set_attr "type" "branch")
33702
+ (set_attr "length" "4")])
33704
+(define_insn "*call_value_local_aix<mode>"
33705
+ [(set (match_operand 0 "" "")
33706
+ (call (mem:SI (match_operand:P 1 "current_file_function_operand" "s"))
33707
+ (match_operand 2 "" "g")))
33708
+ (clobber (reg:P LR_REGNO))]
33709
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
33711
+ [(set_attr "type" "branch")
33712
+ (set_attr "length" "4")])
33714
+;; Call to AIX abi function which may be in another module.
33715
+;; Restore the TOC pointer (r2) after the call.
33717
+(define_insn "*call_nonlocal_aix<mode>"
33718
+ [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s"))
33719
+ (match_operand 1 "" "g"))
33720
+ (clobber (reg:P LR_REGNO))]
33721
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
33723
+ [(set_attr "type" "branch")
33724
+ (set_attr "length" "8")])
33726
+(define_insn "*call_value_nonlocal_aix<mode>"
33727
+ [(set (match_operand 0 "" "")
33728
+ (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s"))
33729
+ (match_operand 2 "" "g")))
33730
+ (clobber (reg:P LR_REGNO))]
33731
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
33733
+ [(set_attr "type" "branch")
33734
+ (set_attr "length" "8")])
33736
+;; Call to indirect functions with the AIX abi using a 3 word descriptor.
33737
+;; Operand0 is the addresss of the function to call
33738
+;; Operand2 is the location in the function descriptor to load r2 from
33739
+;; Operand3 is the stack location to hold the current TOC pointer
33741
+(define_insn "*call_indirect_aix<mode>"
33742
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
33743
+ (match_operand 1 "" "g,g"))
33744
+ (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
33745
+ (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33746
+ (clobber (reg:P LR_REGNO))]
33747
+ "DEFAULT_ABI == ABI_AIX"
33748
+ "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
33749
+ [(set_attr "type" "jmpreg")
33750
+ (set_attr "length" "12")])
33752
+(define_insn "*call_value_indirect_aix<mode>"
33753
+ [(set (match_operand 0 "" "")
33754
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
33755
+ (match_operand 2 "" "g,g")))
33756
+ (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33757
+ (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
33758
+ (clobber (reg:P LR_REGNO))]
33759
+ "DEFAULT_ABI == ABI_AIX"
33760
+ "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
33761
+ [(set_attr "type" "jmpreg")
33762
+ (set_attr "length" "12")])
33764
+;; Call to indirect functions with the ELFv2 ABI.
33765
+;; Operand0 is the addresss of the function to call
33766
+;; Operand2 is the stack location to hold the current TOC pointer
33768
+(define_insn "*call_indirect_elfv2<mode>"
33769
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
33770
+ (match_operand 1 "" "g,g"))
33771
+ (set (reg:P TOC_REGNUM) (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
33772
+ (clobber (reg:P LR_REGNO))]
33773
+ "DEFAULT_ABI == ABI_ELFv2"
33774
+ "b%T0l\;<ptrload> 2,%2"
33775
+ [(set_attr "type" "jmpreg")
33776
+ (set_attr "length" "8")])
33778
+(define_insn "*call_value_indirect_elfv2<mode>"
33779
+ [(set (match_operand 0 "" "")
33780
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
33781
+ (match_operand 2 "" "g,g")))
33782
+ (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
33783
+ (clobber (reg:P LR_REGNO))]
33784
+ "DEFAULT_ABI == ABI_ELFv2"
33785
+ "b%T1l\;<ptrload> 2,%3"
33786
+ [(set_attr "type" "jmpreg")
33787
+ (set_attr "length" "8")])
33790
;; Call subroutine returning any type.
33791
(define_expand "untyped_call"
33792
[(parallel [(call (match_operand 0 "" "")
33793
@@ -11056,8 +12086,41 @@
33794
gcc_assert (GET_CODE (operands[1]) == CONST_INT);
33796
operands[0] = XEXP (operands[0], 0);
33798
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33800
+ rs6000_sibcall_aix (NULL_RTX, operands[0], operands[1], operands[2]);
33805
+(define_expand "sibcall_value"
33806
+ [(parallel [(set (match_operand 0 "register_operand" "")
33807
+ (call (mem:SI (match_operand 1 "address_operand" ""))
33808
+ (match_operand 2 "" "")))
33809
+ (use (match_operand 3 "" ""))
33810
+ (use (reg:SI LR_REGNO))
33811
+ (simple_return)])]
33816
+ if (MACHOPIC_INDIRECT)
33817
+ operands[1] = machopic_indirect_call_target (operands[1]);
33820
+ gcc_assert (GET_CODE (operands[1]) == MEM);
33821
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
33823
+ operands[1] = XEXP (operands[1], 0);
33825
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33827
+ rs6000_sibcall_aix (operands[0], operands[1], operands[2], operands[3]);
33832
;; this and similar patterns must be marked as using LR, otherwise
33833
;; dataflow will try to delete the store into it. This is true
33834
;; even when the actual reg to jump to is in CTR, when LR was
33835
@@ -11123,7 +12186,6 @@
33836
[(set_attr "type" "branch")
33837
(set_attr "length" "4,8")])
33840
(define_insn "*sibcall_value_local64"
33841
[(set (match_operand 0 "" "")
33842
(call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s"))
33843
@@ -11145,35 +12207,6 @@
33844
[(set_attr "type" "branch")
33845
(set_attr "length" "4,8")])
33847
-(define_insn "*sibcall_nonlocal_aix<mode>"
33848
- [(call (mem:SI (match_operand:P 0 "call_operand" "s,c"))
33849
- (match_operand 1 "" "g,g"))
33850
- (use (match_operand:SI 2 "immediate_operand" "O,O"))
33851
- (use (reg:SI LR_REGNO))
33853
- "DEFAULT_ABI == ABI_AIX
33854
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
33858
- [(set_attr "type" "branch")
33859
- (set_attr "length" "4")])
33861
-(define_insn "*sibcall_value_nonlocal_aix<mode>"
33862
- [(set (match_operand 0 "" "")
33863
- (call (mem:SI (match_operand:P 1 "call_operand" "s,c"))
33864
- (match_operand 2 "" "g,g")))
33865
- (use (match_operand:SI 3 "immediate_operand" "O,O"))
33866
- (use (reg:SI LR_REGNO))
33868
- "DEFAULT_ABI == ABI_AIX
33869
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
33873
- [(set_attr "type" "branch")
33874
- (set_attr "length" "4")])
33876
(define_insn "*sibcall_nonlocal_sysv<mode>"
33877
[(call (mem:SI (match_operand:P 0 "call_operand" "s,s,c,c"))
33878
(match_operand 1 "" ""))
33879
@@ -11204,27 +12237,6 @@
33880
[(set_attr "type" "branch")
33881
(set_attr "length" "4,8,4,8")])
33883
-(define_expand "sibcall_value"
33884
- [(parallel [(set (match_operand 0 "register_operand" "")
33885
- (call (mem:SI (match_operand 1 "address_operand" ""))
33886
- (match_operand 2 "" "")))
33887
- (use (match_operand 3 "" ""))
33888
- (use (reg:SI LR_REGNO))
33889
- (simple_return)])]
33894
- if (MACHOPIC_INDIRECT)
33895
- operands[1] = machopic_indirect_call_target (operands[1]);
33898
- gcc_assert (GET_CODE (operands[1]) == MEM);
33899
- gcc_assert (GET_CODE (operands[2]) == CONST_INT);
33901
- operands[1] = XEXP (operands[1], 0);
33904
(define_insn "*sibcall_value_nonlocal_sysv<mode>"
33905
[(set (match_operand 0 "" "")
33906
(call (mem:SI (match_operand:P 1 "call_operand" "s,s,c,c"))
33907
@@ -11256,6 +12268,31 @@
33908
[(set_attr "type" "branch")
33909
(set_attr "length" "4,8,4,8")])
33911
+;; AIX ABI sibling call patterns.
33913
+(define_insn "*sibcall_aix<mode>"
33914
+ [(call (mem:SI (match_operand:P 0 "call_operand" "s,c"))
33915
+ (match_operand 1 "" "g,g"))
33917
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
33921
+ [(set_attr "type" "branch")
33922
+ (set_attr "length" "4")])
33924
+(define_insn "*sibcall_value_aix<mode>"
33925
+ [(set (match_operand 0 "" "")
33926
+ (call (mem:SI (match_operand:P 1 "call_operand" "s,c"))
33927
+ (match_operand 2 "" "g,g")))
33929
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
33933
+ [(set_attr "type" "branch")
33934
+ (set_attr "length" "4")])
33936
(define_expand "sibcall_epilogue"
33937
[(use (const_int 0))]
33939
@@ -11294,7 +12331,14 @@
33940
operands[1] = gen_rtx_REG (Pmode, 0);
33941
return "st<wd>%U0%X0 %1,%0";
33943
- [(set_attr "type" "store")
33944
+ [(set (attr "type")
33946
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
33947
+ (const_string "store_ux")
33949
+ (match_test "update_address_mem (operands[0], VOIDmode)")
33950
+ (const_string "store_u")
33951
+ (const_string "store"))))
33952
(set_attr "length" "4")])
33954
(define_insn "probe_stack_range<P:mode>"
33955
@@ -11589,23 +12633,6 @@
33956
[(set (match_dup 3) (compare:CCUNS (match_dup 1) (match_dup 2)))
33957
(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))])
33959
-(define_insn "*cmpsf_internal1"
33960
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
33961
- (compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "f")
33962
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
33963
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
33965
- [(set_attr "type" "fpcompare")])
33967
-(define_insn "*cmpdf_internal1"
33968
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
33969
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "d")
33970
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
33971
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
33972
- && !VECTOR_UNIT_VSX_P (DFmode)"
33974
- [(set_attr "type" "fpcompare")])
33976
;; Only need to compare second words if first words equal
33977
(define_insn "*cmptf_internal1"
33978
[(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
33979
@@ -13501,6 +14528,14 @@
33981
[(set_attr "type" "mfcr")])
33983
+(define_insn "*crsave"
33984
+ [(match_parallel 0 "crsave_operation"
33985
+ [(set (match_operand:SI 1 "memory_operand" "=m")
33986
+ (match_operand:SI 2 "gpc_reg_operand" "r"))])]
33989
+ [(set_attr "type" "store")])
33991
(define_insn "*stmw"
33992
[(match_parallel 0 "stmw_operation"
33993
[(set (match_operand:SI 1 "memory_operand" "=m")
33994
@@ -13885,7 +14920,7 @@
33995
(match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))]
33998
- [(set_attr "type" "integer")])
33999
+ [(set_attr "type" "popcnt")])
34002
;; Builtin fma support. Handle
34003
@@ -13900,6 +14935,20 @@
34007
+(define_insn "*fma<mode>4_fpr"
34008
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
34010
+ (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>,<Fv>")
34011
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
34012
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>")))]
34013
+ "TARGET_<MODE>_FPR"
34015
+ fmadd<Ftrad> %0,%1,%2,%3
34016
+ xsmadda<Fvsx> %x0,%x1,%x2
34017
+ xsmaddm<Fvsx> %x0,%x1,%x3"
34018
+ [(set_attr "type" "fp")
34019
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
34021
; Altivec only has fma and nfms.
34022
(define_expand "fms<mode>4"
34023
[(set (match_operand:FMA_F 0 "register_operand" "")
34024
@@ -13910,6 +14959,20 @@
34025
"!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
34028
+(define_insn "*fms<mode>4_fpr"
34029
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
34031
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
34032
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
34033
+ (neg:SFDF (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>"))))]
34034
+ "TARGET_<MODE>_FPR"
34036
+ fmsub<Ftrad> %0,%1,%2,%3
34037
+ xsmsuba<Fvsx> %x0,%x1,%x2
34038
+ xsmsubm<Fvsx> %x0,%x1,%x3"
34039
+ [(set_attr "type" "fp")
34040
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
34042
;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
34043
(define_expand "fnma<mode>4"
34044
[(set (match_operand:FMA_F 0 "register_operand" "")
34045
@@ -13943,6 +15006,21 @@
34046
"!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
34049
+(define_insn "*nfma<mode>4_fpr"
34050
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
34053
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
34054
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
34055
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>"))))]
34056
+ "TARGET_<MODE>_FPR"
34058
+ fnmadd<Ftrad> %0,%1,%2,%3
34059
+ xsnmadda<Fvsx> %x0,%x1,%x2
34060
+ xsnmaddm<Fvsx> %x0,%x1,%x3"
34061
+ [(set_attr "type" "fp")
34062
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
34064
; Not an official optab name, but used from builtins.
34065
(define_expand "nfms<mode>4"
34066
[(set (match_operand:FMA_F 0 "register_operand" "")
34067
@@ -13954,6 +15032,23 @@
34071
+(define_insn "*nfmssf4_fpr"
34072
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
34075
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
34076
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
34078
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>")))))]
34079
+ "TARGET_<MODE>_FPR"
34081
+ fnmsub<Ftrad> %0,%1,%2,%3
34082
+ xsnmsuba<Fvsx> %x0,%x1,%x2
34083
+ xsnmsubm<Fvsx> %x0,%x1,%x3"
34084
+ [(set_attr "type" "fp")
34085
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
34088
(define_expand "rs6000_get_timebase"
34089
[(use (match_operand:DI 0 "gpc_reg_operand" ""))]
34091
@@ -14020,7 +15115,44 @@
34095
+;; Power8 fusion support for fusing an addis instruction with a D-form load of
34096
+;; a GPR. The addis instruction must be adjacent to the load, and use the same
34097
+;; register that is being loaded. The fused ops must be physically adjacent.
34099
+;; We use define_peephole for the actual addis/load, and the register used to
34100
+;; hold the addis value must be the same as the register being loaded. We use
34101
+;; define_peephole2 to change the register used for addis to be the register
34102
+;; being loaded, since we can look at whether it is dead after the load insn.
34105
+ [(set (match_operand:P 0 "base_reg_operand" "")
34106
+ (match_operand:P 1 "fusion_gpr_addis" ""))
34107
+ (set (match_operand:INT1 2 "base_reg_operand" "")
34108
+ (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
34109
+ "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)"
34111
+ return emit_fusion_gpr_load (operands);
34113
+ [(set_attr "type" "load")
34114
+ (set_attr "length" "8")])
34117
+ [(set (match_operand:P 0 "base_reg_operand" "")
34118
+ (match_operand:P 1 "fusion_gpr_addis" ""))
34119
+ (set (match_operand:INT1 2 "base_reg_operand" "")
34120
+ (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
34121
+ "TARGET_P8_FUSION
34122
+ && (REGNO (operands[0]) != REGNO (operands[2])
34123
+ || GET_CODE (operands[3]) == SIGN_EXTEND)
34124
+ && fusion_gpr_load_p (operands, true)"
34127
+ expand_fusion_gpr_load (operands);
34133
(include "sync.md")
34134
(include "vector.md")
34136
@@ -14028,3 +15160,5 @@
34139
(include "paired.md")
34140
+(include "crypto.md")
34141
+(include "htm.md")
34142
--- a/src/gcc/config/rs6000/t-linux64le
34143
+++ b/src/gcc/config/rs6000/t-linux64le
34145
+#rs6000/t-linux64le
34147
+MULTILIB_OSDIRNAMES := $(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES))
34148
--- a/src/gcc/config/rs6000/t-linux64lebe
34149
+++ b/src/gcc/config/rs6000/t-linux64lebe
34151
+#rs6000/t-linux64leend
34153
+MULTILIB_OPTIONS += mbig
34154
+MULTILIB_DIRNAMES += be
34155
+MULTILIB_OSDIRNAMES += $(subst =,.mbig=,$(subst libbe32,lib32be,$(subst libbe64,lib64be,$(subst lib,libbe,$(subst le-linux,-linux,$(MULTILIB_OSDIRNAMES))))))
34156
+MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mbig%,$(MULTILIB_OSDIRNAMES)))
34157
+MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN}
34158
--- a/src/gcc/config/rs6000/rs6000-opts.h
34159
+++ b/src/gcc/config/rs6000/rs6000-opts.h
34169
/* FP processor type. */
34170
@@ -100,7 +101,8 @@
34171
/* Enumeration to give which calling sequence to use. */
34174
- ABI_AIX, /* IBM's AIX */
34175
+ ABI_AIX, /* IBM's AIX, or Linux ELFv1 */
34176
+ ABI_ELFv2, /* Linux ELFv2 ABI */
34177
ABI_V4, /* System V.4/eabi */
34178
ABI_DARWIN /* Apple's Darwin (OS X kernel) */
34180
@@ -131,11 +133,14 @@
34184
-/* Describe which vector unit to use for a given machine mode. */
34185
+/* Describe which vector unit to use for a given machine mode. The
34186
+ VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and
34187
+ P8_VECTOR are contiguous. */
34188
enum rs6000_vector {
34189
VECTOR_NONE, /* Type is not a vector or not supported */
34190
VECTOR_ALTIVEC, /* Use altivec for vector processing */
34191
VECTOR_VSX, /* Use VSX for vector processing */
34192
+ VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */
34193
VECTOR_PAIRED, /* Use paired floating point for vectors */
34194
VECTOR_SPE, /* Use SPE for vector processing */
34195
VECTOR_OTHER /* Some other vector unit */
34196
--- a/src/gcc/config/rs6000/option-defaults.h
34197
+++ b/src/gcc/config/rs6000/option-defaults.h
34199
--with-float is ignored if -mhard-float or -msoft-float are
34201
#define OPTION_DEFAULT_SPECS \
34202
+ {"abi", "%{!mabi=elfv*:-mabi=%(VALUE)}" }, \
34203
{"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \
34204
{"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
34205
{"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
34206
--- a/src/gcc/config/rs6000/driver-rs6000.c
34207
+++ b/src/gcc/config/rs6000/driver-rs6000.c
34208
@@ -167,7 +167,7 @@
34213
+ static char buf[1024];
34217
--- a/src/gcc/config/rs6000/altivec.h
34218
+++ b/src/gcc/config/rs6000/altivec.h
34219
@@ -321,6 +321,42 @@
34220
#define vec_vsx_st __builtin_vec_vsx_st
34224
+/* Vector additions added in ISA 2.07. */
34225
+#define vec_eqv __builtin_vec_eqv
34226
+#define vec_nand __builtin_vec_nand
34227
+#define vec_orc __builtin_vec_orc
34228
+#define vec_vaddudm __builtin_vec_vaddudm
34229
+#define vec_vclz __builtin_vec_vclz
34230
+#define vec_vclzb __builtin_vec_vclzb
34231
+#define vec_vclzd __builtin_vec_vclzd
34232
+#define vec_vclzh __builtin_vec_vclzh
34233
+#define vec_vclzw __builtin_vec_vclzw
34234
+#define vec_vgbbd __builtin_vec_vgbbd
34235
+#define vec_vmaxsd __builtin_vec_vmaxsd
34236
+#define vec_vmaxud __builtin_vec_vmaxud
34237
+#define vec_vminsd __builtin_vec_vminsd
34238
+#define vec_vminud __builtin_vec_vminud
34239
+#define vec_vmrgew __builtin_vec_vmrgew
34240
+#define vec_vmrgow __builtin_vec_vmrgow
34241
+#define vec_vpksdss __builtin_vec_vpksdss
34242
+#define vec_vpksdus __builtin_vec_vpksdus
34243
+#define vec_vpkudum __builtin_vec_vpkudum
34244
+#define vec_vpkudus __builtin_vec_vpkudus
34245
+#define vec_vpopcnt __builtin_vec_vpopcnt
34246
+#define vec_vpopcntb __builtin_vec_vpopcntb
34247
+#define vec_vpopcntd __builtin_vec_vpopcntd
34248
+#define vec_vpopcnth __builtin_vec_vpopcnth
34249
+#define vec_vpopcntw __builtin_vec_vpopcntw
34250
+#define vec_vrld __builtin_vec_vrld
34251
+#define vec_vsld __builtin_vec_vsld
34252
+#define vec_vsrad __builtin_vec_vsrad
34253
+#define vec_vsrd __builtin_vec_vsrd
34254
+#define vec_vsubudm __builtin_vec_vsubudm
34255
+#define vec_vupkhsw __builtin_vec_vupkhsw
34256
+#define vec_vupklsw __builtin_vec_vupklsw
34260
For C++, we use templates in order to allow non-parenthesized arguments.
34261
For C, instead, we use macros since non-parenthesized arguments were
34262
--- a/src/gcc/config/rs6000/sysv4.h
34263
+++ b/src/gcc/config/rs6000/sysv4.h
34265
& (OPTION_MASK_RELOCATABLE \
34266
| OPTION_MASK_MINIMAL_TOC)) \
34268
- || DEFAULT_ABI == ABI_AIX)
34269
+ || DEFAULT_ABI != ABI_V4)
34271
#define TARGET_BITFIELD_TYPE (! TARGET_NO_BITFIELD_TYPE)
34272
#define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
34273
@@ -147,7 +147,7 @@
34274
rs6000_sdata_name); \
34277
- else if (flag_pic && DEFAULT_ABI != ABI_AIX \
34278
+ else if (flag_pic && DEFAULT_ABI == ABI_V4 \
34279
&& (rs6000_sdata == SDATA_EABI \
34280
|| rs6000_sdata == SDATA_SYSV)) \
34282
@@ -173,7 +173,7 @@
34283
error ("-mrelocatable and -mno-minimal-toc are incompatible"); \
34286
- if (TARGET_RELOCATABLE && rs6000_current_abi == ABI_AIX) \
34287
+ if (TARGET_RELOCATABLE && rs6000_current_abi != ABI_V4) \
34289
rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
34290
error ("-mrelocatable and -mcall-%s are incompatible", \
34291
@@ -180,7 +180,7 @@
34292
rs6000_abi_name); \
34295
- if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi == ABI_AIX) \
34296
+ if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi != ABI_V4) \
34299
error ("-fPIC and -mcall-%s are incompatible", \
34300
@@ -193,7 +193,7 @@
34303
/* Treat -fPIC the same as -mrelocatable. */ \
34304
- if (flag_pic > 1 && DEFAULT_ABI != ABI_AIX) \
34305
+ if (flag_pic > 1 && DEFAULT_ABI == ABI_V4) \
34307
rs6000_isa_flags |= OPTION_MASK_RELOCATABLE | OPTION_MASK_MINIMAL_TOC; \
34308
TARGET_NO_FP_IN_TOC = 1; \
34309
@@ -317,7 +317,7 @@
34311
/* Put PC relative got entries in .got2. */
34312
#define MINIMAL_TOC_SECTION_ASM_OP \
34313
- (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI != ABI_AIX) \
34314
+ (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI == ABI_V4) \
34315
? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"")
34317
#define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\""
34318
@@ -522,8 +522,6 @@
34319
#define ENDIAN_SELECT(BIG_OPT, LITTLE_OPT, DEFAULT_OPT) \
34320
"%{mlittle|mlittle-endian:" LITTLE_OPT ";" \
34321
"mbig|mbig-endian:" BIG_OPT ";" \
34322
- "mcall-aixdesc|mcall-freebsd|mcall-netbsd|" \
34323
- "mcall-openbsd|mcall-linux:" BIG_OPT ";" \
34324
"mcall-i960-old:" LITTLE_OPT ";" \
34325
":" DEFAULT_OPT "}"
34327
@@ -536,25 +534,12 @@
34328
%{memb|msdata=eabi: -memb}" \
34329
ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
34331
-#define CC1_ENDIAN_BIG_SPEC ""
34333
-#define CC1_ENDIAN_LITTLE_SPEC "\
34334
-%{!mstrict-align: %{!mno-strict-align: \
34335
- %{!mcall-i960-old: \
34340
-#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_big)"
34342
#ifndef CC1_SECURE_PLT_DEFAULT_SPEC
34343
#define CC1_SECURE_PLT_DEFAULT_SPEC ""
34346
-/* Pass -G xxx to the compiler and set correct endian mode. */
34347
+/* Pass -G xxx to the compiler. */
34348
#define CC1_SPEC "%{G*} %(cc1_cpu)" \
34349
- ENDIAN_SELECT(" %(cc1_endian_big)", " %(cc1_endian_little)", \
34350
- " %(cc1_endian_default)") \
34351
"%{meabi: %{!mcall-*: -mcall-sysv }} \
34352
%{!meabi: %{!mno-eabi: \
34353
%{mrelocatable: -meabi } \
34354
@@ -908,9 +893,6 @@
34355
{ "link_os_netbsd", LINK_OS_NETBSD_SPEC }, \
34356
{ "link_os_openbsd", LINK_OS_OPENBSD_SPEC }, \
34357
{ "link_os_default", LINK_OS_DEFAULT_SPEC }, \
34358
- { "cc1_endian_big", CC1_ENDIAN_BIG_SPEC }, \
34359
- { "cc1_endian_little", CC1_ENDIAN_LITTLE_SPEC }, \
34360
- { "cc1_endian_default", CC1_ENDIAN_DEFAULT_SPEC }, \
34361
{ "cc1_secure_plt_default", CC1_SECURE_PLT_DEFAULT_SPEC }, \
34362
{ "cpp_os_ads", CPP_OS_ADS_SPEC }, \
34363
{ "cpp_os_yellowknife", CPP_OS_YELLOWKNIFE_SPEC }, \
34364
--- a/src/libgo/configure
34365
+++ b/src/libgo/configure
34366
@@ -6501,7 +6501,7 @@
34370
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
34371
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
34372
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
34373
# Find out which ABI we are using.
34374
echo 'int i;' > conftest.$ac_ext
34375
@@ -6519,7 +6519,10 @@
34377
LD="${LD-ld} -m elf_i386"
34379
- ppc64-*linux*|powerpc64-*linux*)
34380
+ powerpc64le-*linux*)
34381
+ LD="${LD-ld} -m elf32lppclinux"
34383
+ powerpc64-*linux*)
34384
LD="${LD-ld} -m elf32ppclinux"
34387
@@ -6538,7 +6541,10 @@
34389
LD="${LD-ld} -m elf_x86_64"
34391
- ppc*-*linux*|powerpc*-*linux*)
34392
+ powerpcle-*linux*)
34393
+ LD="${LD-ld} -m elf64lppc"
34396
LD="${LD-ld} -m elf64ppc"
34398
s390*-*linux*|s390*-*tpf*)
34399
@@ -11105,7 +11111,7 @@
34400
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34401
lt_status=$lt_dlunknown
34402
cat > conftest.$ac_ext <<_LT_EOF
34403
-#line 11108 "configure"
34404
+#line 11114 "configure"
34405
#include "confdefs.h"
34408
@@ -11211,7 +11217,7 @@
34409
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34410
lt_status=$lt_dlunknown
34411
cat > conftest.$ac_ext <<_LT_EOF
34412
-#line 11214 "configure"
34413
+#line 11220 "configure"
34414
#include "confdefs.h"
34417
--- a/src/libgo/testsuite/gotest
34418
+++ b/src/libgo/testsuite/gotest
34419
@@ -369,7 +369,7 @@
34423
- ppc64) text="D" ;;
34424
+ ppc64) text="[TD]" ;;
34427
symtogo='sed -e s/_test/XXXtest/ -e s/.*_\([^_]*\.\)/\1/ -e s/XXXtest/_test/'
34428
--- a/src/libgo/config/libtool.m4
34429
+++ b/src/libgo/config/libtool.m4
34430
@@ -1225,7 +1225,7 @@
34434
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
34435
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
34436
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
34437
# Find out which ABI we are using.
34438
echo 'int i;' > conftest.$ac_ext
34439
@@ -1239,7 +1239,10 @@
34441
LD="${LD-ld} -m elf_i386"
34443
- ppc64-*linux*|powerpc64-*linux*)
34444
+ powerpc64le-*linux*)
34445
+ LD="${LD-ld} -m elf32lppclinux"
34447
+ powerpc64-*linux*)
34448
LD="${LD-ld} -m elf32ppclinux"
34451
@@ -1258,7 +1261,10 @@
34453
LD="${LD-ld} -m elf_x86_64"
34455
- ppc*-*linux*|powerpc*-*linux*)
34456
+ powerpcle-*linux*)
34457
+ LD="${LD-ld} -m elf64lppc"
34460
LD="${LD-ld} -m elf64ppc"
34462
s390*-*linux*|s390*-*tpf*)
34463
--- a/src/config.sub
34464
+++ b/src/config.sub
34467
# Configuration validation subroutine script.
34468
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
34469
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
34470
-# 2011, 2012, 2013 Free Software Foundation, Inc.
34471
+# Copyright 1992-2013 Free Software Foundation, Inc.
34473
-timestamp='2013-01-11'
34474
+timestamp='2013-10-01'
34476
# This file is free software; you can redistribute it and/or modify it
34477
# under the terms of the GNU General Public License as published by
34480
GNU config.sub ($timestamp)
34482
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
34483
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
34484
-2012, 2013 Free Software Foundation, Inc.
34485
+Copyright 1992-2013 Free Software Foundation, Inc.
34487
This is free software; see the source for copying conditions. There is NO
34488
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
34489
@@ -256,12 +252,12 @@
34490
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
34491
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
34495
| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
34499
- | c4x | clipper \
34500
+ | c4x | c8051 | clipper \
34501
| d10v | d30v | dlx | dsp16xx \
34503
| fido | fr30 | frv \
34504
@@ -269,6 +265,7 @@
34506
| i370 | i860 | i960 | ia64 \
34511
| m32c | m32r | m32rle | m68000 | m68k | m88k \
34512
@@ -297,10 +294,10 @@
34515
| nds32 | nds32le | nds32be \
34517
+ | nios | nios2 | nios2eb | nios2el \
34522
| pdp10 | pdp11 | pj | pjl \
34523
| powerpc | powerpc64 | powerpc64le | powerpcle \
34525
@@ -328,7 +325,7 @@
34527
basic_machine=tic6x-unknown
34529
- m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
34530
+ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
34531
basic_machine=$basic_machine-unknown
34534
@@ -370,13 +367,13 @@
34535
| aarch64-* | aarch64_be-* \
34536
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
34537
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
34538
- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
34539
+ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
34540
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
34541
| avr-* | avr32-* \
34542
| be32-* | be64-* \
34543
| bfin-* | bs2000-* \
34544
| c[123]* | c30-* | [cjt]90-* | c4x-* \
34545
- | clipper-* | craynv-* | cydra-* \
34546
+ | c8051-* | clipper-* | craynv-* | cydra-* \
34547
| d10v-* | d30v-* | dlx-* \
34549
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
34550
@@ -385,6 +382,7 @@
34552
| i*86-* | i860-* | i960-* | ia64-* \
34553
| ip2k-* | iq2000-* \
34555
| le32-* | le64-* \
34557
| m32c-* | m32r-* | m32rle-* \
34558
@@ -414,7 +412,7 @@
34561
| nds32-* | nds32le-* | nds32be-* \
34562
- | nios-* | nios2-* \
34563
+ | nios-* | nios2-* | nios2eb-* | nios2el-* \
34564
| none-* | np1-* | ns16k-* | ns32k-* \
34567
@@ -798,7 +796,7 @@
34571
- basic_machine=i386-pc
34572
+ basic_machine=i686-pc
34576
@@ -834,7 +832,7 @@
34577
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
34580
- basic_machine=i386-pc
34581
+ basic_machine=i686-pc
34585
@@ -1550,6 +1548,9 @@
34595
@@ -1593,6 +1594,9 @@
34605
--- a/src/ChangeLog.ibm
34606
+++ b/src/ChangeLog.ibm
34608
+2013-12-10 Alan Modra <amodra@gmail.com>
34610
+ Apply gcc-4_8-branch r205803
34611
+ 2013-12-05 Alan Modra <amodra@gmail.com>
34612
+ * gcc/configure.ac (BUILD_CXXFLAGS) Don't use ALL_CXXFLAGS for
34614
+ <recursive call for build != host>: Clear GMPINC. Don't bother
34616
+ * gcc/configure: Regenerate.
34618
+2013-11-18 Alan Modra <amodra@gmail.com>
34620
+ Backport mainline r205844.
34621
+ * libffi/src/powerpc/ffitarget.h: Import from upstream.
34622
+ * libffi/src/powerpc/ffi_powerpc.h: Likewise.
34623
+ * libffi/src/powerpc/ffi.c: Likewise.
34624
+ * libffi/src/powerpc/ffi_sysv.c: Likewise.
34625
+ * libffi/src/powerpc/ffi_linux64.c: Likewise.
34626
+ * libffi/src/powerpc/sysv.S: Likewise.
34627
+ * libffi/src/powerpc/ppc_closure.S: Likewise.
34628
+ * libffi/src/powerpc/linux64.S: Likewise.
34629
+ * libffi/src/powerpc/linux64_closure.S: Likewise.
34630
+ * libffi/src/types.c: Likewise.
34631
+ * libffi/Makefile.am (EXTRA_DIST): Add new src/powerpc files.
34632
+ (nodist_libffi_la_SOURCES <POWERPC, POWERPC_FREEBSD>): Likewise.
34633
+ * libffi/configure.ac (HAVE_LONG_DOUBLE_VARIANT): Define for powerpc.
34634
+ * libffi/include/ffi.h.in (ffi_prep_types): Declare.
34635
+ * libffi/src/prep_cif.c (ffi_prep_cif_core): Call ffi_prep_types.
34636
+ * libffi/configure: Regenerate.
34637
+ * libffi/fficonfig.h.in: Regenerate.
34638
+ * libffi/Makefile.in: Regenerate.
34639
+ * libffi/man/Makefile.in: Regenerate.
34640
+ * libffi/include/Makefile.in: Regenerate.
34641
+ * libffi/testsuite/Makefile.in: Regenerate.
34643
+2013-11-22 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
34645
+ * libgo/config/libtool.m4: Update to mainline version.
34646
+ * libgo/configure: Regenerate.
34648
+2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
34650
+ Backport from mainline r205000.
34652
+ gotest: Recognize PPC ELF v2 function pointers in text section.
34654
+2013-11-18 Alan Modra <amodra@gmail.com>
34656
+ * libffi/src/powerpc/ppc_closure.S: Don't bl .Luint128.
34658
+ * libffi/src/powerpc/ffitarget.h: Import from upstream.
34659
+ * libffi/src/powerpc/ffi.c: Likewise.
34660
+ * libffi/src/powerpc/linux64.S: Likewise.
34661
+ * libffi/src/powerpc/linux64_closure.S: Likewise.
34662
+ * libffi/doc/libffi.texi: Likewise.
34663
+ * libffi/testsuite/libffi.call/cls_double_va.c: Likewise.
34664
+ * libffi/testsuite/libffi.call/cls_longdouble_va.c: Likewise.
34666
+2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
34668
+ * libgo/config/libtool.m4: Update to mainline version.
34669
+ * libgo/configure: Regenerate.
34671
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
34673
+ * libtool.m4: Update to mainline version.
34674
+ * libjava/libltdl/acinclude.m4: Likewise.
34676
+ * gcc/configure: Regenerate.
34677
+ * boehm-gc/configure: Regenerate.
34678
+ * libatomic/configure: Regenerate.
34679
+ * libbacktrace/configure: Regenerate.
34680
+ * libffi/configure: Regenerate.
34681
+ * libgfortran/configure: Regenerate.
34682
+ * libgomp/configure: Regenerate.
34683
+ * libitm/configure: Regenerate.
34684
+ * libjava/configure: Regenerate.
34685
+ * libjava/libltdl/configure: Regenerate.
34686
+ * libjava/classpath/configure: Regenerate.
34687
+ * libmudflap/configure: Regenerate.
34688
+ * libobjc/configure: Regenerate.
34689
+ * libquadmath/configure: Regenerate.
34690
+ * libsanitizer/configure: Regenerate.
34691
+ * libssp/configure: Regenerate.
34692
+ * libstdc++-v3/configure: Regenerate.
34693
+ * lto-plugin/configure: Regenerate.
34694
+ * zlib/configure: Regenerate.
34696
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
34698
+ Backport from mainline r203071:
34700
+ 2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
34702
+ Import from savannah.gnu.org:
34703
+ * config.guess: Update to 2013-06-10 version.
34704
+ * config.sub: Update to 2013-10-01 version.
34706
+2013-11-12 Bill Schmidt <wschmidt@linux.ibm.com>
34708
+ Backport from mainline
34709
+ 2013-09-20 Alan Modra <amodra@gmail.com>
34711
+ * libtool.m4 (_LT_ENABLE_LOCK <ld -m flags>): Remove non-canonical
34712
+ ppc host match. Support little-endian powerpc linux hosts.
34714
--- a/src/libobjc/configure
34715
+++ b/src/libobjc/configure
34716
@@ -6056,7 +6056,7 @@
34720
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
34721
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
34722
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
34723
# Find out which ABI we are using.
34724
echo 'int i;' > conftest.$ac_ext
34725
@@ -6081,7 +6081,10 @@
34729
- ppc64-*linux*|powerpc64-*linux*)
34730
+ powerpc64le-*linux*)
34731
+ LD="${LD-ld} -m elf32lppclinux"
34733
+ powerpc64-*linux*)
34734
LD="${LD-ld} -m elf32ppclinux"
34737
@@ -6100,7 +6103,10 @@
34739
LD="${LD-ld} -m elf_x86_64"
34741
- ppc*-*linux*|powerpc*-*linux*)
34742
+ powerpcle-*linux*)
34743
+ LD="${LD-ld} -m elf64lppc"
34746
LD="${LD-ld} -m elf64ppc"
34748
s390*-*linux*|s390*-*tpf*)
34749
@@ -10595,7 +10601,7 @@
34750
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34751
lt_status=$lt_dlunknown
34752
cat > conftest.$ac_ext <<_LT_EOF
34753
-#line 10598 "configure"
34754
+#line 10604 "configure"
34755
#include "confdefs.h"
34758
@@ -10701,7 +10707,7 @@
34759
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34760
lt_status=$lt_dlunknown
34761
cat > conftest.$ac_ext <<_LT_EOF
34762
-#line 10704 "configure"
34763
+#line 10710 "configure"
34764
#include "confdefs.h"
34767
@@ -11472,7 +11478,7 @@
34768
enableval=$enable_sjlj_exceptions; :
34770
cat > conftest.$ac_ext << EOF
34771
-#line 11475 "configure"
34772
+#line 11481 "configure"
34775
@implementation Frob
34776
--- a/src/libgfortran/configure
34777
+++ b/src/libgfortran/configure
34778
@@ -8062,7 +8062,7 @@
34782
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
34783
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
34784
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
34785
# Find out which ABI we are using.
34786
echo 'int i;' > conftest.$ac_ext
34787
@@ -8087,7 +8087,10 @@
34791
- ppc64-*linux*|powerpc64-*linux*)
34792
+ powerpc64le-*linux*)
34793
+ LD="${LD-ld} -m elf32lppclinux"
34795
+ powerpc64-*linux*)
34796
LD="${LD-ld} -m elf32ppclinux"
34799
@@ -8106,7 +8109,10 @@
34801
LD="${LD-ld} -m elf_x86_64"
34803
- ppc*-*linux*|powerpc*-*linux*)
34804
+ powerpcle-*linux*)
34805
+ LD="${LD-ld} -m elf64lppc"
34808
LD="${LD-ld} -m elf64ppc"
34810
s390*-*linux*|s390*-*tpf*)
34811
@@ -12333,7 +12339,7 @@
34812
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34813
lt_status=$lt_dlunknown
34814
cat > conftest.$ac_ext <<_LT_EOF
34815
-#line 12336 "configure"
34816
+#line 12342 "configure"
34817
#include "confdefs.h"
34820
@@ -12439,7 +12445,7 @@
34821
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34822
lt_status=$lt_dlunknown
34823
cat > conftest.$ac_ext <<_LT_EOF
34824
-#line 12442 "configure"
34825
+#line 12448 "configure"
34826
#include "confdefs.h"
34829
--- a/src/libffi/configure
34830
+++ b/src/libffi/configure
34831
@@ -613,6 +613,7 @@
34832
FFI_EXEC_TRAMPOLINE_TABLE
34833
FFI_EXEC_TRAMPOLINE_TABLE_FALSE
34834
FFI_EXEC_TRAMPOLINE_TABLE_TRUE
34835
+HAVE_LONG_DOUBLE_VARIANT
34839
@@ -6392,7 +6393,7 @@
34843
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
34844
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
34845
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
34846
# Find out which ABI we are using.
34847
echo 'int i;' > conftest.$ac_ext
34848
@@ -6417,7 +6418,10 @@
34852
- ppc64-*linux*|powerpc64-*linux*)
34853
+ powerpc64le-*linux*)
34854
+ LD="${LD-ld} -m elf32lppclinux"
34856
+ powerpc64-*linux*)
34857
LD="${LD-ld} -m elf32ppclinux"
34860
@@ -6436,7 +6440,10 @@
34862
LD="${LD-ld} -m elf_x86_64"
34864
- ppc*-*linux*|powerpc*-*linux*)
34865
+ powerpcle-*linux*)
34866
+ LD="${LD-ld} -m elf64lppc"
34869
LD="${LD-ld} -m elf64ppc"
34871
s390*-*linux*|s390*-*tpf*)
34872
@@ -10900,7 +10907,7 @@
34873
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34874
lt_status=$lt_dlunknown
34875
cat > conftest.$ac_ext <<_LT_EOF
34876
-#line 10903 "configure"
34877
+#line 10910 "configure"
34878
#include "confdefs.h"
34881
@@ -11006,7 +11013,7 @@
34882
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
34883
lt_status=$lt_dlunknown
34884
cat > conftest.$ac_ext <<_LT_EOF
34885
-#line 11009 "configure"
34886
+#line 11016 "configure"
34887
#include "confdefs.h"
34890
@@ -11443,6 +11450,7 @@
34893
TARGETDIR="unknown"
34894
+HAVE_LONG_DOUBLE_VARIANT=0
34897
TARGET=AARCH64; TARGETDIR=aarch64
34898
@@ -11540,6 +11548,7 @@
34900
powerpc*-*-linux* | powerpc-*-sysv*)
34901
TARGET=POWERPC; TARGETDIR=powerpc
34902
+ HAVE_LONG_DOUBLE_VARIANT=1
34904
powerpc-*-amigaos*)
34905
TARGET=POWERPC; TARGETDIR=powerpc
34906
@@ -11555,6 +11564,7 @@
34908
powerpc-*-freebsd* | powerpc-*-openbsd*)
34909
TARGET=POWERPC_FREEBSD; TARGETDIR=powerpc
34910
+ HAVE_LONG_DOUBLE_VARIANT=1
34912
powerpc64-*-freebsd*)
34913
TARGET=POWERPC; TARGETDIR=powerpc
34914
@@ -12230,17 +12240,25 @@
34915
# Also AC_SUBST this variable for ffi.h.
34916
if test -z "$HAVE_LONG_DOUBLE"; then
34918
- if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
34919
- if test $ac_cv_sizeof_long_double != 0; then
34920
+ if test $ac_cv_sizeof_long_double != 0; then
34921
+ if test $HAVE_LONG_DOUBLE_VARIANT != 0; then
34923
+$as_echo "#define HAVE_LONG_DOUBLE_VARIANT 1" >>confdefs.h
34927
+ if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
34928
+ HAVE_LONG_DOUBLE=1
34930
$as_echo "#define HAVE_LONG_DOUBLE 1" >>confdefs.h
34939
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
34940
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
34941
if test "${ac_cv_c_bigendian+set}" = set; then :
34942
--- a/src/libffi/Makefile.in
34943
+++ b/src/libffi/Makefile.in
34944
@@ -48,10 +48,10 @@
34945
@IA64_TRUE@am__append_11 = src/ia64/ffi.c src/ia64/unix.S
34946
@M32R_TRUE@am__append_12 = src/m32r/sysv.S src/m32r/ffi.c
34947
@M68K_TRUE@am__append_13 = src/m68k/ffi.c src/m68k/sysv.S
34948
-@POWERPC_TRUE@am__append_14 = src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
34949
+@POWERPC_TRUE@am__append_14 = src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
34950
@POWERPC_AIX_TRUE@am__append_15 = src/powerpc/ffi_darwin.c src/powerpc/aix.S src/powerpc/aix_closure.S
34951
@POWERPC_DARWIN_TRUE@am__append_16 = src/powerpc/ffi_darwin.c src/powerpc/darwin.S src/powerpc/darwin_closure.S
34952
-@POWERPC_FREEBSD_TRUE@am__append_17 = src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
34953
+@POWERPC_FREEBSD_TRUE@am__append_17 = src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
34954
@AARCH64_TRUE@am__append_18 = src/aarch64/sysv.S src/aarch64/ffi.c
34955
@ARM_TRUE@am__append_19 = src/arm/sysv.S src/arm/ffi.c
34956
@ARM_TRUE@@FFI_EXEC_TRAMPOLINE_TABLE_TRUE@am__append_20 = src/arm/trampoline.S
34957
@@ -133,7 +133,9 @@
34958
@IA64_TRUE@am__objects_11 = src/ia64/ffi.lo src/ia64/unix.lo
34959
@M32R_TRUE@am__objects_12 = src/m32r/sysv.lo src/m32r/ffi.lo
34960
@M68K_TRUE@am__objects_13 = src/m68k/ffi.lo src/m68k/sysv.lo
34961
-@POWERPC_TRUE@am__objects_14 = src/powerpc/ffi.lo src/powerpc/sysv.lo \
34962
+@POWERPC_TRUE@am__objects_14 = src/powerpc/ffi.lo \
34963
+@POWERPC_TRUE@ src/powerpc/ffi_sysv.lo \
34964
+@POWERPC_TRUE@ src/powerpc/ffi_linux64.lo src/powerpc/sysv.lo \
34965
@POWERPC_TRUE@ src/powerpc/ppc_closure.lo \
34966
@POWERPC_TRUE@ src/powerpc/linux64.lo \
34967
@POWERPC_TRUE@ src/powerpc/linux64_closure.lo
34968
@@ -144,6 +146,7 @@
34969
@POWERPC_DARWIN_TRUE@ src/powerpc/darwin.lo \
34970
@POWERPC_DARWIN_TRUE@ src/powerpc/darwin_closure.lo
34971
@POWERPC_FREEBSD_TRUE@am__objects_17 = src/powerpc/ffi.lo \
34972
+@POWERPC_FREEBSD_TRUE@ src/powerpc/ffi_sysv.lo \
34973
@POWERPC_FREEBSD_TRUE@ src/powerpc/sysv.lo \
34974
@POWERPC_FREEBSD_TRUE@ src/powerpc/ppc_closure.lo
34975
@AARCH64_TRUE@am__objects_18 = src/aarch64/sysv.lo src/aarch64/ffi.lo
34976
@@ -278,6 +281,7 @@
34979
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
34980
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
34981
INSTALL = @INSTALL@
34982
INSTALL_DATA = @INSTALL_DATA@
34983
INSTALL_PROGRAM = @INSTALL_PROGRAM@
34984
@@ -387,10 +391,12 @@
34985
src/ia64/unix.S src/mips/ffi.c src/mips/n32.S src/mips/o32.S \
34986
src/mips/ffitarget.h src/m32r/ffi.c src/m32r/sysv.S \
34987
src/m32r/ffitarget.h src/m68k/ffi.c src/m68k/sysv.S \
34988
- src/m68k/ffitarget.h src/powerpc/ffi.c src/powerpc/sysv.S \
34989
- src/powerpc/linux64.S src/powerpc/linux64_closure.S \
34990
- src/powerpc/ppc_closure.S src/powerpc/asm.h \
34991
- src/powerpc/aix.S src/powerpc/darwin.S \
34992
+ src/m68k/ffitarget.h \
34993
+ src/powerpc/ffi.c src/powerpc/ffi_powerpc.h \
34994
+ src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c \
34995
+ src/powerpc/sysv.S src/powerpc/linux64.S \
34996
+ src/powerpc/linux64_closure.S src/powerpc/ppc_closure.S \
34997
+ src/powerpc/asm.h src/powerpc/aix.S src/powerpc/darwin.S \
34998
src/powerpc/aix_closure.S src/powerpc/darwin_closure.S \
34999
src/powerpc/ffi_darwin.c src/powerpc/ffitarget.h \
35000
src/s390/ffi.c src/s390/sysv.S src/s390/ffitarget.h \
35001
@@ -711,6 +717,10 @@
35002
@: > src/powerpc/$(DEPDIR)/$(am__dirstamp)
35003
src/powerpc/ffi.lo: src/powerpc/$(am__dirstamp) \
35004
src/powerpc/$(DEPDIR)/$(am__dirstamp)
35005
+src/powerpc/ffi_sysv.lo: src/powerpc/$(am__dirstamp) \
35006
+ src/powerpc/$(DEPDIR)/$(am__dirstamp)
35007
+src/powerpc/ffi_linux64.lo: src/powerpc/$(am__dirstamp) \
35008
+ src/powerpc/$(DEPDIR)/$(am__dirstamp)
35009
src/powerpc/sysv.lo: src/powerpc/$(am__dirstamp) \
35010
src/powerpc/$(DEPDIR)/$(am__dirstamp)
35011
src/powerpc/ppc_closure.lo: src/powerpc/$(am__dirstamp) \
35012
@@ -912,6 +922,10 @@
35013
-rm -f src/powerpc/ffi.lo
35014
-rm -f src/powerpc/ffi_darwin.$(OBJEXT)
35015
-rm -f src/powerpc/ffi_darwin.lo
35016
+ -rm -f src/powerpc/ffi_linux64.$(OBJEXT)
35017
+ -rm -f src/powerpc/ffi_linux64.lo
35018
+ -rm -f src/powerpc/ffi_sysv.$(OBJEXT)
35019
+ -rm -f src/powerpc/ffi_sysv.lo
35020
-rm -f src/powerpc/linux64.$(OBJEXT)
35021
-rm -f src/powerpc/linux64.lo
35022
-rm -f src/powerpc/linux64_closure.$(OBJEXT)
35023
@@ -1009,6 +1023,8 @@
35024
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/darwin_closure.Plo@am__quote@
35025
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi.Plo@am__quote@
35026
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi_darwin.Plo@am__quote@
35027
+@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi_linux64.Plo@am__quote@
35028
+@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi_sysv.Plo@am__quote@
35029
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/linux64.Plo@am__quote@
35030
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/linux64_closure.Plo@am__quote@
35031
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ppc_closure.Plo@am__quote@
35032
--- a/src/libffi/include/ffi.h.in
35033
+++ b/src/libffi/include/ffi.h.in
35034
@@ -207,6 +207,11 @@
35038
+#if HAVE_LONG_DOUBLE_VARIANT
35039
+/* Used to adjust size/alignment of ffi types. */
35040
+void ffi_prep_types (ffi_abi abi);
35043
/* Used internally, but overridden by some architectures */
35044
ffi_status ffi_prep_cif_core(ffi_cif *cif,
35046
--- a/src/libffi/include/Makefile.in
35047
+++ b/src/libffi/include/Makefile.in
35048
@@ -113,6 +113,7 @@
35051
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
35052
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
35053
INSTALL = @INSTALL@
35054
INSTALL_DATA = @INSTALL_DATA@
35055
INSTALL_PROGRAM = @INSTALL_PROGRAM@
35056
--- a/src/libffi/fficonfig.h.in
35057
+++ b/src/libffi/fficonfig.h.in
35059
/* Define if you have the long double type and it is bigger than a double */
35060
#undef HAVE_LONG_DOUBLE
35062
+/* Define if you support more than one size of the long double type */
35063
+#undef HAVE_LONG_DOUBLE_VARIANT
35065
/* Define to 1 if you have the `memcpy' function. */
35068
--- a/src/libffi/src/powerpc/ppc_closure.S
35069
+++ b/src/libffi/src/powerpc/ppc_closure.S
35072
.file "ppc_closure.S"
35074
-#ifndef __powerpc64__
35077
ENTRY(ffi_closure_SYSV)
35079
@@ -238,7 +238,7 @@
35086
# The return types below are only used when the ABI type is FFI_SYSV.
35087
# case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct.
35088
@@ -378,8 +378,7 @@
35094
#if defined __ELF__ && defined __linux__
35095
.section .note.GNU-stack,"",@progbits
35098
--- a/src/libffi/src/powerpc/ffitarget.h
35099
+++ b/src/libffi/src/powerpc/ffitarget.h
35100
@@ -60,45 +60,76 @@
35101
typedef enum ffi_abi {
35109
- FFI_LINUX_SOFT_FLOAT,
35110
-# if defined(POWERPC64)
35111
- FFI_DEFAULT_ABI = FFI_LINUX64,
35112
-# elif defined(__NO_FPRS__)
35113
- FFI_DEFAULT_ABI = FFI_LINUX_SOFT_FLOAT,
35114
-# elif (__LDBL_MANT_DIG__ == 106)
35115
- FFI_DEFAULT_ABI = FFI_LINUX,
35117
- FFI_DEFAULT_ABI = FFI_GCC_SYSV,
35121
-#ifdef POWERPC_AIX
35122
+#if defined (POWERPC_AIX)
35125
FFI_DEFAULT_ABI = FFI_AIX,
35129
-#ifdef POWERPC_DARWIN
35130
+#elif defined (POWERPC_DARWIN)
35133
FFI_DEFAULT_ABI = FFI_DARWIN,
35137
-#ifdef POWERPC_FREEBSD
35142
- FFI_LINUX_SOFT_FLOAT,
35143
- FFI_DEFAULT_ABI = FFI_SYSV,
35145
+ /* The FFI_COMPAT values are used by old code. Since libffi may be
35146
+ a shared library we have to support old values for backwards
35147
+ compatibility. */
35149
+ FFI_COMPAT_GCC_SYSV,
35150
+ FFI_COMPAT_LINUX64,
35151
+ FFI_COMPAT_LINUX,
35152
+ FFI_COMPAT_LINUX_SOFT_FLOAT,
35154
+# if defined (POWERPC64)
35155
+ /* This bit, always set in new code, must not be set in any of the
35156
+ old FFI_COMPAT values that might be used for 64-bit linux. We
35157
+ only need worry about FFI_COMPAT_LINUX64, but to be safe avoid
35158
+ all old values. */
35160
+ /* This and following bits can reuse FFI_COMPAT values. */
35161
+ FFI_LINUX_STRUCT_ALIGN = 1,
35162
+ FFI_LINUX_LONG_DOUBLE_128 = 2,
35163
+ FFI_DEFAULT_ABI = (FFI_LINUX
35164
+# ifdef __STRUCT_PARM_ALIGN__
35165
+ | FFI_LINUX_STRUCT_ALIGN
35167
+# ifdef __LONG_DOUBLE_128__
35168
+ | FFI_LINUX_LONG_DOUBLE_128
35171
+ FFI_LAST_ABI = 12
35174
+ /* This bit, always set in new code, must not be set in any of the
35175
+ old FFI_COMPAT values that might be used for 32-bit linux/sysv/bsd. */
35177
+ /* This and following bits can reuse FFI_COMPAT values. */
35178
+ FFI_SYSV_SOFT_FLOAT = 1,
35179
+ FFI_SYSV_STRUCT_RET = 2,
35180
+ FFI_SYSV_IBM_LONG_DOUBLE = 4,
35181
+ FFI_SYSV_LONG_DOUBLE_128 = 16,
35183
+ FFI_DEFAULT_ABI = (FFI_SYSV
35184
+# ifdef __NO_FPRS__
35185
+ | FFI_SYSV_SOFT_FLOAT
35187
+# if (defined (__SVR4_STRUCT_RETURN) \
35188
+ || defined (POWERPC_FREEBSD) && !defined (__AIX_STRUCT_RETURN))
35189
+ | FFI_SYSV_STRUCT_RET
35191
+# if __LDBL_MANT_DIG__ == 106
35192
+ | FFI_SYSV_IBM_LONG_DOUBLE
35194
+# ifdef __LONG_DOUBLE_128__
35195
+ | FFI_SYSV_LONG_DOUBLE_128
35198
+ FFI_LAST_ABI = 32
35206
@@ -106,6 +137,10 @@
35208
#define FFI_CLOSURES 1
35209
#define FFI_NATIVE_RAW_API 0
35210
+#if defined (POWERPC) || defined (POWERPC_FREEBSD)
35211
+# define FFI_TARGET_SPECIFIC_VARIADIC 1
35212
+# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs
35215
/* For additional types like the below, take care about the order in
35216
ppc_closures.S. They must follow after the FFI_TYPE_LAST. */
35217
@@ -113,19 +148,26 @@
35218
/* Needed for soft-float long-double-128 support. */
35219
#define FFI_TYPE_UINT128 (FFI_TYPE_LAST + 1)
35221
-/* Needed for FFI_SYSV small structure returns.
35222
- We use two flag bits, (FLAG_SYSV_SMST_R3, FLAG_SYSV_SMST_R4) which are
35223
- defined in ffi.c, to determine the exact return type and its size. */
35224
+/* Needed for FFI_SYSV small structure returns. */
35225
#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2)
35227
-#if defined(POWERPC64) || defined(POWERPC_AIX)
35228
+/* Used by ELFv2 for homogenous structure returns. */
35229
+#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_TYPE_LAST + 1)
35230
+#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_TYPE_LAST + 2)
35231
+#define FFI_V2_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 3)
35233
+#if _CALL_ELF == 2
35234
+# define FFI_TRAMPOLINE_SIZE 32
35236
+# if defined(POWERPC64) || defined(POWERPC_AIX)
35237
# if defined(POWERPC_DARWIN64)
35238
# define FFI_TRAMPOLINE_SIZE 48
35240
# define FFI_TRAMPOLINE_SIZE 24
35242
-#else /* POWERPC || POWERPC_AIX */
35243
+# else /* POWERPC || POWERPC_AIX */
35244
# define FFI_TRAMPOLINE_SIZE 40
35249
--- a/src/libffi/src/powerpc/ffi.c
35250
+++ b/src/libffi/src/powerpc/ffi.c
35252
/* -----------------------------------------------------------------------
35253
- ffi.c - Copyright (C) 2011 Anthony Green
35254
+ ffi.c - Copyright (C) 2013 IBM
35255
+ Copyright (C) 2011 Anthony Green
35256
Copyright (C) 2011 Kyle Moffett
35257
Copyright (C) 2008 Red Hat, Inc
35258
Copyright (C) 2007, 2008 Free Software Foundation, Inc
35259
@@ -27,966 +28,104 @@
35260
OTHER DEALINGS IN THE SOFTWARE.
35261
----------------------------------------------------------------------- */
35264
-#include <ffi_common.h>
35266
+#include "ffi_common.h"
35267
+#include "ffi_powerpc.h"
35269
-#include <stdlib.h>
35270
-#include <stdio.h>
35273
-extern void ffi_closure_SYSV (void);
35274
-extern void FFI_HIDDEN ffi_closure_LINUX64 (void);
35277
- /* The assembly depends on these exact flags. */
35278
- FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
35279
- FLAG_RETURNS_NOTHING = 1 << (31-30), /* These go in cr7 */
35280
-#ifndef __NO_FPRS__
35281
- FLAG_RETURNS_FP = 1 << (31-29),
35283
- FLAG_RETURNS_64BITS = 1 << (31-28),
35285
- FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */
35287
- FLAG_ARG_NEEDS_COPY = 1 << (31- 7),
35288
-#ifndef __NO_FPRS__
35289
- FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
35291
- FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
35292
- FLAG_RETVAL_REFERENCE = 1 << (31- 4)
35295
-/* About the SYSV ABI. */
35296
-#define ASM_NEEDS_REGISTERS 4
35297
-#define NUM_GPR_ARG_REGISTERS 8
35298
-#ifndef __NO_FPRS__
35299
-# define NUM_FPR_ARG_REGISTERS 8
35302
-/* ffi_prep_args_SYSV is called by the assembly routine once stack space
35303
- has been allocated for the function's arguments.
35305
- The stack layout we want looks like this:
35307
- | Return address from ffi_call_SYSV 4bytes | higher addresses
35308
- |--------------------------------------------|
35309
- | Previous backchain pointer 4 | stack pointer here
35310
- |--------------------------------------------|<+ <<< on entry to
35311
- | Saved r28-r31 4*4 | | ffi_call_SYSV
35312
- |--------------------------------------------| |
35313
- | GPR registers r3-r10 8*4 | | ffi_call_SYSV
35314
- |--------------------------------------------| |
35315
- | FPR registers f1-f8 (optional) 8*8 | |
35316
- |--------------------------------------------| | stack |
35317
- | Space for copied structures | | grows |
35318
- |--------------------------------------------| | down V
35319
- | Parameters that didn't fit in registers | |
35320
- |--------------------------------------------| | lower addresses
35321
- | Space for callee's LR 4 | |
35322
- |--------------------------------------------| | stack pointer here
35323
- | Current backchain pointer 4 |-/ during
35324
- |--------------------------------------------| <<< ffi_call_SYSV
35329
-ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
35330
+#if HAVE_LONG_DOUBLE_VARIANT
35331
+/* Adjust ffi_type_longdouble. */
35333
+ffi_prep_types (ffi_abi abi)
35335
- const unsigned bytes = ecif->cif->bytes;
35336
- const unsigned flags = ecif->cif->flags;
35346
- /* 'stacktop' points at the previous backchain pointer. */
35349
- /* 'gpr_base' points at the space for gpr3, and grows upwards as
35350
- we use GPR registers. */
35352
- int intarg_count;
35354
-#ifndef __NO_FPRS__
35355
- /* 'fpr_base' points at the space for fpr1, and grows upwards as
35356
- we use FPR registers. */
35361
- /* 'copy_space' grows down as we put structures in it. It should
35362
- stay 16-byte aligned. */
35365
- /* 'next_arg' grows up as we put parameters in it. */
35370
-#ifndef __NO_FPRS__
35371
- double double_tmp;
35376
- signed char **sc;
35377
- unsigned char **uc;
35378
- signed short **ss;
35379
- unsigned short **us;
35380
- unsigned int **ui;
35385
- size_t struct_copy_size;
35386
- unsigned gprvalue;
35388
- stacktop.c = (char *) stack + bytes;
35389
- gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
35390
- intarg_count = 0;
35391
-#ifndef __NO_FPRS__
35392
- fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
35394
- copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
35396
- copy_space.c = gpr_base.c;
35398
- next_arg.u = stack + 2;
35400
- /* Check that everything starts aligned properly. */
35401
- FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
35402
- FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
35403
- FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
35404
- FFI_ASSERT ((bytes & 0xF) == 0);
35405
- FFI_ASSERT (copy_space.c >= next_arg.c);
35407
- /* Deal with return values that are actually pass-by-reference. */
35408
- if (flags & FLAG_RETVAL_REFERENCE)
35410
- *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue;
35414
- /* Now for the arguments. */
35415
- p_argv.v = ecif->avalue;
35416
- for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
35418
- i--, ptr++, p_argv.v++)
35420
- unsigned short typenum = (*ptr)->type;
35422
- /* We may need to handle some values depending on ABI */
35423
- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) {
35424
- if (typenum == FFI_TYPE_FLOAT)
35425
- typenum = FFI_TYPE_UINT32;
35426
- if (typenum == FFI_TYPE_DOUBLE)
35427
- typenum = FFI_TYPE_UINT64;
35428
- if (typenum == FFI_TYPE_LONGDOUBLE)
35429
- typenum = FFI_TYPE_UINT128;
35430
- } else if (ecif->cif->abi != FFI_LINUX) {
35431
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35432
- if (typenum == FFI_TYPE_LONGDOUBLE)
35433
- typenum = FFI_TYPE_STRUCT;
35437
- /* Now test the translated value */
35438
- switch (typenum) {
35439
-#ifndef __NO_FPRS__
35440
- case FFI_TYPE_FLOAT:
35441
- /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */
35442
- double_tmp = **p_argv.f;
35443
- if (fparg_count >= NUM_FPR_ARG_REGISTERS)
35445
- *next_arg.f = (float) double_tmp;
35450
- *fpr_base.d++ = double_tmp;
35452
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
35455
- case FFI_TYPE_DOUBLE:
35456
- /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */
35457
- double_tmp = **p_argv.d;
35459
- if (fparg_count >= NUM_FPR_ARG_REGISTERS)
35461
- if (intarg_count >= NUM_GPR_ARG_REGISTERS
35462
- && intarg_count % 2 != 0)
35467
- *next_arg.d = double_tmp;
35471
- *fpr_base.d++ = double_tmp;
35473
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
35476
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35477
- case FFI_TYPE_LONGDOUBLE:
35478
- double_tmp = (*p_argv.d)[0];
35480
- if (fparg_count >= NUM_FPR_ARG_REGISTERS - 1)
35482
- if (intarg_count >= NUM_GPR_ARG_REGISTERS
35483
- && intarg_count % 2 != 0)
35488
- *next_arg.d = double_tmp;
35490
- double_tmp = (*p_argv.d)[1];
35491
- *next_arg.d = double_tmp;
35496
- *fpr_base.d++ = double_tmp;
35497
- double_tmp = (*p_argv.d)[1];
35498
- *fpr_base.d++ = double_tmp;
35501
- fparg_count += 2;
35502
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
35505
-#endif /* have FPRs */
35508
- * The soft float ABI for long doubles works like this, a long double
35509
- * is passed in four consecutive GPRs if available. A maximum of 2
35510
- * long doubles can be passed in gprs. If we do not have 4 GPRs
35511
- * left, the long double is passed on the stack, 4-byte aligned.
35513
- case FFI_TYPE_UINT128: {
35514
- unsigned int int_tmp = (*p_argv.ui)[0];
35516
- if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3) {
35517
- if (intarg_count < NUM_GPR_ARG_REGISTERS)
35518
- intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count;
35519
- *(next_arg.u++) = int_tmp;
35520
- for (ii = 1; ii < 4; ii++) {
35521
- int_tmp = (*p_argv.ui)[ii];
35522
- *(next_arg.u++) = int_tmp;
35525
- *(gpr_base.u++) = int_tmp;
35526
- for (ii = 1; ii < 4; ii++) {
35527
- int_tmp = (*p_argv.ui)[ii];
35528
- *(gpr_base.u++) = int_tmp;
35531
- intarg_count += 4;
35535
- case FFI_TYPE_UINT64:
35536
- case FFI_TYPE_SINT64:
35537
- if (intarg_count == NUM_GPR_ARG_REGISTERS-1)
35539
- if (intarg_count >= NUM_GPR_ARG_REGISTERS)
35541
- if (intarg_count % 2 != 0)
35546
- *next_arg.ll = **p_argv.ll;
35551
- /* whoops: abi states only certain register pairs
35552
- * can be used for passing long long int
35553
- * specifically (r3,r4), (r5,r6), (r7,r8),
35554
- * (r9,r10) and if next arg is long long but
35555
- * not correct starting register of pair then skip
35556
- * until the proper starting register
35558
- if (intarg_count % 2 != 0)
35563
- *gpr_base.ll++ = **p_argv.ll;
35565
- intarg_count += 2;
35568
- case FFI_TYPE_STRUCT:
35569
- struct_copy_size = ((*ptr)->size + 15) & ~0xF;
35570
- copy_space.c -= struct_copy_size;
35571
- memcpy (copy_space.c, *p_argv.c, (*ptr)->size);
35573
- gprvalue = (unsigned long) copy_space.c;
35575
- FFI_ASSERT (copy_space.c > next_arg.c);
35576
- FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
35579
- case FFI_TYPE_UINT8:
35580
- gprvalue = **p_argv.uc;
35582
- case FFI_TYPE_SINT8:
35583
- gprvalue = **p_argv.sc;
35585
- case FFI_TYPE_UINT16:
35586
- gprvalue = **p_argv.us;
35588
- case FFI_TYPE_SINT16:
35589
- gprvalue = **p_argv.ss;
35592
- case FFI_TYPE_INT:
35593
- case FFI_TYPE_UINT32:
35594
- case FFI_TYPE_SINT32:
35595
- case FFI_TYPE_POINTER:
35597
- gprvalue = **p_argv.ui;
35600
- if (intarg_count >= NUM_GPR_ARG_REGISTERS)
35601
- *next_arg.u++ = gprvalue;
35603
- *gpr_base.u++ = gprvalue;
35609
- /* Check that we didn't overrun the stack... */
35610
- FFI_ASSERT (copy_space.c >= next_arg.c);
35611
- FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
35612
-#ifndef __NO_FPRS__
35613
- FFI_ASSERT (fpr_base.u
35614
- <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
35616
- FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
35617
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35619
+ ffi_prep_types_linux64 (abi);
35621
+ ffi_prep_types_sysv (abi);
35626
-/* About the LINUX64 ABI. */
35628
- NUM_GPR_ARG_REGISTERS64 = 8,
35629
- NUM_FPR_ARG_REGISTERS64 = 13
35631
-enum { ASM_NEEDS_REGISTERS64 = 4 };
35633
-/* ffi_prep_args64 is called by the assembly routine once stack space
35634
- has been allocated for the function's arguments.
35636
- The stack layout we want looks like this:
35638
- | Ret addr from ffi_call_LINUX64 8bytes | higher addresses
35639
- |--------------------------------------------|
35640
- | CR save area 8bytes |
35641
- |--------------------------------------------|
35642
- | Previous backchain pointer 8 | stack pointer here
35643
- |--------------------------------------------|<+ <<< on entry to
35644
- | Saved r28-r31 4*8 | | ffi_call_LINUX64
35645
- |--------------------------------------------| |
35646
- | GPR registers r3-r10 8*8 | |
35647
- |--------------------------------------------| |
35648
- | FPR registers f1-f13 (optional) 13*8 | |
35649
- |--------------------------------------------| |
35650
- | Parameter save area | |
35651
- |--------------------------------------------| |
35652
- | TOC save area 8 | |
35653
- |--------------------------------------------| | stack |
35654
- | Linker doubleword 8 | | grows |
35655
- |--------------------------------------------| | down V
35656
- | Compiler doubleword 8 | |
35657
- |--------------------------------------------| | lower addresses
35658
- | Space for callee's LR 8 | |
35659
- |--------------------------------------------| |
35660
- | CR save area 8 | |
35661
- |--------------------------------------------| | stack pointer here
35662
- | Current backchain pointer 8 |-/ during
35663
- |--------------------------------------------| <<< ffi_call_LINUX64
35668
-ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
35670
- const unsigned long bytes = ecif->cif->bytes;
35671
- const unsigned long flags = ecif->cif->flags;
35675
- unsigned long *ul;
35680
- /* 'stacktop' points at the previous backchain pointer. */
35683
- /* 'next_arg' points at the space for gpr3, and grows upwards as
35684
- we use GPR registers, then continues at rest. */
35690
- /* 'fpr_base' points at the space for fpr3, and grows upwards as
35691
- we use FPR registers. */
35697
- double double_tmp;
35701
- signed char **sc;
35702
- unsigned char **uc;
35703
- signed short **ss;
35704
- unsigned short **us;
35706
- unsigned int **ui;
35707
- unsigned long **ul;
35711
- unsigned long gprvalue;
35713
- stacktop.c = (char *) stack + bytes;
35714
- gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
35715
- gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
35716
- rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
35717
- fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
35719
- next_arg.ul = gpr_base.ul;
35721
- /* Check that everything starts aligned properly. */
35722
- FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
35723
- FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
35724
- FFI_ASSERT ((bytes & 0xF) == 0);
35726
- /* Deal with return values that are actually pass-by-reference. */
35727
- if (flags & FLAG_RETVAL_REFERENCE)
35728
- *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
35730
- /* Now for the arguments. */
35731
- p_argv.v = ecif->avalue;
35732
- for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
35734
- i--, ptr++, p_argv.v++)
35736
- switch ((*ptr)->type)
35738
- case FFI_TYPE_FLOAT:
35739
- double_tmp = **p_argv.f;
35740
- *next_arg.f = (float) double_tmp;
35741
- if (++next_arg.ul == gpr_end.ul)
35742
- next_arg.ul = rest.ul;
35743
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
35744
- *fpr_base.d++ = double_tmp;
35746
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
35749
- case FFI_TYPE_DOUBLE:
35750
- double_tmp = **p_argv.d;
35751
- *next_arg.d = double_tmp;
35752
- if (++next_arg.ul == gpr_end.ul)
35753
- next_arg.ul = rest.ul;
35754
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
35755
- *fpr_base.d++ = double_tmp;
35757
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
35760
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35761
- case FFI_TYPE_LONGDOUBLE:
35762
- double_tmp = (*p_argv.d)[0];
35763
- *next_arg.d = double_tmp;
35764
- if (++next_arg.ul == gpr_end.ul)
35765
- next_arg.ul = rest.ul;
35766
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
35767
- *fpr_base.d++ = double_tmp;
35769
- double_tmp = (*p_argv.d)[1];
35770
- *next_arg.d = double_tmp;
35771
- if (++next_arg.ul == gpr_end.ul)
35772
- next_arg.ul = rest.ul;
35773
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
35774
- *fpr_base.d++ = double_tmp;
35776
- FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
35777
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
35781
- case FFI_TYPE_STRUCT:
35782
- words = ((*ptr)->size + 7) / 8;
35783
- if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
35785
- size_t first = gpr_end.c - next_arg.c;
35786
- memcpy (next_arg.c, *p_argv.c, first);
35787
- memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
35788
- next_arg.c = rest.c + words * 8 - first;
35792
- char *where = next_arg.c;
35794
-#ifndef __LITTLE_ENDIAN__
35795
- /* Structures with size less than eight bytes are passed
35797
- if ((*ptr)->size < 8)
35798
- where += 8 - (*ptr)->size;
35800
- memcpy (where, *p_argv.c, (*ptr)->size);
35801
- next_arg.ul += words;
35802
- if (next_arg.ul == gpr_end.ul)
35803
- next_arg.ul = rest.ul;
35807
- case FFI_TYPE_UINT8:
35808
- gprvalue = **p_argv.uc;
35810
- case FFI_TYPE_SINT8:
35811
- gprvalue = **p_argv.sc;
35813
- case FFI_TYPE_UINT16:
35814
- gprvalue = **p_argv.us;
35816
- case FFI_TYPE_SINT16:
35817
- gprvalue = **p_argv.ss;
35819
- case FFI_TYPE_UINT32:
35820
- gprvalue = **p_argv.ui;
35822
- case FFI_TYPE_INT:
35823
- case FFI_TYPE_SINT32:
35824
- gprvalue = **p_argv.si;
35827
- case FFI_TYPE_UINT64:
35828
- case FFI_TYPE_SINT64:
35829
- case FFI_TYPE_POINTER:
35830
- gprvalue = **p_argv.ul;
35832
- *next_arg.ul++ = gprvalue;
35833
- if (next_arg.ul == gpr_end.ul)
35834
- next_arg.ul = rest.ul;
35839
- FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
35840
- || (next_arg.ul >= gpr_base.ul
35841
- && next_arg.ul <= gpr_base.ul + 4));
35846
/* Perform machine dependent cif processing */
35848
+ffi_status FFI_HIDDEN
35849
ffi_prep_cif_machdep (ffi_cif *cif)
35851
- /* All this is for the SYSV and LINUX64 ABI. */
35855
- int fparg_count = 0, intarg_count = 0;
35856
- unsigned flags = 0;
35857
- unsigned struct_copy_size = 0;
35858
- unsigned type = cif->rtype->type;
35859
- unsigned size = cif->rtype->size;
35861
- if (cif->abi != FFI_LINUX64)
35863
- /* All the machine-independent calculation of cif->bytes will be wrong.
35864
- Redo the calculation for SYSV. */
35866
- /* Space for the frame pointer, callee's LR, and the asm's temp regs. */
35867
- bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
35869
- /* Space for the GPR registers. */
35870
- bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
35874
- /* 64-bit ABI. */
35876
- /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
35878
- bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
35880
- /* Space for the mandatory parm save area and general registers. */
35881
- bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
35884
- /* Return value handling. The rules for SYSV are as follows:
35885
- - 32-bit (or less) integer values are returned in gpr3;
35886
- - Structures of size <= 4 bytes also returned in gpr3;
35887
- - 64-bit integer values and structures between 5 and 8 bytes are returned
35888
- in gpr3 and gpr4;
35889
- - Single/double FP values are returned in fpr1;
35890
- - Larger structures are allocated space and a pointer is passed as
35891
- the first argument.
35892
- - long doubles (if not equivalent to double) are returned in
35893
- fpr1,fpr2 for Linux and as for large structs for SysV.
35895
- - integer values in gpr3;
35896
- - Structures/Unions by reference;
35897
- - Single/double FP values in fpr1, long double in fpr1,fpr2.
35898
- - soft-float float/doubles are treated as UINT32/UINT64 respectivley.
35899
- - soft-float long doubles are returned in gpr3-gpr6. */
35900
- /* First translate for softfloat/nonlinux */
35901
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
35902
- if (type == FFI_TYPE_FLOAT)
35903
- type = FFI_TYPE_UINT32;
35904
- if (type == FFI_TYPE_DOUBLE)
35905
- type = FFI_TYPE_UINT64;
35906
- if (type == FFI_TYPE_LONGDOUBLE)
35907
- type = FFI_TYPE_UINT128;
35908
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
35909
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35910
- if (type == FFI_TYPE_LONGDOUBLE)
35911
- type = FFI_TYPE_STRUCT;
35913
+ return ffi_prep_cif_linux64 (cif);
35915
+ return ffi_prep_cif_sysv (cif);
35922
-#ifndef __NO_FPRS__
35923
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35924
- case FFI_TYPE_LONGDOUBLE:
35925
- flags |= FLAG_RETURNS_128BITS;
35926
- /* Fall through. */
35927
+ffi_status FFI_HIDDEN
35928
+ffi_prep_cif_machdep_var (ffi_cif *cif,
35929
+ unsigned int nfixedargs MAYBE_UNUSED,
35930
+ unsigned int ntotalargs MAYBE_UNUSED)
35933
+ return ffi_prep_cif_linux64_var (cif, nfixedargs, ntotalargs);
35935
+ return ffi_prep_cif_sysv (cif);
35937
- case FFI_TYPE_DOUBLE:
35938
- flags |= FLAG_RETURNS_64BITS;
35939
- /* Fall through. */
35940
- case FFI_TYPE_FLOAT:
35941
- flags |= FLAG_RETURNS_FP;
35945
- case FFI_TYPE_UINT128:
35946
- flags |= FLAG_RETURNS_128BITS;
35947
- /* Fall through. */
35948
- case FFI_TYPE_UINT64:
35949
- case FFI_TYPE_SINT64:
35950
- flags |= FLAG_RETURNS_64BITS;
35953
- case FFI_TYPE_STRUCT:
35955
- * The final SYSV ABI says that structures smaller or equal 8 bytes
35956
- * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
35959
- * NOTE: The assembly code can safely assume that it just needs to
35960
- * store both r3 and r4 into a 8-byte word-aligned buffer, as
35961
- * we allocate a temporary buffer in ffi_call() if this flag is
35964
- if (cif->abi == FFI_SYSV && size <= 8)
35965
- flags |= FLAG_RETURNS_SMST;
35967
- flags |= FLAG_RETVAL_REFERENCE;
35968
- /* Fall through. */
35969
- case FFI_TYPE_VOID:
35970
- flags |= FLAG_RETURNS_NOTHING;
35974
- /* Returns 32-bit integer, or similar. Nothing to do here. */
35978
- if (cif->abi != FFI_LINUX64)
35979
- /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
35980
- first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
35981
- goes on the stack. Structures and long doubles (if not equivalent
35982
- to double) are passed as a pointer to a copy of the structure.
35983
- Stuff on the stack needs to keep proper alignment. */
35984
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
35986
- unsigned short typenum = (*ptr)->type;
35988
- /* We may need to handle some values depending on ABI */
35989
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
35990
- if (typenum == FFI_TYPE_FLOAT)
35991
- typenum = FFI_TYPE_UINT32;
35992
- if (typenum == FFI_TYPE_DOUBLE)
35993
- typenum = FFI_TYPE_UINT64;
35994
- if (typenum == FFI_TYPE_LONGDOUBLE)
35995
- typenum = FFI_TYPE_UINT128;
35996
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
35997
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
35998
- if (typenum == FFI_TYPE_LONGDOUBLE)
35999
- typenum = FFI_TYPE_STRUCT;
36003
- switch (typenum) {
36004
-#ifndef __NO_FPRS__
36005
- case FFI_TYPE_FLOAT:
36007
- /* floating singles are not 8-aligned on stack */
36010
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
36011
- case FFI_TYPE_LONGDOUBLE:
36015
- case FFI_TYPE_DOUBLE:
36017
- /* If this FP arg is going on the stack, it must be
36018
- 8-byte-aligned. */
36019
- if (fparg_count > NUM_FPR_ARG_REGISTERS
36020
- && intarg_count >= NUM_GPR_ARG_REGISTERS
36021
- && intarg_count % 2 != 0)
36025
- case FFI_TYPE_UINT128:
36027
- * A long double in FFI_LINUX_SOFT_FLOAT can use only a set
36028
- * of four consecutive gprs. If we do not have enough, we
36029
- * have to adjust the intarg_count value.
36031
- if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3
36032
- && intarg_count < NUM_GPR_ARG_REGISTERS)
36033
- intarg_count = NUM_GPR_ARG_REGISTERS;
36034
- intarg_count += 4;
36037
- case FFI_TYPE_UINT64:
36038
- case FFI_TYPE_SINT64:
36039
- /* 'long long' arguments are passed as two words, but
36040
- either both words must fit in registers or both go
36041
- on the stack. If they go on the stack, they must
36042
- be 8-byte-aligned.
36044
- Also, only certain register pairs can be used for
36045
- passing long long int -- specifically (r3,r4), (r5,r6),
36046
- (r7,r8), (r9,r10).
36048
- if (intarg_count == NUM_GPR_ARG_REGISTERS-1
36049
- || intarg_count % 2 != 0)
36051
- intarg_count += 2;
36054
- case FFI_TYPE_STRUCT:
36055
- /* We must allocate space for a copy of these to enforce
36056
- pass-by-value. Pad the space up to a multiple of 16
36057
- bytes (the maximum alignment required for anything under
36058
- the SYSV ABI). */
36059
- struct_copy_size += ((*ptr)->size + 15) & ~0xF;
36060
- /* Fall through (allocate space for the pointer). */
36062
- case FFI_TYPE_POINTER:
36063
- case FFI_TYPE_INT:
36064
- case FFI_TYPE_UINT32:
36065
- case FFI_TYPE_SINT32:
36066
- case FFI_TYPE_UINT16:
36067
- case FFI_TYPE_SINT16:
36068
- case FFI_TYPE_UINT8:
36069
- case FFI_TYPE_SINT8:
36070
- /* Everything else is passed as a 4-byte word in a GPR, either
36071
- the object itself or a pointer to it. */
36079
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
36081
- switch ((*ptr)->type)
36083
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
36084
- case FFI_TYPE_LONGDOUBLE:
36085
- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
36086
- intarg_count += 4;
36089
- fparg_count += 2;
36090
- intarg_count += 2;
36094
- case FFI_TYPE_FLOAT:
36095
- case FFI_TYPE_DOUBLE:
36100
- case FFI_TYPE_STRUCT:
36101
- intarg_count += ((*ptr)->size + 7) / 8;
36104
- case FFI_TYPE_POINTER:
36105
- case FFI_TYPE_UINT64:
36106
- case FFI_TYPE_SINT64:
36107
- case FFI_TYPE_INT:
36108
- case FFI_TYPE_UINT32:
36109
- case FFI_TYPE_SINT32:
36110
- case FFI_TYPE_UINT16:
36111
- case FFI_TYPE_SINT16:
36112
- case FFI_TYPE_UINT8:
36113
- case FFI_TYPE_SINT8:
36114
- /* Everything else is passed as a 8-byte word in a GPR, either
36115
- the object itself or a pointer to it. */
36123
-#ifndef __NO_FPRS__
36124
- if (fparg_count != 0)
36125
- flags |= FLAG_FP_ARGUMENTS;
36127
- if (intarg_count > 4)
36128
- flags |= FLAG_4_GPR_ARGUMENTS;
36129
- if (struct_copy_size != 0)
36130
- flags |= FLAG_ARG_NEEDS_COPY;
36132
- if (cif->abi != FFI_LINUX64)
36134
-#ifndef __NO_FPRS__
36135
- /* Space for the FPR registers, if needed. */
36136
- if (fparg_count != 0)
36137
- bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);
36140
- /* Stack space. */
36141
- if (intarg_count > NUM_GPR_ARG_REGISTERS)
36142
- bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int);
36143
-#ifndef __NO_FPRS__
36144
- if (fparg_count > NUM_FPR_ARG_REGISTERS)
36145
- bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double);
36150
-#ifndef __NO_FPRS__
36151
- /* Space for the FPR registers, if needed. */
36152
- if (fparg_count != 0)
36153
- bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
36156
- /* Stack space. */
36157
- if (intarg_count > NUM_GPR_ARG_REGISTERS64)
36158
- bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
36161
- /* The stack space allocated needs to be a multiple of 16 bytes. */
36162
- bytes = (bytes + 15) & ~0xF;
36164
- /* Add in the space for the copied structures. */
36165
- bytes += struct_copy_size;
36167
- cif->flags = flags;
36168
- cif->bytes = bytes;
36173
-extern void ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *,
36174
- void (*fn)(void));
36175
-extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long,
36176
- unsigned long, unsigned long *,
36177
- void (*fn)(void));
36180
ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
36183
- * The final SYSV ABI says that structures smaller or equal 8 bytes
36184
- * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
36187
- * Just to keep things simple for the assembly code, we will always
36188
- * bounce-buffer struct return values less than or equal to 8 bytes.
36189
- * This allows the ASM to handle SYSV small structures by directly
36190
- * writing r3 and r4 to memory without worrying about struct size.
36192
- unsigned int smst_buffer[2];
36193
+ /* The final SYSV ABI says that structures smaller or equal 8 bytes
36194
+ are returned in r3/r4. A draft ABI used by linux instead returns
36197
+ We bounce-buffer SYSV small struct return values so that sysv.S
36198
+ can write r3 and r4 to memory without worrying about struct size.
36200
+ For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
36201
+ for similar reasons. */
36202
+ unsigned long smst_buffer[8];
36204
- unsigned int rsize = 0;
36207
ecif.avalue = avalue;
36209
- /* Ensure that we have a valid struct return value */
36210
ecif.rvalue = rvalue;
36211
- if (cif->rtype->type == FFI_TYPE_STRUCT) {
36212
- rsize = cif->rtype->size;
36214
- ecif.rvalue = smst_buffer;
36215
- else if (!rvalue)
36216
- ecif.rvalue = alloca(rsize);
36218
+ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
36219
+ ecif.rvalue = smst_buffer;
36220
+ /* Ensure that we have a valid struct return value.
36221
+ FIXME: Isn't this just papering over a user problem? */
36222
+ else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT)
36223
+ ecif.rvalue = alloca (cif->rtype->size);
36225
- switch (cif->abi)
36228
-# ifndef __NO_FPRS__
36230
- case FFI_GCC_SYSV:
36233
- case FFI_LINUX_SOFT_FLOAT:
36234
- ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn);
36237
+ ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn);
36239
- case FFI_LINUX64:
36240
- ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn);
36242
+ ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn);
36249
/* Check for a bounce-buffered return value */
36250
if (rvalue && ecif.rvalue == smst_buffer)
36251
- memcpy(rvalue, smst_buffer, rsize);
36253
+ unsigned int rsize = cif->rtype->size;
36254
+#ifndef __LITTLE_ENDIAN__
36255
+ /* The SYSV ABI returns a structure of up to 4 bytes in size
36256
+ left-padded in r3. */
36257
+# ifndef POWERPC64
36259
+ memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize);
36262
+ /* The SYSV ABI returns a structure of up to 8 bytes in size
36263
+ left-padded in r3/r4, and the ELFv2 ABI similarly returns a
36264
+ structure of up to 8 bytes in size left-padded in r3. */
36266
+ memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
36269
+ memcpy (rvalue, smst_buffer, rsize);
36275
-#define MIN_CACHE_LINE_SIZE 8
36278
-flush_icache (char *wraddr, char *xaddr, int size)
36281
- for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
36282
- __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
36283
- : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
36284
- __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
36285
- : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
36291
ffi_prep_closure_loc (ffi_closure *closure,
36293
@@ -995,487 +134,8 @@
36297
- void **tramp = (void **) &closure->tramp[0];
36299
- if (cif->abi != FFI_LINUX64)
36300
- return FFI_BAD_ABI;
36301
- /* Copy function address and TOC from ffi_closure_LINUX64. */
36302
- memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
36303
- tramp[2] = codeloc;
36304
+ return ffi_prep_closure_loc_linux64 (closure, cif, fun, user_data, codeloc);
36306
- unsigned int *tramp;
36308
- if (! (cif->abi == FFI_GCC_SYSV
36309
- || cif->abi == FFI_SYSV
36310
- || cif->abi == FFI_LINUX
36311
- || cif->abi == FFI_LINUX_SOFT_FLOAT))
36312
- return FFI_BAD_ABI;
36314
- tramp = (unsigned int *) &closure->tramp[0];
36315
- tramp[0] = 0x7c0802a6; /* mflr r0 */
36316
- tramp[1] = 0x4800000d; /* bl 10 <trampoline_initial+0x10> */
36317
- tramp[4] = 0x7d6802a6; /* mflr r11 */
36318
- tramp[5] = 0x7c0803a6; /* mtlr r0 */
36319
- tramp[6] = 0x800b0000; /* lwz r0,0(r11) */
36320
- tramp[7] = 0x816b0004; /* lwz r11,4(r11) */
36321
- tramp[8] = 0x7c0903a6; /* mtctr r0 */
36322
- tramp[9] = 0x4e800420; /* bctr */
36323
- *(void **) &tramp[2] = (void *) ffi_closure_SYSV; /* function */
36324
- *(void **) &tramp[3] = codeloc; /* context */
36326
- /* Flush the icache. */
36327
- flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
36328
+ return ffi_prep_closure_loc_sysv (closure, cif, fun, user_data, codeloc);
36331
- closure->cif = cif;
36332
- closure->fun = fun;
36333
- closure->user_data = user_data;
36344
-int ffi_closure_helper_SYSV (ffi_closure *, void *, unsigned long *,
36345
- ffi_dblfl *, unsigned long *);
36347
-/* Basically the trampoline invokes ffi_closure_SYSV, and on
36348
- * entry, r11 holds the address of the closure.
36349
- * After storing the registers that could possibly contain
36350
- * parameters to be passed into the stack frame and setting
36351
- * up space for a return value, ffi_closure_SYSV invokes the
36352
- * following helper function to do most of the work
36356
-ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue,
36357
- unsigned long *pgr, ffi_dblfl *pfr,
36358
- unsigned long *pst)
36360
- /* rvalue is the pointer to space for return value in closure assembly */
36361
- /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
36362
- /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV */
36363
- /* pst is the pointer to outgoing parameter stack in original caller */
36366
- ffi_type ** arg_types;
36368
-#ifndef __NO_FPRS__
36369
- long nf = 0; /* number of floating registers already used */
36371
- long ng = 0; /* number of general registers already used */
36373
- ffi_cif *cif = closure->cif;
36374
- unsigned size = cif->rtype->size;
36375
- unsigned short rtypenum = cif->rtype->type;
36377
- avalue = alloca (cif->nargs * sizeof (void *));
36379
- /* First translate for softfloat/nonlinux */
36380
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
36381
- if (rtypenum == FFI_TYPE_FLOAT)
36382
- rtypenum = FFI_TYPE_UINT32;
36383
- if (rtypenum == FFI_TYPE_DOUBLE)
36384
- rtypenum = FFI_TYPE_UINT64;
36385
- if (rtypenum == FFI_TYPE_LONGDOUBLE)
36386
- rtypenum = FFI_TYPE_UINT128;
36387
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
36388
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
36389
- if (rtypenum == FFI_TYPE_LONGDOUBLE)
36390
- rtypenum = FFI_TYPE_STRUCT;
36395
- /* Copy the caller's structure return value address so that the closure
36396
- returns the data directly to the caller.
36397
- For FFI_SYSV the result is passed in r3/r4 if the struct size is less
36398
- or equal 8 bytes. */
36399
- if (rtypenum == FFI_TYPE_STRUCT && ((cif->abi != FFI_SYSV) || (size > 8))) {
36400
- rvalue = (void *) *pgr;
36406
- avn = cif->nargs;
36407
- arg_types = cif->arg_types;
36409
- /* Grab the addresses of the arguments from the stack frame. */
36410
- while (i < avn) {
36411
- unsigned short typenum = arg_types[i]->type;
36413
- /* We may need to handle some values depending on ABI */
36414
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
36415
- if (typenum == FFI_TYPE_FLOAT)
36416
- typenum = FFI_TYPE_UINT32;
36417
- if (typenum == FFI_TYPE_DOUBLE)
36418
- typenum = FFI_TYPE_UINT64;
36419
- if (typenum == FFI_TYPE_LONGDOUBLE)
36420
- typenum = FFI_TYPE_UINT128;
36421
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
36422
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
36423
- if (typenum == FFI_TYPE_LONGDOUBLE)
36424
- typenum = FFI_TYPE_STRUCT;
36428
- switch (typenum) {
36429
-#ifndef __NO_FPRS__
36430
- case FFI_TYPE_FLOAT:
36431
- /* unfortunately float values are stored as doubles
36432
- * in the ffi_closure_SYSV code (since we don't check
36433
- * the type in that routine).
36436
- /* there are 8 64bit floating point registers */
36440
- double temp = pfr->d;
36441
- pfr->f = (float) temp;
36448
- /* FIXME? here we are really changing the values
36449
- * stored in the original calling routines outgoing
36450
- * parameter stack. This is probably a really
36451
- * naughty thing to do but...
36458
- case FFI_TYPE_DOUBLE:
36459
- /* On the outgoing stack all values are aligned to 8 */
36460
- /* there are 8 64bit floating point registers */
36470
- if (((long) pst) & 4)
36477
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
36478
- case FFI_TYPE_LONGDOUBLE:
36487
- if (((long) pst) & 4)
36495
-#endif /* have FPRS */
36497
- case FFI_TYPE_UINT128:
36499
- * Test if for the whole long double, 4 gprs are available.
36500
- * otherwise the stuff ends up on the stack.
36513
- case FFI_TYPE_SINT8:
36514
- case FFI_TYPE_UINT8:
36515
-#ifndef __LITTLE_ENDIAN__
36516
- /* there are 8 gpr registers used to pass values */
36519
- avalue[i] = (char *) pgr + 3;
36525
- avalue[i] = (char *) pst + 3;
36530
- case FFI_TYPE_SINT16:
36531
- case FFI_TYPE_UINT16:
36532
-#ifndef __LITTLE_ENDIAN__
36533
- /* there are 8 gpr registers used to pass values */
36536
- avalue[i] = (char *) pgr + 2;
36542
- avalue[i] = (char *) pst + 2;
36547
- case FFI_TYPE_SINT32:
36548
- case FFI_TYPE_UINT32:
36549
- case FFI_TYPE_POINTER:
36550
- /* there are 8 gpr registers used to pass values */
36564
- case FFI_TYPE_STRUCT:
36565
- /* Structs are passed by reference. The address will appear in a
36566
- gpr if it is one of the first 8 arguments. */
36569
- avalue[i] = (void *) *pgr;
36575
- avalue[i] = (void *) *pst;
36580
- case FFI_TYPE_SINT64:
36581
- case FFI_TYPE_UINT64:
36582
- /* passing long long ints are complex, they must
36583
- * be passed in suitable register pairs such as
36584
- * (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10)
36585
- * and if the entire pair aren't available then the outgoing
36586
- * parameter stack is used for both but an alignment of 8
36587
- * must will be kept. So we must either look in pgr
36588
- * or pst to find the correct address for this type
36595
- /* skip r4, r6, r8 as starting points */
36605
- if (((long) pst) & 4)
36621
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
36623
- /* Tell ffi_closure_SYSV how to perform return type promotions.
36624
- Because the FFI_SYSV ABI returns the structures <= 8 bytes in r3/r4
36625
- we have to tell ffi_closure_SYSV how to treat them. We combine the base
36626
- type FFI_SYSV_TYPE_SMALL_STRUCT - 1 with the size of the struct.
36627
- So a one byte struct gets the return type 16. Return type 1 to 15 are
36628
- already used and we never have a struct with size zero. That is the reason
36629
- for the subtraction of 1. See the comment in ffitarget.h about ordering.
36631
- if (cif->abi == FFI_SYSV && rtypenum == FFI_TYPE_STRUCT && size <= 8)
36632
- return (FFI_SYSV_TYPE_SMALL_STRUCT - 1) + size;
36636
-int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *,
36637
- unsigned long *, ffi_dblfl *);
36640
-ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue,
36641
- unsigned long *pst, ffi_dblfl *pfr)
36643
- /* rvalue is the pointer to space for return value in closure assembly */
36644
- /* pst is the pointer to parameter save area
36645
- (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
36646
- /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
36649
- ffi_type **arg_types;
36652
- ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
36654
- cif = closure->cif;
36655
- avalue = alloca (cif->nargs * sizeof (void *));
36657
- /* Copy the caller's structure return value address so that the closure
36658
- returns the data directly to the caller. */
36659
- if (cif->rtype->type == FFI_TYPE_STRUCT)
36661
- rvalue = (void *) *pst;
36666
- avn = cif->nargs;
36667
- arg_types = cif->arg_types;
36669
- /* Grab the addresses of the arguments from the stack frame. */
36672
- switch (arg_types[i]->type)
36674
- case FFI_TYPE_SINT8:
36675
- case FFI_TYPE_UINT8:
36676
-#ifndef __LITTLE_ENDIAN__
36677
- avalue[i] = (char *) pst + 7;
36681
- case FFI_TYPE_SINT16:
36682
- case FFI_TYPE_UINT16:
36683
-#ifndef __LITTLE_ENDIAN__
36684
- avalue[i] = (char *) pst + 6;
36688
- case FFI_TYPE_SINT32:
36689
- case FFI_TYPE_UINT32:
36690
-#ifndef __LITTLE_ENDIAN__
36691
- avalue[i] = (char *) pst + 4;
36695
- case FFI_TYPE_SINT64:
36696
- case FFI_TYPE_UINT64:
36697
- case FFI_TYPE_POINTER:
36702
- case FFI_TYPE_STRUCT:
36703
-#ifndef __LITTLE_ENDIAN__
36704
- /* Structures with size less than eight bytes are passed
36706
- if (arg_types[i]->size < 8)
36707
- avalue[i] = (char *) pst + 8 - arg_types[i]->size;
36711
- pst += (arg_types[i]->size + 7) / 8;
36714
- case FFI_TYPE_FLOAT:
36715
- /* unfortunately float values are stored as doubles
36716
- * in the ffi_closure_LINUX64 code (since we don't check
36717
- * the type in that routine).
36720
- /* there are 13 64bit floating point registers */
36722
- if (pfr < end_pfr)
36724
- double temp = pfr->d;
36725
- pfr->f = (float) temp;
36734
- case FFI_TYPE_DOUBLE:
36735
- /* On the outgoing stack all values are aligned to 8 */
36736
- /* there are 13 64bit floating point registers */
36738
- if (pfr < end_pfr)
36748
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
36749
- case FFI_TYPE_LONGDOUBLE:
36750
- if (pfr + 1 < end_pfr)
36757
- if (pfr < end_pfr)
36759
- /* Passed partly in f13 and partly on the stack.
36760
- Move it all to the stack. */
36761
- *pst = *(unsigned long *) pfr;
36778
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
36780
- /* Tell ffi_closure_LINUX64 how to perform return type promotions. */
36781
- return cif->rtype->type;
36783
--- a/src/libffi/src/powerpc/sysv.S
36784
+++ b/src/libffi/src/powerpc/sysv.S
36787
#include <powerpc/asm.h>
36789
-#ifndef __powerpc64__
36791
.globl ffi_prep_args_SYSV
36792
ENTRY(ffi_call_SYSV)
36794
@@ -213,8 +213,8 @@
36800
#if defined __ELF__ && defined __linux__
36801
.section .note.GNU-stack,"",@progbits
36804
--- a/src/libffi/src/powerpc/linux64_closure.S
36805
+++ b/src/libffi/src/powerpc/linux64_closure.S
36806
@@ -30,18 +30,25 @@
36808
.file "linux64_closure.S"
36810
-#ifdef __powerpc64__
36812
FFI_HIDDEN (ffi_closure_LINUX64)
36813
.globl ffi_closure_LINUX64
36814
+# if _CALL_ELF == 2
36816
+ffi_closure_LINUX64:
36817
+ addis %r2, %r12, .TOC.-ffi_closure_LINUX64@ha
36818
+ addi %r2, %r2, .TOC.-ffi_closure_LINUX64@l
36819
+ .localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64
36821
.section ".opd","aw"
36823
ffi_closure_LINUX64:
36824
-#ifdef _CALL_LINUX
36825
+# ifdef _CALL_LINUX
36826
.quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0
36827
.type ffi_closure_LINUX64,@function
36829
.L.ffi_closure_LINUX64:
36832
FFI_HIDDEN (.ffi_closure_LINUX64)
36833
.globl .ffi_closure_LINUX64
36834
.quad .ffi_closure_LINUX64,.TOC.@tocbase,0
36835
@@ -49,61 +56,101 @@
36836
.type .ffi_closure_LINUX64,@function
36838
.ffi_closure_LINUX64:
36843
+# if _CALL_ELF == 2
36844
+# 32 byte special reg save area + 64 byte parm save area
36845
+# + 64 byte retval area + 13*8 fpr save area + round to 16
36846
+# define STACKFRAME 272
36847
+# define PARMSAVE 32
36848
+# define RETVAL PARMSAVE+64
36850
+# 48 bytes special reg save area + 64 bytes parm save area
36851
+# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
36852
+# define STACKFRAME 240
36853
+# define PARMSAVE 48
36854
+# define RETVAL PARMSAVE+64
36858
- # save general regs into parm save area
36863
+# if _CALL_ELF == 2
36864
+ ld %r12, FFI_TRAMPOLINE_SIZE(%r11) # closure->cif
36866
+ lwz %r12, 28(%r12) # cif->flags
36868
+ addi %r12, %r1, PARMSAVE
36870
+ # Our caller has not allocated a parameter save area.
36871
+ # We need to allocate one here and use it to pass gprs to
36872
+ # ffi_closure_helper_LINUX64.
36873
+ addi %r12, %r1, -STACKFRAME+PARMSAVE
36876
+ # Save general regs into parm save area
36879
+ std %r5, 16(%r12)
36880
+ std %r6, 24(%r12)
36881
+ std %r7, 32(%r12)
36882
+ std %r8, 40(%r12)
36883
+ std %r9, 48(%r12)
36884
+ std %r10, 56(%r12)
36889
- std %r10, 104(%r1)
36890
+ # load up the pointer to the parm save area
36894
+ # Save general regs into parm save area
36895
+ # This is the parameter save area set up by our caller.
36896
+ std %r3, PARMSAVE+0(%r1)
36897
+ std %r4, PARMSAVE+8(%r1)
36898
+ std %r5, PARMSAVE+16(%r1)
36899
+ std %r6, PARMSAVE+24(%r1)
36900
+ std %r7, PARMSAVE+32(%r1)
36901
+ std %r8, PARMSAVE+40(%r1)
36902
+ std %r9, PARMSAVE+48(%r1)
36903
+ std %r10, PARMSAVE+56(%r1)
36907
- # mandatory 48 bytes special reg save area + 64 bytes parm save area
36908
- # + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
36909
- stdu %r1, -240(%r1)
36911
+ # load up the pointer to the parm save area
36912
+ addi %r5, %r1, PARMSAVE
36915
# next save fpr 1 to fpr 13
36916
- stfd %f1, 128+(0*8)(%r1)
36917
- stfd %f2, 128+(1*8)(%r1)
36918
- stfd %f3, 128+(2*8)(%r1)
36919
- stfd %f4, 128+(3*8)(%r1)
36920
- stfd %f5, 128+(4*8)(%r1)
36921
- stfd %f6, 128+(5*8)(%r1)
36922
- stfd %f7, 128+(6*8)(%r1)
36923
- stfd %f8, 128+(7*8)(%r1)
36924
- stfd %f9, 128+(8*8)(%r1)
36925
- stfd %f10, 128+(9*8)(%r1)
36926
- stfd %f11, 128+(10*8)(%r1)
36927
- stfd %f12, 128+(11*8)(%r1)
36928
- stfd %f13, 128+(12*8)(%r1)
36929
+ stfd %f1, -104+(0*8)(%r1)
36930
+ stfd %f2, -104+(1*8)(%r1)
36931
+ stfd %f3, -104+(2*8)(%r1)
36932
+ stfd %f4, -104+(3*8)(%r1)
36933
+ stfd %f5, -104+(4*8)(%r1)
36934
+ stfd %f6, -104+(5*8)(%r1)
36935
+ stfd %f7, -104+(6*8)(%r1)
36936
+ stfd %f8, -104+(7*8)(%r1)
36937
+ stfd %f9, -104+(8*8)(%r1)
36938
+ stfd %f10, -104+(9*8)(%r1)
36939
+ stfd %f11, -104+(10*8)(%r1)
36940
+ stfd %f12, -104+(11*8)(%r1)
36941
+ stfd %f13, -104+(12*8)(%r1)
36943
- # set up registers for the routine that actually does the work
36944
- # get the context pointer from the trampoline
36946
+ # load up the pointer to the saved fpr registers */
36947
+ addi %r6, %r1, -104
36949
- # now load up the pointer to the result storage
36950
- addi %r4, %r1, 112
36951
+ # load up the pointer to the result storage
36952
+ addi %r4, %r1, -STACKFRAME+RETVAL
36954
- # now load up the pointer to the parameter save area
36955
- # in the previous frame
36956
- addi %r5, %r1, 240 + 48
36957
+ stdu %r1, -STACKFRAME(%r1)
36960
- # now load up the pointer to the saved fpr registers */
36961
- addi %r6, %r1, 128
36962
+ # get the context pointer from the trampoline
36966
-#ifdef _CALL_LINUX
36967
+# if defined _CALL_LINUX || _CALL_ELF == 2
36968
bl ffi_closure_helper_LINUX64
36971
bl .ffi_closure_helper_LINUX64
36976
# now r3 contains the return type
36977
@@ -112,10 +159,12 @@
36979
# look up the proper starting point in table
36980
# by using return type as offset
36981
+ ld %r0, STACKFRAME+16(%r1)
36982
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT
36984
mflr %r4 # move address of .Lret to r4
36985
sldi %r3, %r3, 4 # now multiply return type by 16
36986
addi %r4, %r4, .Lret_type0 - .Lret
36987
- ld %r0, 240+16(%r1)
36988
add %r3, %r3, %r4 # add contents of table to table address
36991
@@ -128,117 +177,175 @@
36993
# case FFI_TYPE_VOID
36995
- addi %r1, %r1, 240
36996
+ addi %r1, %r1, STACKFRAME
36999
# case FFI_TYPE_INT
37000
-#ifdef __LITTLE_ENDIAN__
37001
- lwa %r3, 112+0(%r1)
37003
- lwa %r3, 112+4(%r1)
37005
+# ifdef __LITTLE_ENDIAN__
37006
+ lwa %r3, RETVAL+0(%r1)
37008
+ lwa %r3, RETVAL+4(%r1)
37011
- addi %r1, %r1, 240
37012
+ addi %r1, %r1, STACKFRAME
37014
# case FFI_TYPE_FLOAT
37015
- lfs %f1, 112+0(%r1)
37016
+ lfs %f1, RETVAL+0(%r1)
37018
- addi %r1, %r1, 240
37019
+ addi %r1, %r1, STACKFRAME
37021
# case FFI_TYPE_DOUBLE
37022
- lfd %f1, 112+0(%r1)
37023
+ lfd %f1, RETVAL+0(%r1)
37025
- addi %r1, %r1, 240
37026
+ addi %r1, %r1, STACKFRAME
37028
# case FFI_TYPE_LONGDOUBLE
37029
- lfd %f1, 112+0(%r1)
37030
+ lfd %f1, RETVAL+0(%r1)
37032
- lfd %f2, 112+8(%r1)
37033
+ lfd %f2, RETVAL+8(%r1)
37035
# case FFI_TYPE_UINT8
37036
-#ifdef __LITTLE_ENDIAN__
37037
- lbz %r3, 112+0(%r1)
37039
- lbz %r3, 112+7(%r1)
37041
+# ifdef __LITTLE_ENDIAN__
37042
+ lbz %r3, RETVAL+0(%r1)
37044
+ lbz %r3, RETVAL+7(%r1)
37047
- addi %r1, %r1, 240
37048
+ addi %r1, %r1, STACKFRAME
37050
# case FFI_TYPE_SINT8
37051
-#ifdef __LITTLE_ENDIAN__
37052
- lbz %r3, 112+0(%r1)
37054
- lbz %r3, 112+7(%r1)
37056
+# ifdef __LITTLE_ENDIAN__
37057
+ lbz %r3, RETVAL+0(%r1)
37059
+ lbz %r3, RETVAL+7(%r1)
37064
# case FFI_TYPE_UINT16
37065
-#ifdef __LITTLE_ENDIAN__
37066
- lhz %r3, 112+0(%r1)
37068
- lhz %r3, 112+6(%r1)
37070
+# ifdef __LITTLE_ENDIAN__
37071
+ lhz %r3, RETVAL+0(%r1)
37073
+ lhz %r3, RETVAL+6(%r1)
37077
- addi %r1, %r1, 240
37078
+ addi %r1, %r1, STACKFRAME
37080
# case FFI_TYPE_SINT16
37081
-#ifdef __LITTLE_ENDIAN__
37082
- lha %r3, 112+0(%r1)
37084
- lha %r3, 112+6(%r1)
37086
+# ifdef __LITTLE_ENDIAN__
37087
+ lha %r3, RETVAL+0(%r1)
37089
+ lha %r3, RETVAL+6(%r1)
37092
- addi %r1, %r1, 240
37093
+ addi %r1, %r1, STACKFRAME
37095
# case FFI_TYPE_UINT32
37096
-#ifdef __LITTLE_ENDIAN__
37097
- lwz %r3, 112+0(%r1)
37099
- lwz %r3, 112+4(%r1)
37101
+# ifdef __LITTLE_ENDIAN__
37102
+ lwz %r3, RETVAL+0(%r1)
37104
+ lwz %r3, RETVAL+4(%r1)
37107
- addi %r1, %r1, 240
37108
+ addi %r1, %r1, STACKFRAME
37110
# case FFI_TYPE_SINT32
37111
-#ifdef __LITTLE_ENDIAN__
37112
- lwa %r3, 112+0(%r1)
37114
- lwa %r3, 112+4(%r1)
37116
+# ifdef __LITTLE_ENDIAN__
37117
+ lwa %r3, RETVAL+0(%r1)
37119
+ lwa %r3, RETVAL+4(%r1)
37122
- addi %r1, %r1, 240
37123
+ addi %r1, %r1, STACKFRAME
37125
# case FFI_TYPE_UINT64
37126
- ld %r3, 112+0(%r1)
37127
+ ld %r3, RETVAL+0(%r1)
37129
- addi %r1, %r1, 240
37130
+ addi %r1, %r1, STACKFRAME
37132
# case FFI_TYPE_SINT64
37133
- ld %r3, 112+0(%r1)
37134
+ ld %r3, RETVAL+0(%r1)
37136
- addi %r1, %r1, 240
37137
+ addi %r1, %r1, STACKFRAME
37139
# case FFI_TYPE_STRUCT
37141
- addi %r1, %r1, 240
37142
+ addi %r1, %r1, STACKFRAME
37145
# case FFI_TYPE_POINTER
37146
- ld %r3, 112+0(%r1)
37147
+ ld %r3, RETVAL+0(%r1)
37149
- addi %r1, %r1, 240
37150
+ addi %r1, %r1, STACKFRAME
37153
+# case FFI_V2_TYPE_FLOAT_HOMOG
37154
+ lfs %f1, RETVAL+0(%r1)
37155
+ lfs %f2, RETVAL+4(%r1)
37156
+ lfs %f3, RETVAL+8(%r1)
37158
+# case FFI_V2_TYPE_DOUBLE_HOMOG
37159
+ lfd %f1, RETVAL+0(%r1)
37160
+ lfd %f2, RETVAL+8(%r1)
37161
+ lfd %f3, RETVAL+16(%r1)
37162
+ lfd %f4, RETVAL+24(%r1)
37164
+ lfd %f5, RETVAL+32(%r1)
37165
+ lfd %f6, RETVAL+40(%r1)
37166
+ lfd %f7, RETVAL+48(%r1)
37167
+ lfd %f8, RETVAL+56(%r1)
37168
+ addi %r1, %r1, STACKFRAME
37171
+ lfs %f4, RETVAL+12(%r1)
37173
+ lfs %f5, RETVAL+16(%r1)
37174
+ lfs %f6, RETVAL+20(%r1)
37175
+ lfs %f7, RETVAL+24(%r1)
37176
+ lfs %f8, RETVAL+28(%r1)
37177
+ addi %r1, %r1, STACKFRAME
37180
+# ifdef __LITTLE_ENDIAN__
37181
+ ld %r3,RETVAL+0(%r1)
37183
+ ld %r4,RETVAL+8(%r1)
37184
+ addi %r1, %r1, STACKFRAME
37187
+ # A struct smaller than a dword is returned in the low bits of r3
37188
+ # ie. right justified. Larger structs are passed left justified
37189
+ # in r3 and r4. The return value area on the stack will have
37190
+ # the structs as they are usually stored in memory.
37191
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes?
37193
+ ld %r3,RETVAL+0(%r1)
37196
+ ld %r4,RETVAL+8(%r1)
37197
+ addi %r1, %r1, STACKFRAME
37200
+ addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7
37203
+ addi %r1, %r1, STACKFRAME
37204
+ srd %r3, %r3, %r5
37210
.byte 0,12,0,1,128,0,0,0
37211
-#ifdef _CALL_LINUX
37212
+# if _CALL_ELF == 2
37213
+ .size ffi_closure_LINUX64,.-ffi_closure_LINUX64
37215
+# ifdef _CALL_LINUX
37216
.size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
37219
.size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64
37224
.section .eh_frame,EH_FRAME_FLAGS,@progbits
37226
@@ -267,14 +374,14 @@
37227
.byte 0x2 # DW_CFA_advance_loc1
37229
.byte 0xe # DW_CFA_def_cfa_offset
37231
+ .uleb128 STACKFRAME
37232
.byte 0x11 # DW_CFA_offset_extended_sf
37239
-#if defined __ELF__ && defined __linux__
37240
+# if defined __ELF__ && defined __linux__
37241
.section .note.GNU-stack,"",@progbits
37244
--- a/src/libffi/src/powerpc/ffi_powerpc.h
37245
+++ b/src/libffi/src/powerpc/ffi_powerpc.h
37247
+/* -----------------------------------------------------------------------
37248
+ ffi_powerpc.h - Copyright (C) 2013 IBM
37249
+ Copyright (C) 2011 Anthony Green
37250
+ Copyright (C) 2011 Kyle Moffett
37251
+ Copyright (C) 2008 Red Hat, Inc
37252
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc
37253
+ Copyright (c) 1998 Geoffrey Keating
37255
+ PowerPC Foreign Function Interface
37257
+ Permission is hereby granted, free of charge, to any person obtaining
37258
+ a copy of this software and associated documentation files (the
37259
+ ``Software''), to deal in the Software without restriction, including
37260
+ without limitation the rights to use, copy, modify, merge, publish,
37261
+ distribute, sublicense, and/or sell copies of the Software, and to
37262
+ permit persons to whom the Software is furnished to do so, subject to
37263
+ the following conditions:
37265
+ The above copyright notice and this permission notice shall be included
37266
+ in all copies or substantial portions of the Software.
37268
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
37269
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37270
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
37271
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
37272
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
37273
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
37274
+ OTHER DEALINGS IN THE SOFTWARE.
37275
+ ----------------------------------------------------------------------- */
37278
+ /* The assembly depends on these exact flags. */
37279
+ /* These go in cr7 */
37280
+ FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
37281
+ FLAG_RETURNS_NOTHING = 1 << (31-30),
37282
+ FLAG_RETURNS_FP = 1 << (31-29),
37283
+ FLAG_RETURNS_64BITS = 1 << (31-28),
37285
+ /* This goes in cr6 */
37286
+ FLAG_RETURNS_128BITS = 1 << (31-27),
37288
+ FLAG_COMPAT = 1 << (31- 8), /* Not used by assembly */
37290
+ /* These go in cr1 */
37291
+ FLAG_ARG_NEEDS_COPY = 1 << (31- 7), /* Used by sysv code */
37292
+ FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by linux64 code */
37293
+ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
37294
+ FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
37295
+ FLAG_RETVAL_REFERENCE = 1 << (31- 4)
37304
+void FFI_HIDDEN ffi_closure_SYSV (void);
37305
+void FFI_HIDDEN ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *,
37308
+void FFI_HIDDEN ffi_prep_types_sysv (ffi_abi);
37309
+ffi_status FFI_HIDDEN ffi_prep_cif_sysv (ffi_cif *);
37310
+int FFI_HIDDEN ffi_closure_helper_SYSV (ffi_closure *, void *, unsigned long *,
37311
+ ffi_dblfl *, unsigned long *);
37313
+void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long, unsigned long,
37314
+ unsigned long *, void (*)(void));
37315
+void FFI_HIDDEN ffi_closure_LINUX64 (void);
37317
+void FFI_HIDDEN ffi_prep_types_linux64 (ffi_abi);
37318
+ffi_status FFI_HIDDEN ffi_prep_cif_linux64 (ffi_cif *);
37319
+ffi_status FFI_HIDDEN ffi_prep_cif_linux64_var (ffi_cif *, unsigned int,
37321
+void FFI_HIDDEN ffi_prep_args64 (extended_cif *, unsigned long *const);
37322
+int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *,
37323
+ unsigned long *, ffi_dblfl *);
37324
--- a/src/libffi/src/powerpc/ffi_sysv.c
37325
+++ b/src/libffi/src/powerpc/ffi_sysv.c
37327
+/* -----------------------------------------------------------------------
37328
+ ffi_sysv.c - Copyright (C) 2013 IBM
37329
+ Copyright (C) 2011 Anthony Green
37330
+ Copyright (C) 2011 Kyle Moffett
37331
+ Copyright (C) 2008 Red Hat, Inc
37332
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc
37333
+ Copyright (c) 1998 Geoffrey Keating
37335
+ PowerPC Foreign Function Interface
37337
+ Permission is hereby granted, free of charge, to any person obtaining
37338
+ a copy of this software and associated documentation files (the
37339
+ ``Software''), to deal in the Software without restriction, including
37340
+ without limitation the rights to use, copy, modify, merge, publish,
37341
+ distribute, sublicense, and/or sell copies of the Software, and to
37342
+ permit persons to whom the Software is furnished to do so, subject to
37343
+ the following conditions:
37345
+ The above copyright notice and this permission notice shall be included
37346
+ in all copies or substantial portions of the Software.
37348
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
37349
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37350
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
37351
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
37352
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
37353
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
37354
+ OTHER DEALINGS IN THE SOFTWARE.
37355
+ ----------------------------------------------------------------------- */
37360
+#include "ffi_common.h"
37361
+#include "ffi_powerpc.h"
37364
+/* About the SYSV ABI. */
37365
+#define ASM_NEEDS_REGISTERS 4
37366
+#define NUM_GPR_ARG_REGISTERS 8
37367
+#define NUM_FPR_ARG_REGISTERS 8
37370
+#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
37371
+/* Adjust size of ffi_type_longdouble. */
37373
+ffi_prep_types_sysv (ffi_abi abi)
37375
+ if ((abi & (FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128)) == FFI_SYSV)
37377
+ ffi_type_longdouble.size = 8;
37378
+ ffi_type_longdouble.alignment = 8;
37382
+ ffi_type_longdouble.size = 16;
37383
+ ffi_type_longdouble.alignment = 16;
37388
+/* Transform long double, double and float to other types as per abi. */
37390
+translate_float (int abi, int type)
37392
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
37393
+ if (type == FFI_TYPE_LONGDOUBLE
37394
+ && (abi & FFI_SYSV_LONG_DOUBLE_128) == 0)
37395
+ type = FFI_TYPE_DOUBLE;
37397
+ if ((abi & FFI_SYSV_SOFT_FLOAT) != 0)
37399
+ if (type == FFI_TYPE_FLOAT)
37400
+ type = FFI_TYPE_UINT32;
37401
+ else if (type == FFI_TYPE_DOUBLE)
37402
+ type = FFI_TYPE_UINT64;
37403
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
37404
+ else if (type == FFI_TYPE_LONGDOUBLE)
37405
+ type = FFI_TYPE_UINT128;
37407
+ else if ((abi & FFI_SYSV_IBM_LONG_DOUBLE) == 0)
37409
+ if (type == FFI_TYPE_LONGDOUBLE)
37410
+ type = FFI_TYPE_STRUCT;
37416
+/* Perform machine dependent cif processing */
37418
+ffi_prep_cif_sysv_core (ffi_cif *cif)
37422
+ unsigned i, fparg_count = 0, intarg_count = 0;
37423
+ unsigned flags = cif->flags;
37424
+ unsigned struct_copy_size = 0;
37425
+ unsigned type = cif->rtype->type;
37426
+ unsigned size = cif->rtype->size;
37428
+ /* The machine-independent calculation of cif->bytes doesn't work
37429
+ for us. Redo the calculation. */
37431
+ /* Space for the frame pointer, callee's LR, and the asm's temp regs. */
37432
+ bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
37434
+ /* Space for the GPR registers. */
37435
+ bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
37437
+ /* Return value handling. The rules for SYSV are as follows:
37438
+ - 32-bit (or less) integer values are returned in gpr3;
37439
+ - Structures of size <= 4 bytes also returned in gpr3;
37440
+ - 64-bit integer values and structures between 5 and 8 bytes are returned
37441
+ in gpr3 and gpr4;
37442
+ - Larger structures are allocated space and a pointer is passed as
37443
+ the first argument.
37444
+ - Single/double FP values are returned in fpr1;
37445
+ - long doubles (if not equivalent to double) are returned in
37446
+ fpr1,fpr2 for Linux and as for large structs for SysV. */
37448
+ type = translate_float (cif->abi, type);
37452
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
37453
+ case FFI_TYPE_LONGDOUBLE:
37454
+ flags |= FLAG_RETURNS_128BITS;
37455
+ /* Fall through. */
37457
+ case FFI_TYPE_DOUBLE:
37458
+ flags |= FLAG_RETURNS_64BITS;
37459
+ /* Fall through. */
37460
+ case FFI_TYPE_FLOAT:
37461
+ flags |= FLAG_RETURNS_FP;
37462
+#ifdef __NO_FPRS__
37463
+ return FFI_BAD_ABI;
37467
+ case FFI_TYPE_UINT128:
37468
+ flags |= FLAG_RETURNS_128BITS;
37469
+ /* Fall through. */
37470
+ case FFI_TYPE_UINT64:
37471
+ case FFI_TYPE_SINT64:
37472
+ flags |= FLAG_RETURNS_64BITS;
37475
+ case FFI_TYPE_STRUCT:
37476
+ /* The final SYSV ABI says that structures smaller or equal 8 bytes
37477
+ are returned in r3/r4. A draft ABI used by linux instead
37478
+ returns them in memory. */
37479
+ if ((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)
37481
+ flags |= FLAG_RETURNS_SMST;
37485
+ flags |= FLAG_RETVAL_REFERENCE;
37486
+ /* Fall through. */
37487
+ case FFI_TYPE_VOID:
37488
+ flags |= FLAG_RETURNS_NOTHING;
37492
+ /* Returns 32-bit integer, or similar. Nothing to do here. */
37496
+ /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
37497
+ first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
37498
+ goes on the stack. Structures and long doubles (if not equivalent
37499
+ to double) are passed as a pointer to a copy of the structure.
37500
+ Stuff on the stack needs to keep proper alignment. */
37501
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
37503
+ unsigned short typenum = (*ptr)->type;
37505
+ typenum = translate_float (cif->abi, typenum);
37509
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
37510
+ case FFI_TYPE_LONGDOUBLE:
37514
+ case FFI_TYPE_DOUBLE:
37516
+ /* If this FP arg is going on the stack, it must be
37517
+ 8-byte-aligned. */
37518
+ if (fparg_count > NUM_FPR_ARG_REGISTERS
37519
+ && intarg_count >= NUM_GPR_ARG_REGISTERS
37520
+ && intarg_count % 2 != 0)
37522
+#ifdef __NO_FPRS__
37523
+ return FFI_BAD_ABI;
37527
+ case FFI_TYPE_FLOAT:
37529
+#ifdef __NO_FPRS__
37530
+ return FFI_BAD_ABI;
37534
+ case FFI_TYPE_UINT128:
37535
+ /* A long double in FFI_LINUX_SOFT_FLOAT can use only a set
37536
+ of four consecutive gprs. If we do not have enough, we
37537
+ have to adjust the intarg_count value. */
37538
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3
37539
+ && intarg_count < NUM_GPR_ARG_REGISTERS)
37540
+ intarg_count = NUM_GPR_ARG_REGISTERS;
37541
+ intarg_count += 4;
37544
+ case FFI_TYPE_UINT64:
37545
+ case FFI_TYPE_SINT64:
37546
+ /* 'long long' arguments are passed as two words, but
37547
+ either both words must fit in registers or both go
37548
+ on the stack. If they go on the stack, they must
37549
+ be 8-byte-aligned.
37551
+ Also, only certain register pairs can be used for
37552
+ passing long long int -- specifically (r3,r4), (r5,r6),
37553
+ (r7,r8), (r9,r10). */
37554
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1
37555
+ || intarg_count % 2 != 0)
37557
+ intarg_count += 2;
37560
+ case FFI_TYPE_STRUCT:
37561
+ /* We must allocate space for a copy of these to enforce
37562
+ pass-by-value. Pad the space up to a multiple of 16
37563
+ bytes (the maximum alignment required for anything under
37564
+ the SYSV ABI). */
37565
+ struct_copy_size += ((*ptr)->size + 15) & ~0xF;
37566
+ /* Fall through (allocate space for the pointer). */
37568
+ case FFI_TYPE_POINTER:
37569
+ case FFI_TYPE_INT:
37570
+ case FFI_TYPE_UINT32:
37571
+ case FFI_TYPE_SINT32:
37572
+ case FFI_TYPE_UINT16:
37573
+ case FFI_TYPE_SINT16:
37574
+ case FFI_TYPE_UINT8:
37575
+ case FFI_TYPE_SINT8:
37576
+ /* Everything else is passed as a 4-byte word in a GPR, either
37577
+ the object itself or a pointer to it. */
37586
+ if (fparg_count != 0)
37587
+ flags |= FLAG_FP_ARGUMENTS;
37588
+ if (intarg_count > 4)
37589
+ flags |= FLAG_4_GPR_ARGUMENTS;
37590
+ if (struct_copy_size != 0)
37591
+ flags |= FLAG_ARG_NEEDS_COPY;
37593
+ /* Space for the FPR registers, if needed. */
37594
+ if (fparg_count != 0)
37595
+ bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);
37597
+ /* Stack space. */
37598
+ if (intarg_count > NUM_GPR_ARG_REGISTERS)
37599
+ bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int);
37600
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
37601
+ bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double);
37603
+ /* The stack space allocated needs to be a multiple of 16 bytes. */
37604
+ bytes = (bytes + 15) & ~0xF;
37606
+ /* Add in the space for the copied structures. */
37607
+ bytes += struct_copy_size;
37609
+ cif->flags = flags;
37610
+ cif->bytes = bytes;
37615
+ffi_status FFI_HIDDEN
37616
+ffi_prep_cif_sysv (ffi_cif *cif)
37618
+ if ((cif->abi & FFI_SYSV) == 0)
37620
+ /* This call is from old code. Translate to new ABI values. */
37621
+ cif->flags |= FLAG_COMPAT;
37622
+ switch (cif->abi)
37625
+ return FFI_BAD_ABI;
37627
+ case FFI_COMPAT_SYSV:
37628
+ cif->abi = FFI_SYSV | FFI_SYSV_STRUCT_RET | FFI_SYSV_LONG_DOUBLE_128;
37631
+ case FFI_COMPAT_GCC_SYSV:
37632
+ cif->abi = FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128;
37635
+ case FFI_COMPAT_LINUX:
37636
+ cif->abi = (FFI_SYSV | FFI_SYSV_IBM_LONG_DOUBLE
37637
+ | FFI_SYSV_LONG_DOUBLE_128);
37640
+ case FFI_COMPAT_LINUX_SOFT_FLOAT:
37641
+ cif->abi = (FFI_SYSV | FFI_SYSV_SOFT_FLOAT | FFI_SYSV_IBM_LONG_DOUBLE
37642
+ | FFI_SYSV_LONG_DOUBLE_128);
37646
+ return ffi_prep_cif_sysv_core (cif);
37649
+/* ffi_prep_args_SYSV is called by the assembly routine once stack space
37650
+ has been allocated for the function's arguments.
37652
+ The stack layout we want looks like this:
37654
+ | Return address from ffi_call_SYSV 4bytes | higher addresses
37655
+ |--------------------------------------------|
37656
+ | Previous backchain pointer 4 | stack pointer here
37657
+ |--------------------------------------------|<+ <<< on entry to
37658
+ | Saved r28-r31 4*4 | | ffi_call_SYSV
37659
+ |--------------------------------------------| |
37660
+ | GPR registers r3-r10 8*4 | | ffi_call_SYSV
37661
+ |--------------------------------------------| |
37662
+ | FPR registers f1-f8 (optional) 8*8 | |
37663
+ |--------------------------------------------| | stack |
37664
+ | Space for copied structures | | grows |
37665
+ |--------------------------------------------| | down V
37666
+ | Parameters that didn't fit in registers | |
37667
+ |--------------------------------------------| | lower addresses
37668
+ | Space for callee's LR 4 | |
37669
+ |--------------------------------------------| | stack pointer here
37670
+ | Current backchain pointer 4 |-/ during
37671
+ |--------------------------------------------| <<< ffi_call_SYSV
37676
+ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
37678
+ const unsigned bytes = ecif->cif->bytes;
37679
+ const unsigned flags = ecif->cif->flags;
37690
+ /* 'stacktop' points at the previous backchain pointer. */
37693
+ /* 'gpr_base' points at the space for gpr3, and grows upwards as
37694
+ we use GPR registers. */
37696
+ int intarg_count;
37698
+#ifndef __NO_FPRS__
37699
+ /* 'fpr_base' points at the space for fpr1, and grows upwards as
37700
+ we use FPR registers. */
37705
+ /* 'copy_space' grows down as we put structures in it. It should
37706
+ stay 16-byte aligned. */
37709
+ /* 'next_arg' grows up as we put parameters in it. */
37714
+#ifndef __NO_FPRS__
37715
+ double double_tmp;
37721
+ signed char **sc;
37722
+ unsigned char **uc;
37723
+ signed short **ss;
37724
+ unsigned short **us;
37725
+ unsigned int **ui;
37730
+ size_t struct_copy_size;
37731
+ unsigned gprvalue;
37733
+ stacktop.c = (char *) stack + bytes;
37734
+ gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
37735
+ intarg_count = 0;
37736
+#ifndef __NO_FPRS__
37737
+ fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
37739
+ copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
37741
+ copy_space.c = gpr_base.c;
37743
+ next_arg.u = stack + 2;
37745
+ /* Check that everything starts aligned properly. */
37746
+ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
37747
+ FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
37748
+ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
37749
+ FFI_ASSERT ((bytes & 0xF) == 0);
37750
+ FFI_ASSERT (copy_space.c >= next_arg.c);
37752
+ /* Deal with return values that are actually pass-by-reference. */
37753
+ if (flags & FLAG_RETVAL_REFERENCE)
37755
+ *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue;
37759
+ /* Now for the arguments. */
37760
+ p_argv.v = ecif->avalue;
37761
+ for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
37763
+ i--, ptr++, p_argv.v++)
37765
+ unsigned int typenum = (*ptr)->type;
37767
+ typenum = translate_float (ecif->cif->abi, typenum);
37769
+ /* Now test the translated value */
37772
+#ifndef __NO_FPRS__
37773
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
37774
+ case FFI_TYPE_LONGDOUBLE:
37775
+ double_tmp = (*p_argv.d)[0];
37777
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS - 1)
37779
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS
37780
+ && intarg_count % 2 != 0)
37785
+ *next_arg.d = double_tmp;
37787
+ double_tmp = (*p_argv.d)[1];
37788
+ *next_arg.d = double_tmp;
37793
+ *fpr_base.d++ = double_tmp;
37794
+ double_tmp = (*p_argv.d)[1];
37795
+ *fpr_base.d++ = double_tmp;
37798
+ fparg_count += 2;
37799
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
37802
+ case FFI_TYPE_DOUBLE:
37803
+ double_tmp = **p_argv.d;
37805
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
37807
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS
37808
+ && intarg_count % 2 != 0)
37813
+ *next_arg.d = double_tmp;
37817
+ *fpr_base.d++ = double_tmp;
37819
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
37822
+ case FFI_TYPE_FLOAT:
37823
+ double_tmp = **p_argv.f;
37824
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
37826
+ *next_arg.f = (float) double_tmp;
37831
+ *fpr_base.d++ = double_tmp;
37833
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
37835
+#endif /* have FPRs */
37837
+ case FFI_TYPE_UINT128:
37838
+ /* The soft float ABI for long doubles works like this, a long double
37839
+ is passed in four consecutive GPRs if available. A maximum of 2
37840
+ long doubles can be passed in gprs. If we do not have 4 GPRs
37841
+ left, the long double is passed on the stack, 4-byte aligned. */
37843
+ unsigned int int_tmp;
37845
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3)
37847
+ if (intarg_count < NUM_GPR_ARG_REGISTERS)
37848
+ intarg_count = NUM_GPR_ARG_REGISTERS;
37849
+ for (ii = 0; ii < 4; ii++)
37851
+ int_tmp = (*p_argv.ui)[ii];
37852
+ *next_arg.u++ = int_tmp;
37857
+ for (ii = 0; ii < 4; ii++)
37859
+ int_tmp = (*p_argv.ui)[ii];
37860
+ *gpr_base.u++ = int_tmp;
37863
+ intarg_count += 4;
37867
+ case FFI_TYPE_UINT64:
37868
+ case FFI_TYPE_SINT64:
37869
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1)
37871
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS)
37873
+ if (intarg_count % 2 != 0)
37878
+ *next_arg.ll = **p_argv.ll;
37883
+ /* The abi states only certain register pairs can be
37884
+ used for passing long long int specifically (r3,r4),
37885
+ (r5,r6), (r7,r8), (r9,r10). If next arg is long long
37886
+ but not correct starting register of pair then skip
37887
+ until the proper starting register. */
37888
+ if (intarg_count % 2 != 0)
37893
+ *gpr_base.ll++ = **p_argv.ll;
37895
+ intarg_count += 2;
37898
+ case FFI_TYPE_STRUCT:
37899
+ struct_copy_size = ((*ptr)->size + 15) & ~0xF;
37900
+ copy_space.c -= struct_copy_size;
37901
+ memcpy (copy_space.c, *p_argv.c, (*ptr)->size);
37903
+ gprvalue = (unsigned long) copy_space.c;
37905
+ FFI_ASSERT (copy_space.c > next_arg.c);
37906
+ FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
37909
+ case FFI_TYPE_UINT8:
37910
+ gprvalue = **p_argv.uc;
37912
+ case FFI_TYPE_SINT8:
37913
+ gprvalue = **p_argv.sc;
37915
+ case FFI_TYPE_UINT16:
37916
+ gprvalue = **p_argv.us;
37918
+ case FFI_TYPE_SINT16:
37919
+ gprvalue = **p_argv.ss;
37922
+ case FFI_TYPE_INT:
37923
+ case FFI_TYPE_UINT32:
37924
+ case FFI_TYPE_SINT32:
37925
+ case FFI_TYPE_POINTER:
37927
+ gprvalue = **p_argv.ui;
37930
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS)
37931
+ *next_arg.u++ = gprvalue;
37933
+ *gpr_base.u++ = gprvalue;
37939
+ /* Check that we didn't overrun the stack... */
37940
+ FFI_ASSERT (copy_space.c >= next_arg.c);
37941
+ FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
37942
+ /* The assert below is testing that the number of integer arguments agrees
37943
+ with the number found in ffi_prep_cif_machdep(). However, intarg_count
37944
+ is incremented whenever we place an FP arg on the stack, so account for
37945
+ that before our assert test. */
37946
+#ifndef __NO_FPRS__
37947
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
37948
+ intarg_count -= fparg_count - NUM_FPR_ARG_REGISTERS;
37949
+ FFI_ASSERT (fpr_base.u
37950
+ <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
37952
+ FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
37955
+#define MIN_CACHE_LINE_SIZE 8
37958
+flush_icache (char *wraddr, char *xaddr, int size)
37961
+ for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
37962
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
37963
+ : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
37964
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
37965
+ : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
37969
+ffi_status FFI_HIDDEN
37970
+ffi_prep_closure_loc_sysv (ffi_closure *closure,
37972
+ void (*fun) (ffi_cif *, void *, void **, void *),
37976
+ unsigned int *tramp;
37978
+ if (cif->abi < FFI_SYSV || cif->abi >= FFI_LAST_ABI)
37979
+ return FFI_BAD_ABI;
37981
+ tramp = (unsigned int *) &closure->tramp[0];
37982
+ tramp[0] = 0x7c0802a6; /* mflr r0 */
37983
+ tramp[1] = 0x4800000d; /* bl 10 <trampoline_initial+0x10> */
37984
+ tramp[4] = 0x7d6802a6; /* mflr r11 */
37985
+ tramp[5] = 0x7c0803a6; /* mtlr r0 */
37986
+ tramp[6] = 0x800b0000; /* lwz r0,0(r11) */
37987
+ tramp[7] = 0x816b0004; /* lwz r11,4(r11) */
37988
+ tramp[8] = 0x7c0903a6; /* mtctr r0 */
37989
+ tramp[9] = 0x4e800420; /* bctr */
37990
+ *(void **) &tramp[2] = (void *) ffi_closure_SYSV; /* function */
37991
+ *(void **) &tramp[3] = codeloc; /* context */
37993
+ /* Flush the icache. */
37994
+ flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
37996
+ closure->cif = cif;
37997
+ closure->fun = fun;
37998
+ closure->user_data = user_data;
38003
+/* Basically the trampoline invokes ffi_closure_SYSV, and on
38004
+ entry, r11 holds the address of the closure.
38005
+ After storing the registers that could possibly contain
38006
+ parameters to be passed into the stack frame and setting
38007
+ up space for a return value, ffi_closure_SYSV invokes the
38008
+ following helper function to do most of the work. */
38011
+ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue,
38012
+ unsigned long *pgr, ffi_dblfl *pfr,
38013
+ unsigned long *pst)
38015
+ /* rvalue is the pointer to space for return value in closure assembly */
38016
+ /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
38017
+ /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV */
38018
+ /* pst is the pointer to outgoing parameter stack in original caller */
38021
+ ffi_type ** arg_types;
38023
+#ifndef __NO_FPRS__
38024
+ long nf = 0; /* number of floating registers already used */
38026
+ long ng = 0; /* number of general registers already used */
38028
+ ffi_cif *cif = closure->cif;
38029
+ unsigned size = cif->rtype->size;
38030
+ unsigned short rtypenum = cif->rtype->type;
38032
+ avalue = alloca (cif->nargs * sizeof (void *));
38034
+ /* First translate for softfloat/nonlinux */
38035
+ rtypenum = translate_float (cif->abi, rtypenum);
38037
+ /* Copy the caller's structure return value address so that the closure
38038
+ returns the data directly to the caller.
38039
+ For FFI_SYSV the result is passed in r3/r4 if the struct size is less
38040
+ or equal 8 bytes. */
38041
+ if (rtypenum == FFI_TYPE_STRUCT
38042
+ && !((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8))
38044
+ rvalue = (void *) *pgr;
38050
+ avn = cif->nargs;
38051
+ arg_types = cif->arg_types;
38053
+ /* Grab the addresses of the arguments from the stack frame. */
38054
+ while (i < avn) {
38055
+ unsigned short typenum = arg_types[i]->type;
38057
+ /* We may need to handle some values depending on ABI. */
38058
+ typenum = translate_float (cif->abi, typenum);
38062
+#ifndef __NO_FPRS__
38063
+ case FFI_TYPE_FLOAT:
38064
+ /* Unfortunately float values are stored as doubles
38065
+ in the ffi_closure_SYSV code (since we don't check
38066
+ the type in that routine). */
38067
+ if (nf < NUM_FPR_ARG_REGISTERS)
38069
+ /* FIXME? here we are really changing the values
38070
+ stored in the original calling routines outgoing
38071
+ parameter stack. This is probably a really
38072
+ naughty thing to do but... */
38073
+ double temp = pfr->d;
38074
+ pfr->f = (float) temp;
38086
+ case FFI_TYPE_DOUBLE:
38087
+ if (nf < NUM_FPR_ARG_REGISTERS)
38095
+ if (((long) pst) & 4)
38102
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
38103
+ case FFI_TYPE_LONGDOUBLE:
38104
+ if (nf < NUM_FPR_ARG_REGISTERS - 1)
38112
+ if (((long) pst) & 4)
38122
+ case FFI_TYPE_UINT128:
38123
+ /* Test if for the whole long double, 4 gprs are available.
38124
+ otherwise the stuff ends up on the stack. */
38125
+ if (ng < NUM_GPR_ARG_REGISTERS - 3)
38139
+ case FFI_TYPE_SINT8:
38140
+ case FFI_TYPE_UINT8:
38141
+#ifndef __LITTLE_ENDIAN__
38142
+ if (ng < NUM_GPR_ARG_REGISTERS)
38144
+ avalue[i] = (char *) pgr + 3;
38150
+ avalue[i] = (char *) pst + 3;
38156
+ case FFI_TYPE_SINT16:
38157
+ case FFI_TYPE_UINT16:
38158
+#ifndef __LITTLE_ENDIAN__
38159
+ if (ng < NUM_GPR_ARG_REGISTERS)
38161
+ avalue[i] = (char *) pgr + 2;
38167
+ avalue[i] = (char *) pst + 2;
38173
+ case FFI_TYPE_SINT32:
38174
+ case FFI_TYPE_UINT32:
38175
+ case FFI_TYPE_POINTER:
38176
+ if (ng < NUM_GPR_ARG_REGISTERS)
38189
+ case FFI_TYPE_STRUCT:
38190
+ /* Structs are passed by reference. The address will appear in a
38191
+ gpr if it is one of the first 8 arguments. */
38192
+ if (ng < NUM_GPR_ARG_REGISTERS)
38194
+ avalue[i] = (void *) *pgr;
38200
+ avalue[i] = (void *) *pst;
38205
+ case FFI_TYPE_SINT64:
38206
+ case FFI_TYPE_UINT64:
38207
+ /* Passing long long ints are complex, they must
38208
+ be passed in suitable register pairs such as
38209
+ (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10)
38210
+ and if the entire pair aren't available then the outgoing
38211
+ parameter stack is used for both but an alignment of 8
38212
+ must will be kept. So we must either look in pgr
38213
+ or pst to find the correct address for this type
38215
+ if (ng < NUM_GPR_ARG_REGISTERS - 1)
38219
+ /* skip r4, r6, r8 as starting points */
38229
+ if (((long) pst) & 4)
38233
+ ng = NUM_GPR_ARG_REGISTERS;
38244
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
38246
+ /* Tell ffi_closure_SYSV how to perform return type promotions.
38247
+ Because the FFI_SYSV ABI returns the structures <= 8 bytes in
38248
+ r3/r4 we have to tell ffi_closure_SYSV how to treat them. We
38249
+ combine the base type FFI_SYSV_TYPE_SMALL_STRUCT with the size of
38250
+ the struct less one. We never have a struct with size zero.
38251
+ See the comment in ffitarget.h about ordering. */
38252
+ if (rtypenum == FFI_TYPE_STRUCT
38253
+ && (cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)
38254
+ return FFI_SYSV_TYPE_SMALL_STRUCT - 1 + size;
38258
--- a/src/libffi/src/powerpc/linux64.S
38259
+++ b/src/libffi/src/powerpc/linux64.S
38260
@@ -29,18 +29,25 @@
38261
#include <fficonfig.h>
38264
-#ifdef __powerpc64__
38266
.hidden ffi_call_LINUX64
38267
.globl ffi_call_LINUX64
38268
+# if _CALL_ELF == 2
38271
+ addis %r2, %r12, .TOC.-ffi_call_LINUX64@ha
38272
+ addi %r2, %r2, .TOC.-ffi_call_LINUX64@l
38273
+ .localentry ffi_call_LINUX64, . - ffi_call_LINUX64
38275
.section ".opd","aw"
38278
-#ifdef _CALL_LINUX
38279
+# ifdef _CALL_LINUX
38280
.quad .L.ffi_call_LINUX64,.TOC.@tocbase,0
38281
.type ffi_call_LINUX64,@function
38283
.L.ffi_call_LINUX64:
38286
.hidden .ffi_call_LINUX64
38287
.globl .ffi_call_LINUX64
38288
.quad .ffi_call_LINUX64,.TOC.@tocbase,0
38290
.type .ffi_call_LINUX64,@function
38299
@@ -63,26 +71,35 @@
38300
mr %r31, %r5 /* flags, */
38301
mr %r30, %r6 /* rvalue, */
38302
mr %r29, %r7 /* function address. */
38303
+/* Save toc pointer, not for the ffi_prep_args64 call, but for the later
38304
+ bctrl function call. */
38305
+# if _CALL_ELF == 2
38311
/* Call ffi_prep_args64. */
38313
-#ifdef _CALL_LINUX
38314
+# if defined _CALL_LINUX || _CALL_ELF == 2
38318
bl .ffi_prep_args64
38323
+# if _CALL_ELF == 2
38331
/* Now do the call. */
38332
/* Set up cr1 with bits 4-7 of the flags. */
38335
/* Get the address to call into CTR. */
38338
/* Load all those argument registers. */
38339
ld %r3, -32-(8*8)(%r28)
38340
ld %r4, -32-(7*8)(%r28)
38341
@@ -117,12 +134,17 @@
38343
/* This must follow the call immediately, the unwinder
38344
uses this to find out if r2 has been saved or not. */
38345
+# if _CALL_ELF == 2
38351
/* Now, deal with the return value. */
38353
- bt- 30, .Ldone_return_value
38354
- bt- 29, .Lfp_return_value
38355
+ bt 31, .Lstruct_return_value
38356
+ bt 30, .Ldone_return_value
38357
+ bt 29, .Lfp_return_value
38359
/* Fall through... */
38361
@@ -130,7 +152,7 @@
38362
/* Restore the registers we used and return. */
38365
- ld %r28, -32(%r1)
38366
+ ld %r28, -32(%r28)
38370
@@ -147,14 +169,48 @@
38371
.Lfloat_return_value:
38373
b .Ldone_return_value
38375
+.Lstruct_return_value:
38376
+ bf 29, .Lsmall_struct
38377
+ bf 28, .Lfloat_homog_return_value
38378
+ stfd %f1, 0(%r30)
38379
+ stfd %f2, 8(%r30)
38380
+ stfd %f3, 16(%r30)
38381
+ stfd %f4, 24(%r30)
38382
+ stfd %f5, 32(%r30)
38383
+ stfd %f6, 40(%r30)
38384
+ stfd %f7, 48(%r30)
38385
+ stfd %f8, 56(%r30)
38386
+ b .Ldone_return_value
38388
+.Lfloat_homog_return_value:
38389
+ stfs %f1, 0(%r30)
38390
+ stfs %f2, 4(%r30)
38391
+ stfs %f3, 8(%r30)
38392
+ stfs %f4, 12(%r30)
38393
+ stfs %f5, 16(%r30)
38394
+ stfs %f6, 20(%r30)
38395
+ stfs %f7, 24(%r30)
38396
+ stfs %f8, 28(%r30)
38397
+ b .Ldone_return_value
38402
+ b .Ldone_return_value
38406
.byte 0,12,0,1,128,4,0,0
38407
-#ifdef _CALL_LINUX
38408
+# if _CALL_ELF == 2
38409
+ .size ffi_call_LINUX64,.-ffi_call_LINUX64
38411
+# ifdef _CALL_LINUX
38412
.size ffi_call_LINUX64,.-.L.ffi_call_LINUX64
38415
.size .ffi_call_LINUX64,.-.ffi_call_LINUX64
38420
.section .eh_frame,EH_FRAME_FLAGS,@progbits
38422
@@ -197,8 +253,8 @@
38428
-#if defined __ELF__ && defined __linux__
38429
+# if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2
38430
.section .note.GNU-stack,"",@progbits
38433
--- a/src/libffi/src/powerpc/ffi_linux64.c
38434
+++ b/src/libffi/src/powerpc/ffi_linux64.c
38436
+/* -----------------------------------------------------------------------
38437
+ ffi_linux64.c - Copyright (C) 2013 IBM
38438
+ Copyright (C) 2011 Anthony Green
38439
+ Copyright (C) 2011 Kyle Moffett
38440
+ Copyright (C) 2008 Red Hat, Inc
38441
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc
38442
+ Copyright (c) 1998 Geoffrey Keating
38444
+ PowerPC Foreign Function Interface
38446
+ Permission is hereby granted, free of charge, to any person obtaining
38447
+ a copy of this software and associated documentation files (the
38448
+ ``Software''), to deal in the Software without restriction, including
38449
+ without limitation the rights to use, copy, modify, merge, publish,
38450
+ distribute, sublicense, and/or sell copies of the Software, and to
38451
+ permit persons to whom the Software is furnished to do so, subject to
38452
+ the following conditions:
38454
+ The above copyright notice and this permission notice shall be included
38455
+ in all copies or substantial portions of the Software.
38457
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
38458
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38459
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
38460
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
38461
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38462
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
38463
+ OTHER DEALINGS IN THE SOFTWARE.
38464
+ ----------------------------------------------------------------------- */
38469
+#include "ffi_common.h"
38470
+#include "ffi_powerpc.h"
38473
+/* About the LINUX64 ABI. */
38475
+ NUM_GPR_ARG_REGISTERS64 = 8,
38476
+ NUM_FPR_ARG_REGISTERS64 = 13
38478
+enum { ASM_NEEDS_REGISTERS64 = 4 };
38481
+#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
38482
+/* Adjust size of ffi_type_longdouble. */
38484
+ffi_prep_types_linux64 (ffi_abi abi)
38486
+ if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX)
38488
+ ffi_type_longdouble.size = 8;
38489
+ ffi_type_longdouble.alignment = 8;
38493
+ ffi_type_longdouble.size = 16;
38494
+ ffi_type_longdouble.alignment = 16;
38500
+#if _CALL_ELF == 2
38501
+static unsigned int
38502
+discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
38506
+ case FFI_TYPE_FLOAT:
38507
+ case FFI_TYPE_DOUBLE:
38509
+ return (int) t->type;
38511
+ case FFI_TYPE_STRUCT:;
38513
+ unsigned int base_elt = 0, total_elnum = 0;
38514
+ ffi_type **el = t->elements;
38517
+ unsigned int el_elt, el_elnum = 0;
38518
+ el_elt = discover_homogeneous_aggregate (*el, &el_elnum);
38520
+ || (base_elt && base_elt != el_elt))
38522
+ base_elt = el_elt;
38523
+ total_elnum += el_elnum;
38524
+ if (total_elnum > 8)
38528
+ *elnum = total_elnum;
38539
+/* Perform machine dependent cif processing */
38541
+ffi_prep_cif_linux64_core (ffi_cif *cif)
38545
+ unsigned i, fparg_count = 0, intarg_count = 0;
38546
+ unsigned flags = cif->flags;
38547
+#if _CALL_ELF == 2
38548
+ unsigned int elt, elnum;
38551
+#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
38552
+ /* If compiled without long double support.. */
38553
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
38554
+ return FFI_BAD_ABI;
38557
+ /* The machine-independent calculation of cif->bytes doesn't work
38558
+ for us. Redo the calculation. */
38559
+#if _CALL_ELF == 2
38560
+ /* Space for backchain, CR, LR, TOC and the asm's temp regs. */
38561
+ bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
38563
+ /* Space for the general registers. */
38564
+ bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
38566
+ /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
38568
+ bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
38570
+ /* Space for the mandatory parm save area and general registers. */
38571
+ bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
38574
+ /* Return value handling. */
38575
+ switch (cif->rtype->type)
38577
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
38578
+ case FFI_TYPE_LONGDOUBLE:
38579
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
38580
+ flags |= FLAG_RETURNS_128BITS;
38581
+ /* Fall through. */
38583
+ case FFI_TYPE_DOUBLE:
38584
+ flags |= FLAG_RETURNS_64BITS;
38585
+ /* Fall through. */
38586
+ case FFI_TYPE_FLOAT:
38587
+ flags |= FLAG_RETURNS_FP;
38590
+ case FFI_TYPE_UINT128:
38591
+ flags |= FLAG_RETURNS_128BITS;
38592
+ /* Fall through. */
38593
+ case FFI_TYPE_UINT64:
38594
+ case FFI_TYPE_SINT64:
38595
+ flags |= FLAG_RETURNS_64BITS;
38598
+ case FFI_TYPE_STRUCT:
38599
+#if _CALL_ELF == 2
38600
+ elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
38603
+ if (elt == FFI_TYPE_DOUBLE)
38604
+ flags |= FLAG_RETURNS_64BITS;
38605
+ flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
38608
+ if (cif->rtype->size <= 16)
38610
+ flags |= FLAG_RETURNS_SMST;
38615
+ flags |= FLAG_RETVAL_REFERENCE;
38616
+ /* Fall through. */
38617
+ case FFI_TYPE_VOID:
38618
+ flags |= FLAG_RETURNS_NOTHING;
38622
+ /* Returns 32-bit integer, or similar. Nothing to do here. */
38626
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
38628
+ unsigned int align;
38630
+ switch ((*ptr)->type)
38632
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
38633
+ case FFI_TYPE_LONGDOUBLE:
38634
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
38639
+ /* Fall through. */
38641
+ case FFI_TYPE_DOUBLE:
38642
+ case FFI_TYPE_FLOAT:
38645
+ if (fparg_count > NUM_FPR_ARG_REGISTERS64)
38646
+ flags |= FLAG_ARG_NEEDS_PSAVE;
38649
+ case FFI_TYPE_STRUCT:
38650
+ if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
38652
+ align = (*ptr)->alignment;
38655
+ align = align / 8;
38657
+ intarg_count = ALIGN (intarg_count, align);
38659
+ intarg_count += ((*ptr)->size + 7) / 8;
38660
+#if _CALL_ELF == 2
38661
+ elt = discover_homogeneous_aggregate (*ptr, &elnum);
38664
+ fparg_count += elnum;
38665
+ if (fparg_count > NUM_FPR_ARG_REGISTERS64)
38666
+ flags |= FLAG_ARG_NEEDS_PSAVE;
38671
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
38672
+ flags |= FLAG_ARG_NEEDS_PSAVE;
38676
+ case FFI_TYPE_POINTER:
38677
+ case FFI_TYPE_UINT64:
38678
+ case FFI_TYPE_SINT64:
38679
+ case FFI_TYPE_INT:
38680
+ case FFI_TYPE_UINT32:
38681
+ case FFI_TYPE_SINT32:
38682
+ case FFI_TYPE_UINT16:
38683
+ case FFI_TYPE_SINT16:
38684
+ case FFI_TYPE_UINT8:
38685
+ case FFI_TYPE_SINT8:
38686
+ /* Everything else is passed as a 8-byte word in a GPR, either
38687
+ the object itself or a pointer to it. */
38689
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
38690
+ flags |= FLAG_ARG_NEEDS_PSAVE;
38697
+ if (fparg_count != 0)
38698
+ flags |= FLAG_FP_ARGUMENTS;
38699
+ if (intarg_count > 4)
38700
+ flags |= FLAG_4_GPR_ARGUMENTS;
38702
+ /* Space for the FPR registers, if needed. */
38703
+ if (fparg_count != 0)
38704
+ bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
38706
+ /* Stack space. */
38707
+#if _CALL_ELF == 2
38708
+ if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
38709
+ bytes += intarg_count * sizeof (long);
38711
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
38712
+ bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
38715
+ /* The stack space allocated needs to be a multiple of 16 bytes. */
38716
+ bytes = (bytes + 15) & ~0xF;
38718
+ cif->flags = flags;
38719
+ cif->bytes = bytes;
38724
+ffi_status FFI_HIDDEN
38725
+ffi_prep_cif_linux64 (ffi_cif *cif)
38727
+ if ((cif->abi & FFI_LINUX) != 0)
38728
+ cif->nfixedargs = cif->nargs;
38729
+#if _CALL_ELF != 2
38730
+ else if (cif->abi == FFI_COMPAT_LINUX64)
38732
+ /* This call is from old code. Don't touch cif->nfixedargs
38733
+ since old code will be using a smaller cif. */
38734
+ cif->flags |= FLAG_COMPAT;
38735
+ /* Translate to new abi value. */
38736
+ cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
38740
+ return FFI_BAD_ABI;
38741
+ return ffi_prep_cif_linux64_core (cif);
38744
+ffi_status FFI_HIDDEN
38745
+ffi_prep_cif_linux64_var (ffi_cif *cif,
38746
+ unsigned int nfixedargs,
38747
+ unsigned int ntotalargs MAYBE_UNUSED)
38749
+ if ((cif->abi & FFI_LINUX) != 0)
38750
+ cif->nfixedargs = nfixedargs;
38751
+#if _CALL_ELF != 2
38752
+ else if (cif->abi == FFI_COMPAT_LINUX64)
38754
+ /* This call is from old code. Don't touch cif->nfixedargs
38755
+ since old code will be using a smaller cif. */
38756
+ cif->flags |= FLAG_COMPAT;
38757
+ /* Translate to new abi value. */
38758
+ cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
38762
+ return FFI_BAD_ABI;
38763
+#if _CALL_ELF == 2
38764
+ cif->flags |= FLAG_ARG_NEEDS_PSAVE;
38766
+ return ffi_prep_cif_linux64_core (cif);
38770
+/* ffi_prep_args64 is called by the assembly routine once stack space
38771
+ has been allocated for the function's arguments.
38773
+ The stack layout we want looks like this:
38775
+ | Ret addr from ffi_call_LINUX64 8bytes | higher addresses
38776
+ |--------------------------------------------|
38777
+ | CR save area 8bytes |
38778
+ |--------------------------------------------|
38779
+ | Previous backchain pointer 8 | stack pointer here
38780
+ |--------------------------------------------|<+ <<< on entry to
38781
+ | Saved r28-r31 4*8 | | ffi_call_LINUX64
38782
+ |--------------------------------------------| |
38783
+ | GPR registers r3-r10 8*8 | |
38784
+ |--------------------------------------------| |
38785
+ | FPR registers f1-f13 (optional) 13*8 | |
38786
+ |--------------------------------------------| |
38787
+ | Parameter save area | |
38788
+ |--------------------------------------------| |
38789
+ | TOC save area 8 | |
38790
+ |--------------------------------------------| | stack |
38791
+ | Linker doubleword 8 | | grows |
38792
+ |--------------------------------------------| | down V
38793
+ | Compiler doubleword 8 | |
38794
+ |--------------------------------------------| | lower addresses
38795
+ | Space for callee's LR 8 | |
38796
+ |--------------------------------------------| |
38797
+ | CR save area 8 | |
38798
+ |--------------------------------------------| | stack pointer here
38799
+ | Current backchain pointer 8 |-/ during
38800
+ |--------------------------------------------| <<< ffi_call_LINUX64
38805
+ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
38807
+ const unsigned long bytes = ecif->cif->bytes;
38808
+ const unsigned long flags = ecif->cif->flags;
38813
+ unsigned long *ul;
38819
+ /* 'stacktop' points at the previous backchain pointer. */
38822
+ /* 'next_arg' points at the space for gpr3, and grows upwards as
38823
+ we use GPR registers, then continues at rest. */
38829
+ /* 'fpr_base' points at the space for fpr3, and grows upwards as
38830
+ we use FPR registers. */
38832
+ unsigned int fparg_count;
38834
+ unsigned int i, words, nargs, nfixedargs;
38836
+ double double_tmp;
38841
+ signed char **sc;
38842
+ unsigned char **uc;
38843
+ signed short **ss;
38844
+ unsigned short **us;
38846
+ unsigned int **ui;
38847
+ unsigned long **ul;
38851
+ unsigned long gprvalue;
38852
+ unsigned long align;
38854
+ stacktop.c = (char *) stack + bytes;
38855
+ gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
38856
+ gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
38857
+#if _CALL_ELF == 2
38858
+ rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
38860
+ rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
38862
+ fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
38864
+ next_arg.ul = gpr_base.ul;
38866
+ /* Check that everything starts aligned properly. */
38867
+ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
38868
+ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
38869
+ FFI_ASSERT ((bytes & 0xF) == 0);
38871
+ /* Deal with return values that are actually pass-by-reference. */
38872
+ if (flags & FLAG_RETVAL_REFERENCE)
38873
+ *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
38875
+ /* Now for the arguments. */
38876
+ p_argv.v = ecif->avalue;
38877
+ nargs = ecif->cif->nargs;
38878
+#if _CALL_ELF != 2
38879
+ nfixedargs = (unsigned) -1;
38880
+ if ((flags & FLAG_COMPAT) == 0)
38882
+ nfixedargs = ecif->cif->nfixedargs;
38883
+ for (ptr = ecif->cif->arg_types, i = 0;
38885
+ i++, ptr++, p_argv.v++)
38887
+#if _CALL_ELF == 2
38888
+ unsigned int elt, elnum;
38891
+ switch ((*ptr)->type)
38893
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
38894
+ case FFI_TYPE_LONGDOUBLE:
38895
+ if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
38897
+ double_tmp = (*p_argv.d)[0];
38898
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
38900
+ *fpr_base.d++ = double_tmp;
38901
+# if _CALL_ELF != 2
38902
+ if ((flags & FLAG_COMPAT) != 0)
38903
+ *next_arg.d = double_tmp;
38907
+ *next_arg.d = double_tmp;
38908
+ if (++next_arg.ul == gpr_end.ul)
38909
+ next_arg.ul = rest.ul;
38911
+ double_tmp = (*p_argv.d)[1];
38912
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
38914
+ *fpr_base.d++ = double_tmp;
38915
+# if _CALL_ELF != 2
38916
+ if ((flags & FLAG_COMPAT) != 0)
38917
+ *next_arg.d = double_tmp;
38921
+ *next_arg.d = double_tmp;
38922
+ if (++next_arg.ul == gpr_end.ul)
38923
+ next_arg.ul = rest.ul;
38925
+ FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
38926
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
38929
+ /* Fall through. */
38931
+ case FFI_TYPE_DOUBLE:
38932
+ double_tmp = **p_argv.d;
38933
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
38935
+ *fpr_base.d++ = double_tmp;
38936
+#if _CALL_ELF != 2
38937
+ if ((flags & FLAG_COMPAT) != 0)
38938
+ *next_arg.d = double_tmp;
38942
+ *next_arg.d = double_tmp;
38943
+ if (++next_arg.ul == gpr_end.ul)
38944
+ next_arg.ul = rest.ul;
38946
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
38949
+ case FFI_TYPE_FLOAT:
38950
+ double_tmp = **p_argv.f;
38951
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
38953
+ *fpr_base.d++ = double_tmp;
38954
+#if _CALL_ELF != 2
38955
+ if ((flags & FLAG_COMPAT) != 0)
38956
+ *next_arg.f = (float) double_tmp;
38960
+ *next_arg.f = (float) double_tmp;
38961
+ if (++next_arg.ul == gpr_end.ul)
38962
+ next_arg.ul = rest.ul;
38964
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
38967
+ case FFI_TYPE_STRUCT:
38968
+ if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
38970
+ align = (*ptr)->alignment;
38974
+ next_arg.p = ALIGN (next_arg.p, align);
38976
+#if _CALL_ELF == 2
38977
+ elt = discover_homogeneous_aggregate (*ptr, &elnum);
38986
+ arg.v = *p_argv.v;
38987
+ if (elt == FFI_TYPE_FLOAT)
38991
+ double_tmp = *arg.f++;
38992
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64
38993
+ && i < nfixedargs)
38994
+ *fpr_base.d++ = double_tmp;
38996
+ *next_arg.f = (float) double_tmp;
38997
+ if (++next_arg.f == gpr_end.f)
38998
+ next_arg.f = rest.f;
39001
+ while (--elnum != 0);
39002
+ if ((next_arg.p & 3) != 0)
39004
+ if (++next_arg.f == gpr_end.f)
39005
+ next_arg.f = rest.f;
39011
+ double_tmp = *arg.d++;
39012
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
39013
+ *fpr_base.d++ = double_tmp;
39015
+ *next_arg.d = double_tmp;
39016
+ if (++next_arg.d == gpr_end.d)
39017
+ next_arg.d = rest.d;
39020
+ while (--elnum != 0);
39025
+ words = ((*ptr)->size + 7) / 8;
39026
+ if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
39028
+ size_t first = gpr_end.c - next_arg.c;
39029
+ memcpy (next_arg.c, *p_argv.c, first);
39030
+ memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
39031
+ next_arg.c = rest.c + words * 8 - first;
39035
+ char *where = next_arg.c;
39037
+#ifndef __LITTLE_ENDIAN__
39038
+ /* Structures with size less than eight bytes are passed
39040
+ if ((*ptr)->size < 8)
39041
+ where += 8 - (*ptr)->size;
39043
+ memcpy (where, *p_argv.c, (*ptr)->size);
39044
+ next_arg.ul += words;
39045
+ if (next_arg.ul == gpr_end.ul)
39046
+ next_arg.ul = rest.ul;
39051
+ case FFI_TYPE_UINT8:
39052
+ gprvalue = **p_argv.uc;
39054
+ case FFI_TYPE_SINT8:
39055
+ gprvalue = **p_argv.sc;
39057
+ case FFI_TYPE_UINT16:
39058
+ gprvalue = **p_argv.us;
39060
+ case FFI_TYPE_SINT16:
39061
+ gprvalue = **p_argv.ss;
39063
+ case FFI_TYPE_UINT32:
39064
+ gprvalue = **p_argv.ui;
39066
+ case FFI_TYPE_INT:
39067
+ case FFI_TYPE_SINT32:
39068
+ gprvalue = **p_argv.si;
39071
+ case FFI_TYPE_UINT64:
39072
+ case FFI_TYPE_SINT64:
39073
+ case FFI_TYPE_POINTER:
39074
+ gprvalue = **p_argv.ul;
39076
+ *next_arg.ul++ = gprvalue;
39077
+ if (next_arg.ul == gpr_end.ul)
39078
+ next_arg.ul = rest.ul;
39083
+ FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
39084
+ || (next_arg.ul >= gpr_base.ul
39085
+ && next_arg.ul <= gpr_base.ul + 4));
39089
+#if _CALL_ELF == 2
39090
+#define MIN_CACHE_LINE_SIZE 8
39093
+flush_icache (char *wraddr, char *xaddr, int size)
39096
+ for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
39097
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
39098
+ : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
39099
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
39100
+ : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
39106
+ffi_prep_closure_loc_linux64 (ffi_closure *closure,
39108
+ void (*fun) (ffi_cif *, void *, void **, void *),
39112
+#if _CALL_ELF == 2
39113
+ unsigned int *tramp = (unsigned int *) &closure->tramp[0];
39115
+ if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
39116
+ return FFI_BAD_ABI;
39118
+ tramp[0] = 0xe96c0018; /* 0: ld 11,2f-0b(12) */
39119
+ tramp[1] = 0xe98c0010; /* ld 12,1f-0b(12) */
39120
+ tramp[2] = 0x7d8903a6; /* mtctr 12 */
39121
+ tramp[3] = 0x4e800420; /* bctr */
39122
+ /* 1: .quad function_addr */
39123
+ /* 2: .quad context */
39124
+ *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
39125
+ *(void **) &tramp[6] = codeloc;
39126
+ flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
39128
+ void **tramp = (void **) &closure->tramp[0];
39130
+ if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
39131
+ return FFI_BAD_ABI;
39133
+ /* Copy function address and TOC from ffi_closure_LINUX64. */
39134
+ memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
39135
+ tramp[2] = codeloc;
39138
+ closure->cif = cif;
39139
+ closure->fun = fun;
39140
+ closure->user_data = user_data;
39147
+ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue,
39148
+ unsigned long *pst, ffi_dblfl *pfr)
39150
+ /* rvalue is the pointer to space for return value in closure assembly */
39151
+ /* pst is the pointer to parameter save area
39152
+ (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
39153
+ /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
39156
+ ffi_type **arg_types;
39157
+ unsigned long i, avn, nfixedargs;
39159
+ ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
39160
+ unsigned long align;
39162
+ cif = closure->cif;
39163
+ avalue = alloca (cif->nargs * sizeof (void *));
39165
+ /* Copy the caller's structure return value address so that the
39166
+ closure returns the data directly to the caller. */
39167
+ if (cif->rtype->type == FFI_TYPE_STRUCT
39168
+ && (cif->flags & FLAG_RETURNS_SMST) == 0)
39170
+ rvalue = (void *) *pst;
39175
+ avn = cif->nargs;
39176
+#if _CALL_ELF != 2
39177
+ nfixedargs = (unsigned) -1;
39178
+ if ((cif->flags & FLAG_COMPAT) == 0)
39180
+ nfixedargs = cif->nfixedargs;
39181
+ arg_types = cif->arg_types;
39183
+ /* Grab the addresses of the arguments from the stack frame. */
39186
+ unsigned int elt, elnum;
39188
+ switch (arg_types[i]->type)
39190
+ case FFI_TYPE_SINT8:
39191
+ case FFI_TYPE_UINT8:
39192
+#ifndef __LITTLE_ENDIAN__
39193
+ avalue[i] = (char *) pst + 7;
39198
+ case FFI_TYPE_SINT16:
39199
+ case FFI_TYPE_UINT16:
39200
+#ifndef __LITTLE_ENDIAN__
39201
+ avalue[i] = (char *) pst + 6;
39206
+ case FFI_TYPE_SINT32:
39207
+ case FFI_TYPE_UINT32:
39208
+#ifndef __LITTLE_ENDIAN__
39209
+ avalue[i] = (char *) pst + 4;
39214
+ case FFI_TYPE_SINT64:
39215
+ case FFI_TYPE_UINT64:
39216
+ case FFI_TYPE_POINTER:
39221
+ case FFI_TYPE_STRUCT:
39222
+ if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
39224
+ align = arg_types[i]->alignment;
39228
+ pst = (unsigned long *) ALIGN ((size_t) pst, align);
39231
+#if _CALL_ELF == 2
39232
+ elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
39238
+ unsigned long *ul;
39244
+ /* Repackage the aggregate from its parts. The
39245
+ aggregate size is not greater than the space taken by
39246
+ the registers so store back to the register/parameter
39248
+ if (pfr + elnum <= end_pfr)
39253
+ avalue[i] = to.v;
39255
+ if (elt == FFI_TYPE_FLOAT)
39259
+ if (pfr < end_pfr && i < nfixedargs)
39261
+ *to.f = (float) pfr->d;
39269
+ while (--elnum != 0);
39275
+ if (pfr < end_pfr && i < nfixedargs)
39285
+ while (--elnum != 0);
39290
+#ifndef __LITTLE_ENDIAN__
39291
+ /* Structures with size less than eight bytes are passed
39293
+ if (arg_types[i]->size < 8)
39294
+ avalue[i] = (char *) pst + 8 - arg_types[i]->size;
39299
+ pst += (arg_types[i]->size + 7) / 8;
39302
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
39303
+ case FFI_TYPE_LONGDOUBLE:
39304
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
39306
+ if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
39313
+ if (pfr < end_pfr && i < nfixedargs)
39315
+ /* Passed partly in f13 and partly on the stack.
39316
+ Move it all to the stack. */
39317
+ *pst = *(unsigned long *) pfr;
39325
+ /* Fall through. */
39327
+ case FFI_TYPE_DOUBLE:
39328
+ /* On the outgoing stack all values are aligned to 8 */
39329
+ /* there are 13 64bit floating point registers */
39331
+ if (pfr < end_pfr && i < nfixedargs)
39341
+ case FFI_TYPE_FLOAT:
39342
+ if (pfr < end_pfr && i < nfixedargs)
39344
+ /* Float values are stored as doubles in the
39345
+ ffi_closure_LINUX64 code. Fix them here. */
39346
+ pfr->f = (float) pfr->d;
39363
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
39365
+ /* Tell ffi_closure_LINUX64 how to perform return type promotions. */
39366
+ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
39368
+ if ((cif->flags & FLAG_RETURNS_FP) == 0)
39369
+ return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
39370
+ else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
39371
+ return FFI_V2_TYPE_DOUBLE_HOMOG;
39373
+ return FFI_V2_TYPE_FLOAT_HOMOG;
39375
+ return cif->rtype->type;
39378
--- a/src/libffi/src/types.c
39379
+++ b/src/libffi/src/types.c
39384
+#define FFI_NONCONST_TYPEDEF(name, type, id) \
39385
+struct struct_align_##name { \
39389
+ffi_type ffi_type_##name = { \
39391
+ offsetof(struct struct_align_##name, x), \
39395
/* Size and alignment are fake here. They must not be 0. */
39396
const ffi_type ffi_type_void = {
39397
1, 1, FFI_TYPE_VOID, NULL
39400
const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
39401
#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
39402
+# if HAVE_LONG_DOUBLE_VARIANT
39403
+FFI_NONCONST_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
39405
FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
39408
--- a/src/libffi/src/prep_cif.c
39409
+++ b/src/libffi/src/prep_cif.c
39410
@@ -126,6 +126,10 @@
39414
+#if HAVE_LONG_DOUBLE_VARIANT
39415
+ ffi_prep_types (abi);
39418
/* Initialize the return type if necessary */
39419
if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
39420
return FFI_BAD_TYPEDEF;
39421
--- a/src/libffi/testsuite/Makefile.in
39422
+++ b/src/libffi/testsuite/Makefile.in
39426
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
39427
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
39428
INSTALL = @INSTALL@
39429
INSTALL_DATA = @INSTALL_DATA@
39430
INSTALL_PROGRAM = @INSTALL_PROGRAM@
39431
--- a/src/libffi/testsuite/libffi.call/cls_double_va.c
39432
+++ b/src/libffi/testsuite/libffi.call/cls_double_va.c
39435
/* This printf call is variadic */
39436
CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint,
39437
- arg_types) == FFI_OK);
39438
+ arg_types) == FFI_OK);
39441
args[1] = &doubleArg;
39442
@@ -45,19 +45,17 @@
39445
ffi_call(&cif, FFI_FN(printf), &res, args);
39446
- // { dg-output "7.0" }
39447
+ /* { dg-output "7.0" } */
39448
printf("res: %d\n", (int) res);
39449
- // { dg-output "\nres: 4" }
39450
+ /* { dg-output "\nres: 4" } */
39452
- /* The call to cls_double_va_fn is static, so have to use a normal prep_cif */
39453
- CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, arg_types) == FFI_OK);
39454
+ CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL,
39455
+ code) == FFI_OK);
39457
- CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL, code) == FFI_OK);
39459
- res = ((int(*)(char*, double))(code))(format, doubleArg);
39460
- // { dg-output "\n7.0" }
39461
+ res = ((int(*)(char*, ...))(code))(format, doubleArg);
39462
+ /* { dg-output "\n7.0" } */
39463
printf("res: %d\n", (int) res);
39464
- // { dg-output "\nres: 4" }
39465
+ /* { dg-output "\nres: 4" } */
39469
--- a/src/libffi/testsuite/libffi.call/cls_longdouble_va.c
39470
+++ b/src/libffi/testsuite/libffi.call/cls_longdouble_va.c
39473
/* This printf call is variadic */
39474
CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint,
39475
- arg_types) == FFI_OK);
39476
+ arg_types) == FFI_OK);
39480
@@ -45,20 +45,17 @@
39483
ffi_call(&cif, FFI_FN(printf), &res, args);
39484
- // { dg-output "7.0" }
39485
+ /* { dg-output "7.0" } */
39486
printf("res: %d\n", (int) res);
39487
- // { dg-output "\nres: 4" }
39488
+ /* { dg-output "\nres: 4" } */
39490
- /* The call to cls_longdouble_va_fn is static, so have to use a normal prep_cif */
39491
- CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint,
39492
- arg_types) == FFI_OK);
39493
+ CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL,
39494
+ code) == FFI_OK);
39496
- CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL, code) == FFI_OK);
39498
- res = ((int(*)(char*, long double))(code))(format, ldArg);
39499
- // { dg-output "\n7.0" }
39500
+ res = ((int(*)(char*, ...))(code))(format, ldArg);
39501
+ /* { dg-output "\n7.0" } */
39502
printf("res: %d\n", (int) res);
39503
- // { dg-output "\nres: 4" }
39504
+ /* { dg-output "\nres: 4" } */
39508
--- a/src/libffi/configure.ac
39509
+++ b/src/libffi/configure.ac
39511
AM_CONDITIONAL(TESTSUBDIR, test -d $srcdir/testsuite)
39513
TARGETDIR="unknown"
39514
+HAVE_LONG_DOUBLE_VARIANT=0
39517
TARGET=AARCH64; TARGETDIR=aarch64
39518
@@ -162,6 +163,7 @@
39520
powerpc*-*-linux* | powerpc-*-sysv*)
39521
TARGET=POWERPC; TARGETDIR=powerpc
39522
+ HAVE_LONG_DOUBLE_VARIANT=1
39524
powerpc-*-amigaos*)
39525
TARGET=POWERPC; TARGETDIR=powerpc
39526
@@ -177,6 +179,7 @@
39528
powerpc-*-freebsd* | powerpc-*-openbsd*)
39529
TARGET=POWERPC_FREEBSD; TARGETDIR=powerpc
39530
+ HAVE_LONG_DOUBLE_VARIANT=1
39532
powerpc64-*-freebsd*)
39533
TARGET=POWERPC; TARGETDIR=powerpc
39534
@@ -273,14 +276,20 @@
39535
# Also AC_SUBST this variable for ffi.h.
39536
if test -z "$HAVE_LONG_DOUBLE"; then
39538
- if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
39539
- if test $ac_cv_sizeof_long_double != 0; then
39540
+ if test $ac_cv_sizeof_long_double != 0; then
39541
+ if test $HAVE_LONG_DOUBLE_VARIANT != 0; then
39542
+ AC_DEFINE(HAVE_LONG_DOUBLE_VARIANT, 1, [Define if you support more than one size of the long double type])
39544
- AC_DEFINE(HAVE_LONG_DOUBLE, 1, [Define if you have the long double type and it is bigger than a double])
39546
+ if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
39547
+ HAVE_LONG_DOUBLE=1
39548
+ AC_DEFINE(HAVE_LONG_DOUBLE, 1, [Define if you have the long double type and it is bigger than a double])
39553
AC_SUBST(HAVE_LONG_DOUBLE)
39554
+AC_SUBST(HAVE_LONG_DOUBLE_VARIANT)
39558
--- a/src/libffi/doc/libffi.texi
39559
+++ b/src/libffi/doc/libffi.texi
39560
@@ -184,11 +184,11 @@
39562
@var{rvalue} is a pointer to a chunk of memory that will hold the
39563
result of the function call. This must be large enough to hold the
39564
-result and must be suitably aligned; it is the caller's responsibility
39565
+result, no smaller than the system register size (generally 32 or 64
39566
+bits), and must be suitably aligned; it is the caller's responsibility
39567
to ensure this. If @var{cif} declares that the function returns
39568
@code{void} (using @code{ffi_type_void}), then @var{rvalue} is
39569
-ignored. If @var{rvalue} is @samp{NULL}, then the return value is
39573
@var{avalues} is a vector of @code{void *} pointers that point to the
39574
memory locations holding the argument values for a call. If @var{cif}
39575
@@ -214,7 +214,7 @@
39582
/* Initialize the argument info vectors */
39583
args[0] = &ffi_type_pointer;
39584
@@ -222,7 +222,7 @@
39586
/* Initialize the cif */
39587
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
39588
- &ffi_type_uint, args) == FFI_OK)
39589
+ &ffi_type_sint, args) == FFI_OK)
39591
s = "Hello World!";
39592
ffi_call(&cif, puts, &rc, values);
39593
@@ -360,7 +360,7 @@
39594
new @code{ffi_type} object for it.
39598
+@deftp {Data type} ffi_type
39599
The @code{ffi_type} has the following members:
39602
@@ -414,6 +414,7 @@
39605
tm_type.size = tm_type.alignment = 0;
39606
+ tm_type.type = FFI_TYPE_STRUCT;
39607
tm_type.elements = &tm_type_elements;
39609
for (i = 0; i < 9; i++)
39610
@@ -540,12 +541,14 @@
39613
/* Acts like puts with the file given at time of enclosure. */
39614
-void puts_binding(ffi_cif *cif, unsigned int *ret, void* args[],
39616
+void puts_binding(ffi_cif *cif, void *ret, void* args[],
39619
- *ret = fputs(*(char **)args[0], stream);
39620
+ *(ffi_arg *)ret = fputs(*(char **)args[0], (FILE *)stream);
39623
+typedef int (*puts_t)(char *);
39628
@@ -552,9 +555,9 @@
39630
ffi_closure *closure;
39632
- int (*bound_puts)(char *);
39633
+ void *bound_puts;
39637
/* Allocate closure and bound_puts */
39638
closure = ffi_closure_alloc(sizeof(ffi_closure), &bound_puts);
39640
@@ -565,13 +568,13 @@
39642
/* Initialize the cif */
39643
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
39644
- &ffi_type_uint, args) == FFI_OK)
39645
+ &ffi_type_sint, args) == FFI_OK)
39647
/* Initialize the closure, setting stream to stdout */
39648
- if (ffi_prep_closure_loc(closure, &cif, puts_binding,
39649
+ if (ffi_prep_closure_loc(closure, &cif, puts_binding,
39650
stdout, bound_puts) == FFI_OK)
39652
- rc = bound_puts("Hello World!");
39653
+ rc = ((puts_t)bound_puts)("Hello World!");
39654
/* rc now holds the result of the call to fputs */
39657
--- a/src/libffi/Makefile.am
39658
+++ b/src/libffi/Makefile.am
39659
@@ -15,10 +15,12 @@
39660
src/ia64/unix.S src/mips/ffi.c src/mips/n32.S src/mips/o32.S \
39661
src/mips/ffitarget.h src/m32r/ffi.c src/m32r/sysv.S \
39662
src/m32r/ffitarget.h src/m68k/ffi.c src/m68k/sysv.S \
39663
- src/m68k/ffitarget.h src/powerpc/ffi.c src/powerpc/sysv.S \
39664
- src/powerpc/linux64.S src/powerpc/linux64_closure.S \
39665
- src/powerpc/ppc_closure.S src/powerpc/asm.h \
39666
- src/powerpc/aix.S src/powerpc/darwin.S \
39667
+ src/m68k/ffitarget.h \
39668
+ src/powerpc/ffi.c src/powerpc/ffi_powerpc.h \
39669
+ src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c \
39670
+ src/powerpc/sysv.S src/powerpc/linux64.S \
39671
+ src/powerpc/linux64_closure.S src/powerpc/ppc_closure.S \
39672
+ src/powerpc/asm.h src/powerpc/aix.S src/powerpc/darwin.S \
39673
src/powerpc/aix_closure.S src/powerpc/darwin_closure.S \
39674
src/powerpc/ffi_darwin.c src/powerpc/ffitarget.h \
39675
src/s390/ffi.c src/s390/sysv.S src/s390/ffitarget.h \
39676
@@ -179,7 +181,7 @@
39677
nodist_libffi_la_SOURCES += src/m68k/ffi.c src/m68k/sysv.S
39680
-nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
39681
+nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
39684
nodist_libffi_la_SOURCES += src/powerpc/ffi_darwin.c src/powerpc/aix.S src/powerpc/aix_closure.S
39685
@@ -188,7 +190,7 @@
39686
nodist_libffi_la_SOURCES += src/powerpc/ffi_darwin.c src/powerpc/darwin.S src/powerpc/darwin_closure.S
39689
-nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
39690
+nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
39693
nodist_libffi_la_SOURCES += src/aarch64/sysv.S src/aarch64/ffi.c
39694
--- a/src/libffi/man/Makefile.in
39695
+++ b/src/libffi/man/Makefile.in
39696
@@ -111,6 +111,7 @@
39699
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
39700
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
39701
INSTALL = @INSTALL@
39702
INSTALL_DATA = @INSTALL_DATA@
39703
INSTALL_PROGRAM = @INSTALL_PROGRAM@
39704
--- a/src/libssp/configure
39705
+++ b/src/libssp/configure
39706
@@ -6385,7 +6385,7 @@
39710
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
39711
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
39712
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
39713
# Find out which ABI we are using.
39714
echo 'int i;' > conftest.$ac_ext
39715
@@ -6410,7 +6410,10 @@
39719
- ppc64-*linux*|powerpc64-*linux*)
39720
+ powerpc64le-*linux*)
39721
+ LD="${LD-ld} -m elf32lppclinux"
39723
+ powerpc64-*linux*)
39724
LD="${LD-ld} -m elf32ppclinux"
39727
@@ -6429,7 +6432,10 @@
39729
LD="${LD-ld} -m elf_x86_64"
39731
- ppc*-*linux*|powerpc*-*linux*)
39732
+ powerpcle-*linux*)
39733
+ LD="${LD-ld} -m elf64lppc"
39736
LD="${LD-ld} -m elf64ppc"
39738
s390*-*linux*|s390*-*tpf*)
39739
@@ -10658,7 +10664,7 @@
39740
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
39741
lt_status=$lt_dlunknown
39742
cat > conftest.$ac_ext <<_LT_EOF
39743
-#line 10661 "configure"
39744
+#line 10667 "configure"
39745
#include "confdefs.h"
39748
@@ -10764,7 +10770,7 @@
39749
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
39750
lt_status=$lt_dlunknown
39751
cat > conftest.$ac_ext <<_LT_EOF
39752
-#line 10767 "configure"
39753
+#line 10773 "configure"
39754
#include "confdefs.h"
39757
--- a/src/libcpp/ChangeLog.ibm
39758
+++ b/src/libcpp/ChangeLog.ibm
39760
+2013-11-18 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
39762
+ * lex.c (search_line_fast): Correct for little endian.
39764
--- a/src/libcpp/lex.c
39765
+++ b/src/libcpp/lex.c
39766
@@ -559,8 +559,13 @@
39767
beginning with all ones and shifting in zeros according to the
39768
mis-alignment. The LVSR instruction pulls the exact shift we
39769
want from the address. */
39770
+#ifdef __BIG_ENDIAN__
39771
mask = __builtin_vec_lvsr(0, s);
39772
mask = __builtin_vec_perm(zero, ones, mask);
39774
+ mask = __builtin_vec_lvsl(0, s);
39775
+ mask = __builtin_vec_perm(ones, zero, mask);
39779
/* While altivec loads mask addresses, we still need to align S so
39780
@@ -624,7 +629,11 @@
39781
/* L now contains 0xff in bytes for which we matched one of the
39782
relevant characters. We can find the byte index by finding
39783
its bit index and dividing by 8. */
39784
+#ifdef __BIG_ENDIAN__
39785
l = __builtin_clzl(l) >> 3;
39787
+ l = __builtin_ctzl(l) >> 3;