1
# DP: Changes from the ibm/gcc-4_8-branch (20140117)
3
LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@206665 \
4
svn://gcc.gnu.org/svn/gcc/branches/ibm/gcc-4_8-branch@206670 \
5
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/libitm/configure
8
+++ b/src/libitm/configure
13
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
14
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
15
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
16
# Find out which ABI we are using.
17
echo 'int i;' > conftest.$ac_ext
18
@@ -7295,7 +7295,10 @@
22
- ppc64-*linux*|powerpc64-*linux*)
23
+ powerpc64le-*linux*)
24
+ LD="${LD-ld} -m elf32lppclinux"
27
LD="${LD-ld} -m elf32ppclinux"
30
@@ -7314,7 +7317,10 @@
32
LD="${LD-ld} -m elf_x86_64"
34
- ppc*-*linux*|powerpc*-*linux*)
36
+ LD="${LD-ld} -m elf64lppc"
39
LD="${LD-ld} -m elf64ppc"
41
s390*-*linux*|s390*-*tpf*)
42
@@ -11779,7 +11785,7 @@
43
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
44
lt_status=$lt_dlunknown
45
cat > conftest.$ac_ext <<_LT_EOF
46
-#line 11782 "configure"
47
+#line 11788 "configure"
51
@@ -11885,7 +11891,7 @@
52
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
53
lt_status=$lt_dlunknown
54
cat > conftest.$ac_ext <<_LT_EOF
55
-#line 11888 "configure"
56
+#line 11894 "configure"
60
@@ -17401,7 +17407,44 @@
64
+case "${target_cpu}" in
66
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports HTM" >&5
67
+$as_echo_n "checking if the assembler supports HTM... " >&6; }
68
+if test "${libitm_cv_as_htm+set}" = set; then :
69
+ $as_echo_n "(cached) " >&6
72
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
73
+/* end confdefs.h. */
78
+asm("tbegin. 0; tend. 0");
83
+if ac_fn_c_try_compile "$LINENO"; then :
84
+ libitm_cv_as_htm=yes
88
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
91
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libitm_cv_as_htm" >&5
92
+$as_echo "$libitm_cv_as_htm" >&6; }
93
+ if test x$libitm_cv_as_htm = xyes; then
95
+$as_echo "#define HAVE_AS_HTM 1" >>confdefs.h
102
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether weak refs work like ELF" >&5
103
$as_echo_n "checking whether weak refs work like ELF... " >&6; }
104
if test "${ac_cv_have_elf_style_weakref+set}" = set; then :
105
--- a/src/libitm/ChangeLog.ibm
106
+++ b/src/libitm/ChangeLog.ibm
108
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
110
+ Backport from mainline r204808:
112
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
114
+ * config/powerpc/sjlj.S [__powerpc64__ && _CALL_ELF == 2]:
115
+ (FUNC): Define ELFv2 variant.
117
+ (HIDDEN): Likewise.
120
+ (LR_SAVE): Likewise.
122
+2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
124
+ Backport from mainline
125
+ 2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
127
+ * acinclude.m4 (LIBITM_CHECK_AS_HTM): New.
128
+ * configure.ac: Use it.
129
+ (AC_CHECK_HEADERS): Check for sys/auxv.h.
130
+ (AC_CHECK_FUNCS): Check for getauxval.
131
+ * config.h.in, configure: Rebuild.
132
+ * configure.tgt (target_cpu): Add -mhtm to XCFLAGS.
133
+ * config/powerpc/target.h: Include sys/auxv.h and htmintrin.h.
134
+ (USE_HTM_FASTPATH): Define.
135
+ (_TBEGIN_STARTED, _TBEGIN_INDETERMINATE, _TBEGIN_PERSISTENT,
136
+ _HTM_RETRIES) New macros.
137
+ (htm_abort, htm_abort_should_retry, htm_available, htm_begin, htm_init,
138
+ htm_begin_success, htm_commit, htm_transaction_active): New functions.
139
--- a/src/libitm/configure.tgt
140
+++ b/src/libitm/configure.tgt
142
# work out any special compilation flags as necessary.
143
case "${target_cpu}" in
144
alpha*) ARCH=alpha ;;
145
- rs6000 | powerpc*) ARCH=powerpc ;;
147
+ XCFLAGS="${XCFLAGS} -mhtm"
153
--- a/src/libitm/config/powerpc/sjlj.S
154
+++ b/src/libitm/config/powerpc/sjlj.S
159
-#if defined(__powerpc64__) && defined(__ELF__)
160
+#if defined(__powerpc64__) && _CALL_ELF == 2
163
+ .type \name, @function
165
+0: addis 2,12,(.TOC.-0b)@ha
166
+ addi 2,2,(.TOC.-0b)@l
167
+ .localentry \name, . - \name
170
+ .size \name, . - \name
179
+#elif defined(__powerpc64__) && defined(__ELF__)
185
#if defined(_CALL_AIXDESC)
187
# define LR_SAVE 2*WS
188
+#elif _CALL_ELF == 2
190
+# define LR_SAVE 2*WS
191
#elif defined(_CALL_SYSV)
193
# define LR_SAVE 1*WS
194
--- a/src/libitm/config/powerpc/target.h
195
+++ b/src/libitm/config/powerpc/target.h
197
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
198
<http://www.gnu.org/licenses/>. */
200
+#ifdef HAVE_SYS_AUXV_H
201
+#include <sys/auxv.h>
204
namespace GTM HIDDEN {
206
typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
208
__asm volatile ("" : : : "memory");
211
+// Use HTM if it is supported by the system.
212
+// See gtm_thread::begin_transaction for how these functions are used.
213
+#if defined (__linux__) \
214
+ && defined (HAVE_AS_HTM) \
215
+ && defined (HAVE_GETAUXVAL) \
216
+ && defined (AT_HWCAP2) \
217
+ && defined (PPC_FEATURE2_HAS_HTM)
219
+#include <htmintrin.h>
221
+#define USE_HTM_FASTPATH
223
+#define _TBEGIN_STARTED 0
224
+#define _TBEGIN_INDETERMINATE 1
225
+#define _TBEGIN_PERSISTENT 2
227
+/* Number of retries for transient failures. */
228
+#define _HTM_RETRIES 10
231
+htm_available (void)
233
+ return (getauxval (AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) ? true : false;
236
+static inline uint32_t
239
+ // Maximum number of times we try to execute a transaction
240
+ // as a HW transaction.
241
+ return htm_available () ? _HTM_RETRIES : 0;
244
+static inline uint32_t
247
+ if (__builtin_expect (__builtin_tbegin (0), 1))
248
+ return _TBEGIN_STARTED;
250
+ if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ()))
251
+ return _TBEGIN_PERSISTENT;
253
+ return _TBEGIN_INDETERMINATE;
257
+htm_begin_success (uint32_t begin_ret)
259
+ return begin_ret == _TBEGIN_STARTED;
265
+ __builtin_tend (0);
271
+ __builtin_tabort (0);
275
+htm_abort_should_retry (uint32_t begin_ret)
277
+ return begin_ret != _TBEGIN_PERSISTENT;
280
+/* Returns true iff a hardware transaction is currently being executed. */
282
+htm_transaction_active (void)
284
+ return (_HTM_STATE (__builtin_ttest ()) == _HTM_TRANSACTIONAL);
290
--- a/src/libitm/acinclude.m4
291
+++ b/src/libitm/acinclude.m4
296
+dnl Check if as supports HTM instructions.
297
+AC_DEFUN([LIBITM_CHECK_AS_HTM], [
298
+case "${target_cpu}" in
300
+ AC_CACHE_CHECK([if the assembler supports HTM], libitm_cv_as_htm, [
301
+ AC_TRY_COMPILE([], [asm("tbegin. 0; tend. 0");],
302
+ [libitm_cv_as_htm=yes], [libitm_cv_as_htm=no])
304
+ if test x$libitm_cv_as_htm = xyes; then
305
+ AC_DEFINE(HAVE_AS_HTM, 1, [Define to 1 if the assembler supports HTM.])
310
sinclude(../libtool.m4)
311
dnl The lines below arrange for aclocal not to bring an installed
312
dnl libtool.m4 into aclocal.m4, while still arranging for automake to
315
@@ -1220,7 +1220,7 @@
319
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
320
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
321
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
322
# Find out which ABI we are using.
323
echo 'int i;' > conftest.$ac_ext
324
@@ -1241,7 +1241,10 @@
328
- ppc64-*linux*|powerpc64-*linux*)
329
+ powerpc64le-*linux*)
330
+ LD="${LD-ld} -m elf32lppclinux"
333
LD="${LD-ld} -m elf32ppclinux"
336
@@ -1260,7 +1263,10 @@
338
LD="${LD-ld} -m elf_x86_64"
340
- ppc*-*linux*|powerpc*-*linux*)
342
+ LD="${LD-ld} -m elf64lppc"
345
LD="${LD-ld} -m elf64ppc"
347
s390*-*linux*|s390*-*tpf*)
348
--- a/src/libgomp/configure
349
+++ b/src/libgomp/configure
350
@@ -6580,7 +6580,7 @@
354
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
355
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
356
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
357
# Find out which ABI we are using.
358
echo 'int i;' > conftest.$ac_ext
359
@@ -6605,7 +6605,10 @@
363
- ppc64-*linux*|powerpc64-*linux*)
364
+ powerpc64le-*linux*)
365
+ LD="${LD-ld} -m elf32lppclinux"
368
LD="${LD-ld} -m elf32ppclinux"
371
@@ -6624,7 +6627,10 @@
373
LD="${LD-ld} -m elf_x86_64"
375
- ppc*-*linux*|powerpc*-*linux*)
377
+ LD="${LD-ld} -m elf64lppc"
380
LD="${LD-ld} -m elf64ppc"
382
s390*-*linux*|s390*-*tpf*)
383
@@ -11088,7 +11094,7 @@
384
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
385
lt_status=$lt_dlunknown
386
cat > conftest.$ac_ext <<_LT_EOF
387
-#line 11091 "configure"
388
+#line 11097 "configure"
389
#include "confdefs.h"
392
@@ -11194,7 +11200,7 @@
393
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
394
lt_status=$lt_dlunknown
395
cat > conftest.$ac_ext <<_LT_EOF
396
-#line 11197 "configure"
397
+#line 11203 "configure"
398
#include "confdefs.h"
401
--- a/src/libquadmath/configure
402
+++ b/src/libquadmath/configure
403
@@ -6248,7 +6248,7 @@
407
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
408
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
409
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
410
# Find out which ABI we are using.
411
echo 'int i;' > conftest.$ac_ext
412
@@ -6273,7 +6273,10 @@
416
- ppc64-*linux*|powerpc64-*linux*)
417
+ powerpc64le-*linux*)
418
+ LD="${LD-ld} -m elf32lppclinux"
421
LD="${LD-ld} -m elf32ppclinux"
424
@@ -6292,7 +6295,10 @@
426
LD="${LD-ld} -m elf_x86_64"
428
- ppc*-*linux*|powerpc*-*linux*)
430
+ LD="${LD-ld} -m elf64lppc"
433
LD="${LD-ld} -m elf64ppc"
435
s390*-*linux*|s390*-*tpf*)
436
@@ -10521,7 +10527,7 @@
437
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
438
lt_status=$lt_dlunknown
439
cat > conftest.$ac_ext <<_LT_EOF
440
-#line 10524 "configure"
441
+#line 10530 "configure"
442
#include "confdefs.h"
445
@@ -10627,7 +10633,7 @@
446
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
447
lt_status=$lt_dlunknown
448
cat > conftest.$ac_ext <<_LT_EOF
449
-#line 10630 "configure"
450
+#line 10636 "configure"
451
#include "confdefs.h"
454
--- a/src/libsanitizer/configure
455
+++ b/src/libsanitizer/configure
456
@@ -6604,7 +6604,7 @@
460
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
461
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
462
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
463
# Find out which ABI we are using.
464
echo 'int i;' > conftest.$ac_ext
465
@@ -6629,7 +6629,10 @@
469
- ppc64-*linux*|powerpc64-*linux*)
470
+ powerpc64le-*linux*)
471
+ LD="${LD-ld} -m elf32lppclinux"
474
LD="${LD-ld} -m elf32ppclinux"
477
@@ -6648,7 +6651,10 @@
479
LD="${LD-ld} -m elf_x86_64"
481
- ppc*-*linux*|powerpc*-*linux*)
483
+ LD="${LD-ld} -m elf64lppc"
486
LD="${LD-ld} -m elf64ppc"
488
s390*-*linux*|s390*-*tpf*)
489
@@ -11111,7 +11117,7 @@
490
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
491
lt_status=$lt_dlunknown
492
cat > conftest.$ac_ext <<_LT_EOF
493
-#line 11114 "configure"
494
+#line 11120 "configure"
495
#include "confdefs.h"
498
@@ -11217,7 +11223,7 @@
499
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
500
lt_status=$lt_dlunknown
501
cat > conftest.$ac_ext <<_LT_EOF
502
-#line 11220 "configure"
503
+#line 11226 "configure"
504
#include "confdefs.h"
507
--- a/src/zlib/configure
508
+++ b/src/zlib/configure
509
@@ -5853,7 +5853,7 @@
513
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
514
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
515
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
516
# Find out which ABI we are using.
517
echo 'int i;' > conftest.$ac_ext
518
@@ -5878,7 +5878,10 @@
522
- ppc64-*linux*|powerpc64-*linux*)
523
+ powerpc64le-*linux*)
524
+ LD="${LD-ld} -m elf32lppclinux"
527
LD="${LD-ld} -m elf32ppclinux"
530
@@ -5897,7 +5900,10 @@
532
LD="${LD-ld} -m elf_x86_64"
534
- ppc*-*linux*|powerpc*-*linux*)
536
+ LD="${LD-ld} -m elf64lppc"
539
LD="${LD-ld} -m elf64ppc"
541
s390*-*linux*|s390*-*tpf*)
542
@@ -10394,7 +10400,7 @@
543
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
544
lt_status=$lt_dlunknown
545
cat > conftest.$ac_ext <<_LT_EOF
546
-#line 10397 "configure"
547
+#line 10403 "configure"
548
#include "confdefs.h"
551
@@ -10500,7 +10506,7 @@
552
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
553
lt_status=$lt_dlunknown
554
cat > conftest.$ac_ext <<_LT_EOF
555
-#line 10503 "configure"
556
+#line 10509 "configure"
557
#include "confdefs.h"
560
--- a/src/libstdc++-v3/configure
561
+++ b/src/libstdc++-v3/configure
562
@@ -7111,7 +7111,7 @@
566
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
567
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
568
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
569
# Find out which ABI we are using.
570
echo 'int i;' > conftest.$ac_ext
571
@@ -7136,7 +7136,10 @@
575
- ppc64-*linux*|powerpc64-*linux*)
576
+ powerpc64le-*linux*)
577
+ LD="${LD-ld} -m elf32lppclinux"
580
LD="${LD-ld} -m elf32ppclinux"
583
@@ -7155,7 +7158,10 @@
585
LD="${LD-ld} -m elf_x86_64"
587
- ppc*-*linux*|powerpc*-*linux*)
589
+ LD="${LD-ld} -m elf64lppc"
592
LD="${LD-ld} -m elf64ppc"
594
s390*-*linux*|s390*-*tpf*)
595
@@ -11513,7 +11519,7 @@
596
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
597
lt_status=$lt_dlunknown
598
cat > conftest.$ac_ext <<_LT_EOF
599
-#line 11516 "configure"
600
+#line 11522 "configure"
601
#include "confdefs.h"
604
@@ -11619,7 +11625,7 @@
605
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
606
lt_status=$lt_dlunknown
607
cat > conftest.$ac_ext <<_LT_EOF
608
-#line 11622 "configure"
609
+#line 11628 "configure"
610
#include "confdefs.h"
613
@@ -15033,7 +15039,7 @@
615
# Fake what AC_TRY_COMPILE does. XXX Look at redoing this new-style.
616
cat > conftest.$ac_ext << EOF
617
-#line 15036 "configure"
618
+#line 15042 "configure"
622
@@ -15383,7 +15389,7 @@
623
# Fake what AC_TRY_COMPILE does.
625
cat > conftest.$ac_ext << EOF
626
-#line 15386 "configure"
627
+#line 15392 "configure"
630
typedef bool atomic_type;
631
@@ -15418,7 +15424,7 @@
634
cat > conftest.$ac_ext << EOF
635
-#line 15421 "configure"
636
+#line 15427 "configure"
639
typedef short atomic_type;
640
@@ -15453,7 +15459,7 @@
643
cat > conftest.$ac_ext << EOF
644
-#line 15456 "configure"
645
+#line 15462 "configure"
648
// NB: _Atomic_word not necessarily int.
649
@@ -15489,7 +15495,7 @@
652
cat > conftest.$ac_ext << EOF
653
-#line 15492 "configure"
654
+#line 15498 "configure"
657
typedef long long atomic_type;
658
@@ -15568,7 +15574,7 @@
659
# unnecessary for this test.
661
cat > conftest.$ac_ext << EOF
662
-#line 15571 "configure"
663
+#line 15577 "configure"
667
@@ -15610,7 +15616,7 @@
668
# unnecessary for this test.
670
cat > conftest.$ac_ext << EOF
671
-#line 15613 "configure"
672
+#line 15619 "configure"
673
template<typename T1, typename T2>
675
{ typedef T2 type; };
676
@@ -15644,7 +15650,7 @@
679
cat > conftest.$ac_ext << EOF
680
-#line 15647 "configure"
681
+#line 15653 "configure"
682
template<typename T1, typename T2>
684
{ typedef T2 type; };
685
--- a/src/libstdc++-v3/scripts/extract_symvers.in
686
+++ b/src/libstdc++-v3/scripts/extract_symvers.in
688
# present on Solaris.
690
sed -e 's/ \[<other>: [A-Fa-f0-9]*\] //' -e '/\.dynsym/,/^$/p;d' |\
691
+ sed -e 's/ \[<localentry>: [0-9]*\] //' |\
692
egrep -v ' (LOCAL|UND) ' |\
693
egrep -v ' (_DYNAMIC|_GLOBAL_OFFSET_TABLE_|_PROCEDURE_LINKAGE_TABLE_|_edata|_end|_etext)$' |\
694
sed -e 's/ <processor specific>: / <processor_specific>:_/g' |\
695
--- a/src/libstdc++-v3/ChangeLog.ibm
696
+++ b/src/libstdc++-v3/ChangeLog.ibm
698
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
700
+ Backport from mainline r204808:
702
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
704
+ * scripts/extract_symvers.in: Ignore <localentry: > fields
705
+ in readelf --symbols output.
707
+2013-08-04 Peter Bergner <bergner@vnet.ibm.com>
709
+ Backport from mainline
710
+ 2013-08-01 Fabien Chêne <fabien@gcc.gnu.org>
713
+ * include/tr1/cmath: Remove pow(double,double) overload, remove a
714
+ duplicated comment about DR 550. Add a comment to explain the issue.
715
+ * testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc: New.
717
--- a/src/libstdc++-v3/include/tr1/cmath
718
+++ b/src/libstdc++-v3/include/tr1/cmath
720
nexttoward(_Tp __x, long double __y)
721
{ return __builtin_nexttoward(__x, __y); }
723
- // DR 550. What should the return type of pow(float,int) be?
724
- // NB: C++0x and TR1 != C++03.
728
remainder(float __x, float __y)
729
{ return __builtin_remainderf(__x, __y); }
730
@@ -985,10 +981,19 @@
732
// DR 550. What should the return type of pow(float,int) be?
733
// NB: C++0x and TR1 != C++03.
735
- pow(double __x, double __y)
736
- { return std::pow(__x, __y); }
738
+ // The std::tr1::pow(double, double) overload cannot be provided
739
+ // here, because it would clash with ::pow(double,double) declared
740
+ // in <math.h>, if <tr1/math.h> is included at the same time (raised
741
+ // by the fix of PR c++/54537). It is not possible either to use the
742
+ // using-declaration 'using ::pow;' here, because if the user code
743
+ // has a 'using std::pow;', it would bring the pow(*,int) averloads
744
+ // in the tr1 namespace, which is undesirable. Consequently, the
745
+ // solution is to forward std::tr1::pow(double,double) to
746
+ // std::pow(double,double) via the templatized version below. See
747
+ // the discussion about this issue here:
748
+ // http://gcc.gnu.org/ml/gcc-patches/2012-09/msg01278.html
751
pow(float __x, float __y)
752
{ return std::pow(__x, __y); }
753
--- a/src/libstdc++-v3/testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc
754
+++ b/src/libstdc++-v3/testsuite/tr1/8_c_compatibility/cmath/pow_cmath.cc
756
+// { dg-do compile }
758
+// Copyright (C) 2013 Free Software Foundation, Inc.
760
+// This file is part of the GNU ISO C++ Library. This library is free
761
+// software; you can redistribute it and/or modify it under the
762
+// terms of the GNU General Public License as published by the
763
+// Free Software Foundation; either version 3, or (at your option)
764
+// any later version.
766
+// This library is distributed in the hope that it will be useful,
767
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
768
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
769
+// GNU General Public License for more details.
771
+// You should have received a copy of the GNU General Public License along
772
+// with this library; see the file COPYING3. If not see
773
+// <http://www.gnu.org/licenses/>.
777
+#include <tr1/cmath>
778
+#include <testsuite_tr1.h>
783
+ using namespace __gnu_test;
785
+ float x = 2080703.375F;
786
+ check_ret_type<float>(std::pow(x, 2));
787
+ check_ret_type<double>(std::tr1::pow(x, 2));
789
--- a/src/libmudflap/configure
790
+++ b/src/libmudflap/configure
791
@@ -6377,7 +6377,7 @@
795
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
796
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
797
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
798
# Find out which ABI we are using.
799
echo 'int i;' > conftest.$ac_ext
800
@@ -6402,7 +6402,10 @@
804
- ppc64-*linux*|powerpc64-*linux*)
805
+ powerpc64le-*linux*)
806
+ LD="${LD-ld} -m elf32lppclinux"
809
LD="${LD-ld} -m elf32ppclinux"
812
@@ -6421,7 +6424,10 @@
814
LD="${LD-ld} -m elf_x86_64"
816
- ppc*-*linux*|powerpc*-*linux*)
818
+ LD="${LD-ld} -m elf64lppc"
821
LD="${LD-ld} -m elf64ppc"
823
s390*-*linux*|s390*-*tpf*)
824
@@ -10615,7 +10621,7 @@
825
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
826
lt_status=$lt_dlunknown
827
cat > conftest.$ac_ext <<_LT_EOF
828
-#line 10618 "configure"
829
+#line 10624 "configure"
830
#include "confdefs.h"
833
@@ -10721,7 +10727,7 @@
834
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
835
lt_status=$lt_dlunknown
836
cat > conftest.$ac_ext <<_LT_EOF
837
-#line 10724 "configure"
838
+#line 10730 "configure"
839
#include "confdefs.h"
842
--- a/src/boehm-gc/configure
843
+++ b/src/boehm-gc/configure
844
@@ -6770,7 +6770,7 @@
848
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
849
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
850
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
851
# Find out which ABI we are using.
852
echo 'int i;' > conftest.$ac_ext
853
@@ -6795,7 +6795,10 @@
857
- ppc64-*linux*|powerpc64-*linux*)
858
+ powerpc64le-*linux*)
859
+ LD="${LD-ld} -m elf32lppclinux"
862
LD="${LD-ld} -m elf32ppclinux"
865
@@ -6814,7 +6817,10 @@
867
LD="${LD-ld} -m elf_x86_64"
869
- ppc*-*linux*|powerpc*-*linux*)
871
+ LD="${LD-ld} -m elf64lppc"
874
LD="${LD-ld} -m elf64ppc"
876
s390*-*linux*|s390*-*tpf*)
877
@@ -11312,7 +11318,7 @@
878
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
879
lt_status=$lt_dlunknown
880
cat > conftest.$ac_ext <<_LT_EOF
881
-#line 11315 "configure"
882
+#line 11321 "configure"
883
#include "confdefs.h"
886
@@ -11418,7 +11424,7 @@
887
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
888
lt_status=$lt_dlunknown
889
cat > conftest.$ac_ext <<_LT_EOF
890
-#line 11421 "configure"
891
+#line 11427 "configure"
892
#include "confdefs.h"
895
--- a/src/lto-plugin/configure
896
+++ b/src/lto-plugin/configure
897
@@ -6044,7 +6044,7 @@
901
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
902
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
903
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
904
# Find out which ABI we are using.
905
echo 'int i;' > conftest.$ac_ext
906
@@ -6069,7 +6069,10 @@
910
- ppc64-*linux*|powerpc64-*linux*)
911
+ powerpc64le-*linux*)
912
+ LD="${LD-ld} -m elf32lppclinux"
915
LD="${LD-ld} -m elf32ppclinux"
918
@@ -6088,7 +6091,10 @@
920
LD="${LD-ld} -m elf_x86_64"
922
- ppc*-*linux*|powerpc*-*linux*)
924
+ LD="${LD-ld} -m elf64lppc"
927
LD="${LD-ld} -m elf64ppc"
929
s390*-*linux*|s390*-*tpf*)
930
@@ -10552,7 +10558,7 @@
931
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
932
lt_status=$lt_dlunknown
933
cat > conftest.$ac_ext <<_LT_EOF
934
-#line 10555 "configure"
935
+#line 10561 "configure"
936
#include "confdefs.h"
939
@@ -10658,7 +10664,7 @@
940
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
941
lt_status=$lt_dlunknown
942
cat > conftest.$ac_ext <<_LT_EOF
943
-#line 10661 "configure"
944
+#line 10667 "configure"
945
#include "confdefs.h"
948
--- a/src/libatomic/configure
949
+++ b/src/libatomic/configure
950
@@ -6505,7 +6505,7 @@
954
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
955
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
956
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
957
# Find out which ABI we are using.
958
echo 'int i;' > conftest.$ac_ext
959
@@ -6530,7 +6530,10 @@
963
- ppc64-*linux*|powerpc64-*linux*)
964
+ powerpc64le-*linux*)
965
+ LD="${LD-ld} -m elf32lppclinux"
968
LD="${LD-ld} -m elf32ppclinux"
971
@@ -6549,7 +6552,10 @@
973
LD="${LD-ld} -m elf_x86_64"
975
- ppc*-*linux*|powerpc*-*linux*)
977
+ LD="${LD-ld} -m elf64lppc"
980
LD="${LD-ld} -m elf64ppc"
982
s390*-*linux*|s390*-*tpf*)
983
@@ -11013,7 +11019,7 @@
984
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
985
lt_status=$lt_dlunknown
986
cat > conftest.$ac_ext <<_LT_EOF
987
-#line 11016 "configure"
988
+#line 11022 "configure"
989
#include "confdefs.h"
992
@@ -11119,7 +11125,7 @@
993
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
994
lt_status=$lt_dlunknown
995
cat > conftest.$ac_ext <<_LT_EOF
996
-#line 11122 "configure"
997
+#line 11128 "configure"
998
#include "confdefs.h"
1001
--- a/src/libbacktrace/configure
1002
+++ b/src/libbacktrace/configure
1003
@@ -6842,7 +6842,7 @@
1007
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1008
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1009
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1010
# Find out which ABI we are using.
1011
echo 'int i;' > conftest.$ac_ext
1012
@@ -6867,7 +6867,10 @@
1016
- ppc64-*linux*|powerpc64-*linux*)
1017
+ powerpc64le-*linux*)
1018
+ LD="${LD-ld} -m elf32lppclinux"
1020
+ powerpc64-*linux*)
1021
LD="${LD-ld} -m elf32ppclinux"
1024
@@ -6886,7 +6889,10 @@
1026
LD="${LD-ld} -m elf_x86_64"
1028
- ppc*-*linux*|powerpc*-*linux*)
1029
+ powerpcle-*linux*)
1030
+ LD="${LD-ld} -m elf64lppc"
1033
LD="${LD-ld} -m elf64ppc"
1035
s390*-*linux*|s390*-*tpf*)
1036
@@ -11081,7 +11087,7 @@
1037
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1038
lt_status=$lt_dlunknown
1039
cat > conftest.$ac_ext <<_LT_EOF
1040
-#line 11084 "configure"
1041
+#line 11090 "configure"
1042
#include "confdefs.h"
1045
@@ -11187,7 +11193,7 @@
1046
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1047
lt_status=$lt_dlunknown
1048
cat > conftest.$ac_ext <<_LT_EOF
1049
-#line 11190 "configure"
1050
+#line 11196 "configure"
1051
#include "confdefs.h"
1054
--- a/src/libjava/libltdl/configure
1055
+++ b/src/libjava/libltdl/configure
1056
@@ -4806,7 +4806,7 @@
1060
-x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1061
+x86_64-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1062
# Find out which ABI we are using.
1063
echo 'int i;' > conftest.$ac_ext
1064
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
1065
@@ -4820,7 +4820,10 @@
1067
LD="${LD-ld} -m elf_i386"
1069
- ppc64-*linux*|powerpc64-*linux*)
1070
+ powerpc64le-*linux*)
1071
+ LD="${LD-ld} -m elf32lppclinux"
1073
+ powerpc64-*linux*)
1074
LD="${LD-ld} -m elf32ppclinux"
1077
@@ -4836,7 +4839,10 @@
1079
LD="${LD-ld} -m elf_x86_64"
1081
- ppc*-*linux*|powerpc*-*linux*)
1082
+ powerpcle-*linux*)
1083
+ LD="${LD-ld} -m elf64lppc"
1086
LD="${LD-ld} -m elf64ppc"
1089
@@ -6456,11 +6462,11 @@
1090
-e 's:.*FLAGS}? :&$lt_compiler_flag :; t' \
1091
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
1092
-e 's:$: $lt_compiler_flag:'`
1093
- (eval echo "\"\$as_me:6459: $lt_compile\"" >&5)
1094
+ (eval echo "\"\$as_me:6465: $lt_compile\"" >&5)
1095
(eval "$lt_compile" 2>conftest.err)
1097
cat conftest.err >&5
1098
- echo "$as_me:6463: \$? = $ac_status" >&5
1099
+ echo "$as_me:6469: \$? = $ac_status" >&5
1100
if (exit $ac_status) && test -s "$ac_outfile"; then
1101
# The compiler can only warn and ignore the option if not recognized
1102
# So say no if there are warnings other than the usual output.
1103
@@ -6718,11 +6724,11 @@
1104
-e 's:.*FLAGS}? :&$lt_compiler_flag :; t' \
1105
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
1106
-e 's:$: $lt_compiler_flag:'`
1107
- (eval echo "\"\$as_me:6721: $lt_compile\"" >&5)
1108
+ (eval echo "\"\$as_me:6727: $lt_compile\"" >&5)
1109
(eval "$lt_compile" 2>conftest.err)
1111
cat conftest.err >&5
1112
- echo "$as_me:6725: \$? = $ac_status" >&5
1113
+ echo "$as_me:6731: \$? = $ac_status" >&5
1114
if (exit $ac_status) && test -s "$ac_outfile"; then
1115
# The compiler can only warn and ignore the option if not recognized
1116
# So say no if there are warnings other than the usual output.
1117
@@ -6780,11 +6786,11 @@
1118
-e 's:.*FLAGS}? :&$lt_compiler_flag :; t' \
1119
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
1120
-e 's:$: $lt_compiler_flag:'`
1121
- (eval echo "\"\$as_me:6783: $lt_compile\"" >&5)
1122
+ (eval echo "\"\$as_me:6789: $lt_compile\"" >&5)
1123
(eval "$lt_compile" 2>out/conftest.err)
1125
cat out/conftest.err >&5
1126
- echo "$as_me:6787: \$? = $ac_status" >&5
1127
+ echo "$as_me:6793: \$? = $ac_status" >&5
1128
if (exit $ac_status) && test -s out/conftest2.$ac_objext
1130
# The compiler can only warn and ignore the option if not recognized
1131
@@ -8099,7 +8105,7 @@
1134
x86_64*|s390x*|powerpc64*)
1135
- echo '#line 8102 "configure"' > conftest.$ac_ext
1136
+ echo '#line 8108 "configure"' > conftest.$ac_ext
1137
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
1138
(eval $ac_compile) 2>&5
1140
@@ -8652,7 +8658,7 @@
1141
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1142
lt_status=$lt_dlunknown
1143
cat > conftest.$ac_ext <<EOF
1144
-#line 8655 "configure"
1145
+#line 8661 "configure"
1146
#include "confdefs.h"
1149
@@ -8750,7 +8756,7 @@
1150
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1151
lt_status=$lt_dlunknown
1152
cat > conftest.$ac_ext <<EOF
1153
-#line 8753 "configure"
1154
+#line 8759 "configure"
1155
#include "confdefs.h"
1158
@@ -10591,7 +10597,7 @@
1159
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1160
lt_status=$lt_dlunknown
1161
cat > conftest.$ac_ext <<EOF
1162
-#line 10594 "configure"
1163
+#line 10600 "configure"
1164
#include "confdefs.h"
1167
--- a/src/libjava/libltdl/acinclude.m4
1168
+++ b/src/libjava/libltdl/acinclude.m4
1173
-x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1174
+x86_64-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
1175
# Find out which ABI we are using.
1176
echo 'int i;' > conftest.$ac_ext
1177
if AC_TRY_EVAL(ac_compile); then
1178
@@ -529,7 +529,10 @@
1180
LD="${LD-ld} -m elf_i386"
1182
- ppc64-*linux*|powerpc64-*linux*)
1183
+ powerpc64le-*linux*)
1184
+ LD="${LD-ld} -m elf32lppclinux"
1186
+ powerpc64-*linux*)
1187
LD="${LD-ld} -m elf32ppclinux"
1190
@@ -545,7 +548,10 @@
1192
LD="${LD-ld} -m elf_x86_64"
1194
- ppc*-*linux*|powerpc*-*linux*)
1195
+ powerpcle-*linux*)
1196
+ LD="${LD-ld} -m elf64lppc"
1199
LD="${LD-ld} -m elf64ppc"
1202
--- a/src/libjava/classpath/configure
1203
+++ b/src/libjava/classpath/configure
1204
@@ -7577,7 +7577,7 @@
1208
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1209
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1210
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1211
# Find out which ABI we are using.
1212
echo 'int i;' > conftest.$ac_ext
1213
@@ -7602,7 +7602,10 @@
1217
- ppc64-*linux*|powerpc64-*linux*)
1218
+ powerpc64le-*linux*)
1219
+ LD="${LD-ld} -m elf32lppclinux"
1221
+ powerpc64-*linux*)
1222
LD="${LD-ld} -m elf32ppclinux"
1225
@@ -7621,7 +7624,10 @@
1227
LD="${LD-ld} -m elf_x86_64"
1229
- ppc*-*linux*|powerpc*-*linux*)
1230
+ powerpcle-*linux*)
1231
+ LD="${LD-ld} -m elf64lppc"
1234
LD="${LD-ld} -m elf64ppc"
1236
s390*-*linux*|s390*-*tpf*)
1237
@@ -11820,7 +11826,7 @@
1238
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1239
lt_status=$lt_dlunknown
1240
cat > conftest.$ac_ext <<_LT_EOF
1241
-#line 11823 "configure"
1242
+#line 11829 "configure"
1243
#include "confdefs.h"
1246
@@ -11926,7 +11932,7 @@
1247
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1248
lt_status=$lt_dlunknown
1249
cat > conftest.$ac_ext <<_LT_EOF
1250
-#line 11929 "configure"
1251
+#line 11935 "configure"
1252
#include "confdefs.h"
1255
@@ -25300,7 +25306,7 @@
1256
JAVA_TEST=Object.java
1257
CLASS_TEST=Object.class
1258
cat << \EOF > $JAVA_TEST
1259
-/* #line 25303 "configure" */
1260
+/* #line 25309 "configure" */
1264
@@ -25393,7 +25399,7 @@
1265
if uudecode$EXEEXT Test.uue; then
1266
ac_cv_prog_uudecode_base64=yes
1268
- echo "configure: 25396: uudecode had trouble decoding base 64 file 'Test.uue'" >&5
1269
+ echo "configure: 25402: uudecode had trouble decoding base 64 file 'Test.uue'" >&5
1270
echo "configure: failed file was:" >&5
1272
ac_cv_prog_uudecode_base64=no
1273
@@ -25421,7 +25427,7 @@
1274
CLASS_TEST=Test.class
1276
cat << \EOF > $JAVA_TEST
1277
-/* [#]line 25424 "configure" */
1278
+/* [#]line 25430 "configure" */
1280
public static void main (String args[]) {
1282
@@ -25629,7 +25635,7 @@
1284
CLASS_TEST=Test.class
1285
cat << \EOF > $JAVA_TEST
1286
- /* #line 25632 "configure" */
1287
+ /* #line 25638 "configure" */
1290
public static void main(String args)
1291
--- a/src/libjava/configure
1292
+++ b/src/libjava/configure
1293
@@ -8842,7 +8842,7 @@
1297
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1298
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1299
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1300
# Find out which ABI we are using.
1301
echo 'int i;' > conftest.$ac_ext
1302
@@ -8867,7 +8867,10 @@
1306
- ppc64-*linux*|powerpc64-*linux*)
1307
+ powerpc64le-*linux*)
1308
+ LD="${LD-ld} -m elf32lppclinux"
1310
+ powerpc64-*linux*)
1311
LD="${LD-ld} -m elf32ppclinux"
1314
@@ -8886,7 +8889,10 @@
1316
LD="${LD-ld} -m elf_x86_64"
1318
- ppc*-*linux*|powerpc*-*linux*)
1319
+ powerpcle-*linux*)
1320
+ LD="${LD-ld} -m elf64lppc"
1323
LD="${LD-ld} -m elf64ppc"
1325
s390*-*linux*|s390*-*tpf*)
1326
@@ -13382,7 +13388,7 @@
1327
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1328
lt_status=$lt_dlunknown
1329
cat > conftest.$ac_ext <<_LT_EOF
1330
-#line 13385 "configure"
1331
+#line 13391 "configure"
1332
#include "confdefs.h"
1335
@@ -13488,7 +13494,7 @@
1336
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1337
lt_status=$lt_dlunknown
1338
cat > conftest.$ac_ext <<_LT_EOF
1339
-#line 13491 "configure"
1340
+#line 13497 "configure"
1341
#include "confdefs.h"
1344
@@ -19483,7 +19489,7 @@
1345
enableval=$enable_sjlj_exceptions; :
1347
cat > conftest.$ac_ext << EOF
1348
-#line 19486 "configure"
1349
+#line 19492 "configure"
1353
--- a/src/libgcc/config/rs6000/tramp.S
1354
+++ b/src/libgcc/config/rs6000/tramp.S
1355
@@ -116,4 +116,70 @@
1359
+#elif _CALL_ELF == 2
1360
+ .type trampoline_initial,@object
1362
+trampoline_initial:
1363
+ ld r11,.Lchain(r12)
1364
+ ld r12,.Lfunc(r12)
1367
+.Lfunc = .-trampoline_initial
1368
+ .quad 0 /* will be replaced with function address */
1369
+.Lchain = .-trampoline_initial
1370
+ .quad 0 /* will be replaced with static chain */
1372
+trampoline_size = .-trampoline_initial
1373
+ .size trampoline_initial,trampoline_size
1376
+/* R3 = stack address to store trampoline */
1377
+/* R4 = length of trampoline area */
1378
+/* R5 = function address */
1379
+/* R6 = static chain */
1381
+ .pushsection ".toc","aw"
1383
+ .quad trampoline_initial-8
1386
+FUNC_START(__trampoline_setup)
1387
+ addis 7,2,.LC0@toc@ha
1388
+ ld 7,.LC0@toc@l(7) /* trampoline address -8 */
1390
+ li r8,trampoline_size /* verify that the trampoline is big enough */
1392
+ srwi r4,r4,3 /* # doublewords to move */
1393
+ addi r9,r3,-8 /* adjust pointer for stdu */
1397
+ /* Copy the instructions to the stack */
1403
+ /* Store correct function and static chain */
1405
+ std r6,.Lchain(r3)
1407
+ /* Now flush both caches */
1415
+ /* Finally synchronize things & return */
1421
+ bl JUMP_TARGET(abort)
1423
+FUNC_END(__trampoline_setup)
1426
--- a/src/libgcc/config/rs6000/linux-unwind.h
1427
+++ b/src/libgcc/config/rs6000/linux-unwind.h
1435
#define R_VRSAVE 109
1437
+#ifdef __powerpc64__
1439
+#define TOC_SAVE_SLOT 24
1441
+#define TOC_SAVE_SLOT 40
1447
__attribute__ ((vector_size (16))) int vr[32];
1450
else if (pc[1] == 0x380000AC)
1453
+ /* These old kernel versions never supported ELFv2. */
1454
/* This works for 2.4 kernels, but not for 2.6 kernels with vdso
1455
because pc isn't pointing into the stack. Can be removed when
1456
no one is running 2.4.19 or 2.4.20, the first two ppc64
1458
if ((long) frame24->puc != -21 * 8)
1459
return frame24->puc->regs;
1463
/* This works for 2.4.21 and later kernels. */
1464
struct rt_sigframe {
1467
struct gcc_regs *regs = get_regs (context);
1468
struct gcc_vregs *vregs;
1473
@@ -206,11 +220,21 @@
1474
fs->regs.reg[i].loc.offset = (long) ®s->gpr[i] - new_cfa;
1477
+ /* The CR is saved in the low 32 bits of regs->ccr. */
1478
+ cr_offset = (long) ®s->ccr - new_cfa;
1479
+#ifndef __LITTLE_ENDIAN__
1480
+ cr_offset += sizeof (long) - 4;
1482
+ /* In the ELFv1 ABI, CR2 stands in for the whole CR. */
1483
fs->regs.reg[R_CR2].how = REG_SAVED_OFFSET;
1484
- /* CR? regs are always 32-bit and PPC is big-endian, so in 64-bit
1485
- libgcc loc.offset needs to point to the low 32 bits of regs->ccr. */
1486
- fs->regs.reg[R_CR2].loc.offset = (long) ®s->ccr - new_cfa
1487
- + sizeof (long) - 4;
1488
+ fs->regs.reg[R_CR2].loc.offset = cr_offset;
1490
+ /* In the ELFv2 ABI, every CR field has a separate CFI entry. */
1491
+ fs->regs.reg[R_CR3].how = REG_SAVED_OFFSET;
1492
+ fs->regs.reg[R_CR3].loc.offset = cr_offset;
1493
+ fs->regs.reg[R_CR4].how = REG_SAVED_OFFSET;
1494
+ fs->regs.reg[R_CR4].loc.offset = cr_offset;
1497
fs->regs.reg[R_LR].how = REG_SAVED_OFFSET;
1498
fs->regs.reg[R_LR].loc.offset = (long) ®s->link - new_cfa;
1499
@@ -294,9 +318,13 @@
1500
figure out if it was saved. The big problem here is that the
1501
code that does the save/restore is generated by the linker, so
1502
we have no good way to determine at compile time what to do. */
1503
- if (pc[0] == 0xF8410028
1504
+ if (pc[0] == 0xF8410000 + TOC_SAVE_SLOT
1506
+ /* The ELFv2 linker never generates the old PLT stub form. */
1507
|| ((pc[0] & 0xFFFF0000) == 0x3D820000
1508
- && pc[1] == 0xF8410028))
1509
+ && pc[1] == 0xF8410000 + TOC_SAVE_SLOT)
1513
/* We are in a plt call stub or r2 adjusting long branch stub,
1514
before r2 has been saved. Keep REG_UNSAVED. */
1515
@@ -305,18 +333,21 @@
1518
= (unsigned int *) _Unwind_GetGR (context, R_LR);
1519
- if (insn && *insn == 0xE8410028)
1520
- _Unwind_SetGRPtr (context, 2, context->cfa + 40);
1521
+ if (insn && *insn == 0xE8410000 + TOC_SAVE_SLOT)
1522
+ _Unwind_SetGRPtr (context, 2, context->cfa + TOC_SAVE_SLOT);
1524
+ /* ELFv2 does not use this function pointer call sequence. */
1525
else if (pc[0] == 0x4E800421
1526
- && pc[1] == 0xE8410028)
1527
+ && pc[1] == 0xE8410000 + TOC_SAVE_SLOT)
1529
/* We are at the bctrl instruction in a call via function
1530
pointer. gcc always emits the load of the new R2 just
1531
before the bctrl so this is the first and only place
1532
we need to use the stored R2. */
1533
_Unwind_Word sp = _Unwind_GetGR (context, 1);
1534
- _Unwind_SetGRPtr (context, 2, (void *)(sp + 40));
1535
+ _Unwind_SetGRPtr (context, 2, (void *)(sp + TOC_SAVE_SLOT));
1541
--- a/src/libgcc/ChangeLog.ibm
1542
+++ b/src/libgcc/ChangeLog.ibm
1544
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1546
+ Backport from mainline r204808:
1548
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1549
+ Alan Modra <amodra@gmail.com>
1551
+ * config/rs6000/linux-unwind.h (TOC_SAVE_SLOT): Define.
1552
+ (frob_update_context): Use it.
1554
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1555
+ Alan Modra <amodra@gmail.com>
1557
+ * config/rs6000/tramp.S [__powerpc64__ && _CALL_ELF == 2]:
1558
+ (trampoline_initial): Provide ELFv2 variant.
1559
+ (__trampoline_setup): Likewise.
1561
+ * config/rs6000/linux-unwind.h (frob_update_context): Do not
1562
+ check for AIX indirect function call sequence if _CALL_ELF == 2.
1564
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1565
+ Alan Modra <amodra@gmail.com>
1567
+ * config/rs6000/linux-unwind.h (get_regs): Do not support
1568
+ old kernel versions if _CALL_ELF == 2.
1569
+ (frob_update_context): Do not support PLT stub variants only
1570
+ generated by old linkers if _CALL_ELF == 2.
1572
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1574
+ Backport from mainline r204800:
1576
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
1577
+ Alan Modra <amodra@gmail.com>
1579
+ * config/rs6000/linux-unwind.h (ppc_fallback_frame_state): Correct
1580
+ location of CR save area for 64-bit little-endian systems.
1582
--- a/src/config.guess
1583
+++ b/src/config.guess
1586
# Attempt to guess a canonical system name.
1587
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
1588
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
1589
-# 2011, 2012, 2013 Free Software Foundation, Inc.
1590
+# Copyright 1992-2013 Free Software Foundation, Inc.
1592
-timestamp='2012-12-30'
1593
+timestamp='2013-06-10'
1595
# This file is free software; you can redistribute it and/or modify it
1596
# under the terms of the GNU General Public License as published by
1598
GNU config.guess ($timestamp)
1600
Originally written by Per Bothner.
1601
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
1602
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
1603
-2012, 2013 Free Software Foundation, Inc.
1604
+Copyright 1992-2013 Free Software Foundation, Inc.
1606
This is free software; see the source for copying conditions. There is NO
1607
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
1608
@@ -136,6 +132,27 @@
1609
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
1610
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
1612
+case "${UNAME_SYSTEM}" in
1614
+ # If the system lacks a compiler, then just pick glibc.
1615
+ # We could probably try harder.
1618
+ eval $set_cc_for_build
1619
+ cat <<-EOF > $dummy.c
1620
+ #include <features.h>
1621
+ #if defined(__UCLIBC__)
1623
+ #elif defined(__dietlibc__)
1629
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
1633
# Note: order is significant - the case branches are not exclusive.
1635
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
1636
@@ -857,21 +874,21 @@
1640
- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
1641
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
1644
# other systems with GNU libc and userland
1645
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
1646
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
1649
echo ${UNAME_MACHINE}-pc-minix
1652
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1653
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1655
aarch64_be:Linux:*:*)
1656
UNAME_MACHINE=aarch64_be
1657
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1658
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1661
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
1662
@@ -884,59 +901,54 @@
1663
EV68*) UNAME_MACHINE=alphaev68 ;;
1665
objdump --private-headers /bin/sh | grep -q ld.so.1
1666
- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
1667
- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
1668
+ if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
1669
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1671
+ arc:Linux:*:* | arceb:Linux:*:*)
1672
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1675
eval $set_cc_for_build
1676
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
1677
| grep -q __ARM_EABI__
1679
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1680
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1682
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
1683
| grep -q __ARM_PCS_VFP
1685
- echo ${UNAME_MACHINE}-unknown-linux-gnueabi
1686
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
1688
- echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
1689
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
1694
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1695
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1698
- echo ${UNAME_MACHINE}-axis-linux-gnu
1699
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
1702
- echo ${UNAME_MACHINE}-axis-linux-gnu
1703
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
1706
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1707
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1710
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1711
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1715
- eval $set_cc_for_build
1716
- sed 's/^ //' << EOF >$dummy.c
1717
- #ifdef __dietlibc__
1721
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
1722
- echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
1723
+ echo ${UNAME_MACHINE}-pc-linux-${LIBC}
1726
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1727
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1730
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1731
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1734
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1735
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1737
mips:Linux:*:* | mips64:Linux:*:*)
1738
eval $set_cc_for_build
1739
@@ -955,54 +967,63 @@
1742
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
1743
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
1744
+ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
1747
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1750
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1751
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1754
- echo sparc-unknown-linux-gnu
1755
+ echo sparc-unknown-linux-${LIBC}
1757
parisc64:Linux:*:* | hppa64:Linux:*:*)
1758
- echo hppa64-unknown-linux-gnu
1759
+ echo hppa64-unknown-linux-${LIBC}
1761
parisc:Linux:*:* | hppa:Linux:*:*)
1762
# Look for CPU level
1763
case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
1764
- PA7*) echo hppa1.1-unknown-linux-gnu ;;
1765
- PA8*) echo hppa2.0-unknown-linux-gnu ;;
1766
- *) echo hppa-unknown-linux-gnu ;;
1767
+ PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
1768
+ PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
1769
+ *) echo hppa-unknown-linux-${LIBC} ;;
1773
- echo powerpc64-unknown-linux-gnu
1774
+ echo powerpc64-unknown-linux-${LIBC}
1777
- echo powerpc-unknown-linux-gnu
1778
+ echo powerpc-unknown-linux-${LIBC}
1780
+ ppc64le:Linux:*:*)
1781
+ echo powerpc64le-unknown-linux-${LIBC}
1784
+ echo powerpcle-unknown-linux-${LIBC}
1786
s390:Linux:*:* | s390x:Linux:*:*)
1787
- echo ${UNAME_MACHINE}-ibm-linux
1788
+ echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
1791
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1792
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1795
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1796
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1798
sparc:Linux:*:* | sparc64:Linux:*:*)
1799
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1800
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1803
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1804
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1807
- echo ${UNAME_MACHINE}-dec-linux-gnu
1808
+ echo ${UNAME_MACHINE}-dec-linux-${LIBC}
1811
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1812
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1815
- echo ${UNAME_MACHINE}-unknown-linux-gnu
1816
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1818
i*86:DYNIX/ptx:4*:*)
1819
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
1820
@@ -1235,19 +1256,21 @@
1823
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
1824
- case $UNAME_PROCESSOR in
1826
- eval $set_cc_for_build
1827
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1828
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
1829
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1830
- grep IS_64BIT_ARCH >/dev/null
1832
- UNAME_PROCESSOR="x86_64"
1835
- unknown) UNAME_PROCESSOR=powerpc ;;
1837
+ eval $set_cc_for_build
1838
+ if test "$UNAME_PROCESSOR" = unknown ; then
1839
+ UNAME_PROCESSOR=powerpc
1841
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1842
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
1843
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1844
+ grep IS_64BIT_ARCH >/dev/null
1846
+ case $UNAME_PROCESSOR in
1847
+ i386) UNAME_PROCESSOR=x86_64 ;;
1848
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
1852
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
1854
*:procnto*:*:* | *:QNX:[0123456789]*:*)
1855
--- a/src/gcc/configure
1856
+++ b/src/gcc/configure
1857
@@ -13589,7 +13589,7 @@
1861
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
1862
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
1863
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
1864
# Find out which ABI we are using.
1865
echo 'int i;' > conftest.$ac_ext
1866
@@ -13614,7 +13614,10 @@
1870
- ppc64-*linux*|powerpc64-*linux*)
1871
+ powerpc64le-*linux*)
1872
+ LD="${LD-ld} -m elf32lppclinux"
1874
+ powerpc64-*linux*)
1875
LD="${LD-ld} -m elf32ppclinux"
1878
@@ -13633,7 +13636,10 @@
1880
LD="${LD-ld} -m elf_x86_64"
1882
- ppc*-*linux*|powerpc*-*linux*)
1883
+ powerpcle-*linux*)
1884
+ LD="${LD-ld} -m elf64lppc"
1887
LD="${LD-ld} -m elf64ppc"
1889
s390*-*linux*|s390*-*tpf*)
1890
--- a/src/gcc/builtins.c
1891
+++ b/src/gcc/builtins.c
1892
@@ -5850,6 +5850,9 @@
1895
CASE_FLT_FN (BUILT_IN_FABS):
1896
+ case BUILT_IN_FABSD32:
1897
+ case BUILT_IN_FABSD64:
1898
+ case BUILT_IN_FABSD128:
1899
target = expand_builtin_fabs (exp, target, subtarget);
1902
@@ -10302,6 +10305,9 @@
1903
return fold_builtin_strlen (loc, type, arg0);
1905
CASE_FLT_FN (BUILT_IN_FABS):
1906
+ case BUILT_IN_FABSD32:
1907
+ case BUILT_IN_FABSD64:
1908
+ case BUILT_IN_FABSD128:
1909
return fold_builtin_fabs (loc, arg0, type);
1912
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc-target-2.c
1913
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc-target-2.c
1915
/* { dg-final { scan-assembler-times "fabs" 3 } } */
1916
/* { dg-final { scan-assembler-times "fnabs" 3 } } */
1917
/* { dg-final { scan-assembler-times "fsel" 3 } } */
1918
-/* { dg-final { scan-assembler-times "fcpsgn" 3 } } */
1919
-/* { dg-final { scan-assembler-times "xscpsgndp" 1 } } */
1920
+/* { dg-final { scan-assembler-times "fcpsgn\|xscpsgndp" 4 } } */
1922
/* fabs/fnabs/fsel */
1923
double normal1 (double a, double b) { return __builtin_copysign (a, b); }
1924
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-1.c
1925
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-1.c
1927
+/* { dg-do compile { target { powerpc*-*-* } } } */
1928
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
1929
+/* { dg-require-effective-target powerpc_p8vector_ok } */
1930
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
1933
+#define TYPE long long
1937
+#define SIGN_TYPE signed TYPE
1941
+#define UNS_TYPE unsigned TYPE
1944
+typedef vector SIGN_TYPE v_sign;
1945
+typedef vector UNS_TYPE v_uns;
1947
+v_sign sign_add (v_sign a, v_sign b)
1952
+v_sign sign_sub (v_sign a, v_sign b)
1957
+v_sign sign_shift_left (v_sign a, v_sign b)
1962
+v_sign sign_shift_right (v_sign a, v_sign b)
1967
+v_uns uns_add (v_uns a, v_uns b)
1972
+v_uns uns_sub (v_uns a, v_uns b)
1977
+v_uns uns_shift_left (v_uns a, v_uns b)
1982
+v_uns uns_shift_right (v_uns a, v_uns b)
1987
+/* { dg-final { scan-assembler-times "vaddudm" 2 } } */
1988
+/* { dg-final { scan-assembler-times "vsubudm" 2 } } */
1989
+/* { dg-final { scan-assembler-times "vsld" 2 } } */
1990
+/* { dg-final { scan-assembler-times "vsrad" 1 } } */
1991
+/* { dg-final { scan-assembler-times "vsrd" 1 } } */
1992
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c
1993
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c
1995
+/* { dg-do compile { target { powerpc*-*-* } } } */
1996
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
1997
+/* { dg-require-effective-target powerpc_p8vector_ok } */
1998
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
2009
+#define TYPE long long
2013
+#define SIGN_TYPE signed TYPE
2017
+#define UNS_TYPE unsigned TYPE
2020
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
2022
+SIGN_TYPE sa[SIZE] ALIGN_ATTR;
2023
+SIGN_TYPE sb[SIZE] ALIGN_ATTR;
2024
+SIGN_TYPE sc[SIZE] ALIGN_ATTR;
2026
+UNS_TYPE ua[SIZE] ALIGN_ATTR;
2027
+UNS_TYPE ub[SIZE] ALIGN_ATTR;
2028
+UNS_TYPE uc[SIZE] ALIGN_ATTR;
2035
+ for (i = 0; i < SIZE; i++)
2036
+ sa[i] = sb[i] + sc[i];
2044
+ for (i = 0; i < SIZE; i++)
2045
+ sa[i] = sb[i] - sc[i];
2049
+sign_shift_left (void)
2053
+ for (i = 0; i < SIZE; i++)
2054
+ sa[i] = sb[i] << sc[i];
2058
+sign_shift_right (void)
2062
+ for (i = 0; i < SIZE; i++)
2063
+ sa[i] = sb[i] >> sc[i];
2071
+ for (i = 0; i < SIZE; i++)
2072
+ sa[i] = (sb[i] > sc[i]) ? sb[i] : sc[i];
2080
+ for (i = 0; i < SIZE; i++)
2081
+ sa[i] = (sb[i] < sc[i]) ? sb[i] : sc[i];
2089
+ for (i = 0; i < SIZE; i++)
2090
+ sa[i] = (sb[i] < 0) ? -sb[i] : sb[i]; /* xor, vsubudm, vmaxsd. */
2094
+sign_eq (SIGN_TYPE val1, SIGN_TYPE val2)
2098
+ for (i = 0; i < SIZE; i++)
2099
+ sa[i] = (sb[i] == sc[i]) ? val1 : val2;
2103
+sign_lt (SIGN_TYPE val1, SIGN_TYPE val2)
2107
+ for (i = 0; i < SIZE; i++)
2108
+ sa[i] = (sb[i] < sc[i]) ? val1 : val2;
2116
+ for (i = 0; i < SIZE; i++)
2117
+ ua[i] = ub[i] + uc[i];
2125
+ for (i = 0; i < SIZE; i++)
2126
+ ua[i] = ub[i] - uc[i];
2130
+uns_shift_left (void)
2134
+ for (i = 0; i < SIZE; i++)
2135
+ ua[i] = ub[i] << uc[i];
2139
+uns_shift_right (void)
2143
+ for (i = 0; i < SIZE; i++)
2144
+ ua[i] = ub[i] >> uc[i];
2152
+ for (i = 0; i < SIZE; i++)
2153
+ ua[i] = (ub[i] > uc[i]) ? ub[i] : uc[i];
2161
+ for (i = 0; i < SIZE; i++)
2162
+ ua[i] = (ub[i] < uc[i]) ? ub[i] : uc[i];
2166
+uns_eq (UNS_TYPE val1, UNS_TYPE val2)
2170
+ for (i = 0; i < SIZE; i++)
2171
+ ua[i] = (ub[i] == uc[i]) ? val1 : val2;
2175
+uns_lt (UNS_TYPE val1, UNS_TYPE val2)
2179
+ for (i = 0; i < SIZE; i++)
2180
+ ua[i] = (ub[i] < uc[i]) ? val1 : val2;
2183
+/* { dg-final { scan-assembler-times "\[\t \]vaddudm\[\t \]" 2 } } */
2184
+/* { dg-final { scan-assembler-times "\[\t \]vsubudm\[\t \]" 3 } } */
2185
+/* { dg-final { scan-assembler-times "\[\t \]vmaxsd\[\t \]" 2 } } */
2186
+/* { dg-final { scan-assembler-times "\[\t \]vmaxud\[\t \]" 1 } } */
2187
+/* { dg-final { scan-assembler-times "\[\t \]vminsd\[\t \]" 1 } } */
2188
+/* { dg-final { scan-assembler-times "\[\t \]vminud\[\t \]" 1 } } */
2189
+/* { dg-final { scan-assembler-times "\[\t \]vsld\[\t \]" 2 } } */
2190
+/* { dg-final { scan-assembler-times "\[\t \]vsrad\[\t \]" 1 } } */
2191
+/* { dg-final { scan-assembler-times "\[\t \]vsrd\[\t \]" 1 } } */
2192
+/* { dg-final { scan-assembler-times "\[\t \]vcmpequd\[\t \]" 2 } } */
2193
+/* { dg-final { scan-assembler-times "\[\t \]vcmpgtsd\[\t \]" 1 } } */
2194
+/* { dg-final { scan-assembler-times "\[\t \]vcmpgtud\[\t \]" 1 } } */
2195
--- a/src/gcc/testsuite/gcc.target/powerpc/pr57744.c
2196
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr57744.c
2198
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
2199
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2200
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2201
+/* { dg-options "-mcpu=power8 -O3" } */
2205
+typedef unsigned U_16 __attribute__((mode(TI)));
2207
+extern int libat_compare_exchange_16 (U_16 *, U_16 *, U_16, int, int)
2208
+ __attribute__((__noinline__));
2210
+/* PR 57744: lqarx/stqcx needs even/odd register pairs. The assembler will
2211
+ complain if the compiler gets an odd/even register pair. Create a function
2212
+ which has the 16 byte compare and exchange instructions, but don't actually
2213
+ execute it, so that we can detect these failures on older machines. */
2216
+libat_compare_exchange_16 (U_16 *mptr, U_16 *eptr, U_16 newval,
2217
+ int smodel, int fmodel __attribute__((unused)))
2219
+ if (((smodel) == 0))
2220
+ return __atomic_compare_exchange_n (mptr, eptr, newval, 0, 0, 0);
2221
+ else if (((smodel) != 5))
2222
+ return __atomic_compare_exchange_n (mptr, eptr, newval, 0, 4, 0);
2224
+ return __atomic_compare_exchange_n (mptr, eptr, newval, 0, 5, 0);
2227
+U_16 a = 1, b = 1, c = -2;
2228
+volatile int do_test = 0;
2232
+ if (do_test && !libat_compare_exchange_16 (&a, &b, c, 0, 0))
2237
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-1.c
2238
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-1.c
2240
/* { dg-options "-O2 -mrecip -ffast-math -mcpu=power6" } */
2241
/* { dg-final { scan-assembler-times "frsqrte" 2 } } */
2242
/* { dg-final { scan-assembler-times "fmsub" 2 } } */
2243
-/* { dg-final { scan-assembler-times "fmul" 8 } } */
2244
-/* { dg-final { scan-assembler-times "fnmsub" 4 } } */
2245
+/* { dg-final { scan-assembler-times "fmul" 6 } } */
2246
+/* { dg-final { scan-assembler-times "fnmsub" 3 } } */
2250
--- a/src/gcc/testsuite/gcc.target/powerpc/darwin-longlong.c
2251
+++ b/src/gcc/testsuite/gcc.target/powerpc/darwin-longlong.c
2256
+#ifdef __LITTLE_ENDIAN__
2264
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-p8.c
2265
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-p8.c
2267
+/* { dg-do compile { target { powerpc*-*-* } } } */
2268
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2269
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2270
+/* { dg-options "-O2 -mcpu=power8" } */
2271
+/* { dg-final { scan-assembler-not "\[ \t\]and " } } */
2272
+/* { dg-final { scan-assembler-not "\[ \t\]or " } } */
2273
+/* { dg-final { scan-assembler-not "\[ \t\]xor " } } */
2274
+/* { dg-final { scan-assembler-not "\[ \t\]nor " } } */
2275
+/* { dg-final { scan-assembler-not "\[ \t\]eqv " } } */
2276
+/* { dg-final { scan-assembler-not "\[ \t\]andc " } } */
2277
+/* { dg-final { scan-assembler-not "\[ \t\]orc " } } */
2278
+/* { dg-final { scan-assembler-not "\[ \t\]nand " } } */
2279
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
2280
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
2281
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
2282
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
2283
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
2284
+/* { dg-final { scan-assembler "\[ \t\]xxland " } } */
2285
+/* { dg-final { scan-assembler "\[ \t\]xxlor " } } */
2286
+/* { dg-final { scan-assembler "\[ \t\]xxlxor " } } */
2287
+/* { dg-final { scan-assembler "\[ \t\]xxlnor " } } */
2288
+/* { dg-final { scan-assembler "\[ \t\]xxlandc " } } */
2289
+/* { dg-final { scan-assembler "\[ \t\]xxleqv " } } */
2290
+/* { dg-final { scan-assembler "\[ \t\]xxlorc " } } */
2291
+/* { dg-final { scan-assembler "\[ \t\]xxlnand " } } */
2294
+typedef int v4si __attribute__ ((vector_size (16)));
2299
--- a/src/gcc/testsuite/gcc.target/powerpc/mmfpgpr.c
2300
+++ b/src/gcc/testsuite/gcc.target/powerpc/mmfpgpr.c
2302
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2303
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2304
+/* { dg-require-effective-target powerpc_vsx_ok } */
2305
+/* { dg-options "-O2 -mcpu=power6x -mmfpgpr" } */
2306
+/* { dg-final { scan-assembler "mffgpr" } } */
2307
+/* { dg-final { scan-assembler "mftgpr" } } */
2309
+/* Test that we generate the instructions to move between the GPR and FPR
2310
+ registers under power6x. */
2312
+extern long return_long (void);
2313
+extern double return_double (void);
2315
+double return_double2 (void)
2317
+ return (double) return_long ();
2320
+long return_long2 (void)
2322
+ return (long) return_double ();
2324
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
2325
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
2327
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
2328
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2329
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
2330
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2331
+/* { dg-options "-mcpu=power8 -O2" } */
2332
+/* { dg-final { scan-assembler "mtvsrd" } } */
2333
+/* { dg-final { scan-assembler "mfvsrd" } } */
2335
+/* Check code generation for direct move for vector types. */
2337
+#define TYPE vector int
2338
+#define VSX_REG_ATTR "wa"
2340
+#include "direct-move.h"
2341
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-av.c
2342
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-av.c
2344
+/* { dg-do compile { target { powerpc*-*-* } } } */
2345
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2346
+/* { dg-require-effective-target powerpc_altivec_ok } */
2347
+/* { dg-options "-O2 -mcpu=power6 -maltivec" } */
2348
+/* { dg-final { scan-assembler-not "\[ \t\]and " } } */
2349
+/* { dg-final { scan-assembler-not "\[ \t\]or " } } */
2350
+/* { dg-final { scan-assembler-not "\[ \t\]xor " } } */
2351
+/* { dg-final { scan-assembler-not "\[ \t\]nor " } } */
2352
+/* { dg-final { scan-assembler-not "\[ \t\]andc " } } */
2353
+/* { dg-final { scan-assembler-not "\[ \t\]eqv " } } */
2354
+/* { dg-final { scan-assembler-not "\[ \t\]orc " } } */
2355
+/* { dg-final { scan-assembler-not "\[ \t\]nand " } } */
2356
+/* { dg-final { scan-assembler "\[ \t\]vand " } } */
2357
+/* { dg-final { scan-assembler "\[ \t\]vandc " } } */
2358
+/* { dg-final { scan-assembler "\[ \t\]vor " } } */
2359
+/* { dg-final { scan-assembler "\[ \t\]vxor " } } */
2360
+/* { dg-final { scan-assembler "\[ \t\]vnor " } } */
2361
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
2362
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
2363
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
2364
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
2365
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
2366
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
2367
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
2368
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
2371
+typedef int v4si __attribute__ ((vector_size (16)));
2376
--- a/src/gcc/testsuite/gcc.target/powerpc/pr43154.c
2377
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr43154.c
2379
/* { dg-do compile { target { powerpc*-*-* } } } */
2380
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2381
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
2382
/* { dg-require-effective-target powerpc_vsx_ok } */
2383
/* { dg-options "-O2 -mcpu=power7" } */
2385
--- a/src/gcc/testsuite/gcc.target/powerpc/pr59054.c
2386
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr59054.c
2388
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2389
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2390
+/* { dg-require-effective-target powerpc_vsx_ok } */
2391
+/* { dg-options "-mcpu=power7 -O0 -m64" } */
2393
+long foo (void) { return 0; }
2395
+/* { dg-final { scan-assembler-not "xxlor" } } */
2396
+/* { dg-final { scan-assembler-not "stfd" } } */
2397
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c
2398
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c
2400
+/* { dg-do compile { target { powerpc*-*-* } } } */
2401
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2402
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2403
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
2405
+#include <altivec.h>
2407
+typedef vector long long v_sign;
2408
+typedef vector unsigned long long v_uns;
2409
+typedef vector bool long long v_bool;
2411
+v_sign sign_add_1 (v_sign a, v_sign b)
2413
+ return __builtin_altivec_vaddudm (a, b);
2416
+v_sign sign_add_2 (v_sign a, v_sign b)
2418
+ return vec_add (a, b);
2421
+v_sign sign_add_3 (v_sign a, v_sign b)
2423
+ return vec_vaddudm (a, b);
2426
+v_sign sign_sub_1 (v_sign a, v_sign b)
2428
+ return __builtin_altivec_vsubudm (a, b);
2431
+v_sign sign_sub_2 (v_sign a, v_sign b)
2433
+ return vec_sub (a, b);
2437
+v_sign sign_sub_3 (v_sign a, v_sign b)
2439
+ return vec_vsubudm (a, b);
2442
+v_sign sign_min_1 (v_sign a, v_sign b)
2444
+ return __builtin_altivec_vminsd (a, b);
2447
+v_sign sign_min_2 (v_sign a, v_sign b)
2449
+ return vec_min (a, b);
2452
+v_sign sign_min_3 (v_sign a, v_sign b)
2454
+ return vec_vminsd (a, b);
2457
+v_sign sign_max_1 (v_sign a, v_sign b)
2459
+ return __builtin_altivec_vmaxsd (a, b);
2462
+v_sign sign_max_2 (v_sign a, v_sign b)
2464
+ return vec_max (a, b);
2467
+v_sign sign_max_3 (v_sign a, v_sign b)
2469
+ return vec_vmaxsd (a, b);
2472
+v_sign sign_abs (v_sign a)
2474
+ return vec_abs (a); /* xor, vsubudm, vmaxsd. */
2477
+v_bool sign_eq (v_sign a, v_sign b)
2479
+ return vec_cmpeq (a, b);
2482
+v_bool sign_lt (v_sign a, v_sign b)
2484
+ return vec_cmplt (a, b);
2487
+v_uns uns_add_2 (v_uns a, v_uns b)
2489
+ return vec_add (a, b);
2492
+v_uns uns_add_3 (v_uns a, v_uns b)
2494
+ return vec_vaddudm (a, b);
2497
+v_uns uns_sub_2 (v_uns a, v_uns b)
2499
+ return vec_sub (a, b);
2502
+v_uns uns_sub_3 (v_uns a, v_uns b)
2504
+ return vec_vsubudm (a, b);
2507
+v_uns uns_min_2 (v_uns a, v_uns b)
2509
+ return vec_min (a, b);
2512
+v_uns uns_min_3 (v_uns a, v_uns b)
2514
+ return vec_vminud (a, b);
2517
+v_uns uns_max_2 (v_uns a, v_uns b)
2519
+ return vec_max (a, b);
2522
+v_uns uns_max_3 (v_uns a, v_uns b)
2524
+ return vec_vmaxud (a, b);
2527
+v_bool uns_eq (v_uns a, v_uns b)
2529
+ return vec_cmpeq (a, b);
2532
+v_bool uns_lt (v_uns a, v_uns b)
2534
+ return vec_cmplt (a, b);
2537
+v_sign sign_rl_1 (v_sign a, v_sign b)
2539
+ return __builtin_altivec_vrld (a, b);
2542
+v_sign sign_rl_2 (v_sign a, v_uns b)
2544
+ return vec_rl (a, b);
2547
+v_uns uns_rl_2 (v_uns a, v_uns b)
2549
+ return vec_rl (a, b);
2552
+v_sign sign_sl_1 (v_sign a, v_sign b)
2554
+ return __builtin_altivec_vsld (a, b);
2557
+v_sign sign_sl_2 (v_sign a, v_uns b)
2559
+ return vec_sl (a, b);
2562
+v_sign sign_sl_3 (v_sign a, v_uns b)
2564
+ return vec_vsld (a, b);
2567
+v_uns uns_sl_2 (v_uns a, v_uns b)
2569
+ return vec_sl (a, b);
2572
+v_uns uns_sl_3 (v_uns a, v_uns b)
2574
+ return vec_vsld (a, b);
2577
+v_sign sign_sra_1 (v_sign a, v_sign b)
2579
+ return __builtin_altivec_vsrad (a, b);
2582
+v_sign sign_sra_2 (v_sign a, v_uns b)
2584
+ return vec_sra (a, b);
2587
+v_sign sign_sra_3 (v_sign a, v_uns b)
2589
+ return vec_vsrad (a, b);
2592
+/* { dg-final { scan-assembler-times "vaddudm" 5 } } */
2593
+/* { dg-final { scan-assembler-times "vsubudm" 6 } } */
2594
+/* { dg-final { scan-assembler-times "vmaxsd" 4 } } */
2595
+/* { dg-final { scan-assembler-times "vminsd" 3 } } */
2596
+/* { dg-final { scan-assembler-times "vmaxud" 2 } } */
2597
+/* { dg-final { scan-assembler-times "vminud" 2 } } */
2598
+/* { dg-final { scan-assembler-times "vcmpequd" 2 } } */
2599
+/* { dg-final { scan-assembler-times "vcmpgtsd" 1 } } */
2600
+/* { dg-final { scan-assembler-times "vcmpgtud" 1 } } */
2601
+/* { dg-final { scan-assembler-times "vrld" 3 } } */
2602
+/* { dg-final { scan-assembler-times "vsld" 5 } } */
2603
+/* { dg-final { scan-assembler-times "vsrad" 3 } } */
2604
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c
2605
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c
2607
+/* { dg-do compile { target { powerpc*-*-* } } } */
2608
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2609
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2610
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model" } */
2612
+#include <stddef.h>
2622
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
2624
+long long sign_ll[SIZE] ALIGN_ATTR;
2625
+int sign_i [SIZE] ALIGN_ATTR;
2627
+void copy_int_to_long_long (void)
2631
+ for (i = 0; i < SIZE; i++)
2632
+ sign_ll[i] = sign_i[i];
2635
+/* { dg-final { scan-assembler "vupkhsw" } } */
2636
+/* { dg-final { scan-assembler "vupklsw" } } */
2637
--- a/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-3.c
2638
+++ b/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-3.c
2640
+/* { dg-do compile } */
2641
+/* { dg-require-effective-target powerpc_altivec_ok } */
2642
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
2643
+/* { dg-options "-O -maltivec -mno-vsx" } */
2645
+typedef unsigned char V __attribute__((vector_size(16)));
2649
+ return __builtin_shuffle(x, y,
2650
+ (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
2656
+ return __builtin_shuffle(x, y,
2657
+ (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 });
2660
+/* { dg-final { scan-assembler-not "vperm" } } */
2661
+/* { dg-final { scan-assembler "vpkuhum" } } */
2662
+/* { dg-final { scan-assembler "vpkuwum" } } */
2663
--- a/src/gcc/testsuite/gcc.target/powerpc/pr58673-1.c
2664
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr58673-1.c
2666
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2667
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2668
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2669
+/* { dg-options "-mcpu=power8 -m64 -O1" } */
2673
+ QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode,
2674
+ DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE
2676
+enum bytecode_opcode
2678
+ neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI,
2679
+ constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI,
2680
+ loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI,
2681
+ storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK,
2682
+ addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI,
2683
+ convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU,
2684
+ convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI,
2685
+ convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF,
2686
+ convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF,
2687
+ convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF,
2688
+ convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI,
2689
+ convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI,
2690
+ convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI,
2691
+ convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT,
2692
+ convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF,
2693
+ addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI,
2694
+ mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF,
2695
+ divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI,
2696
+ xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU,
2697
+ rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI,
2698
+ leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF,
2699
+ geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI,
2700
+ eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI,
2701
+ negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI,
2702
+ predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI,
2703
+ preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF,
2704
+ preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP,
2705
+ postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI,
2706
+ postincSI, postincDI, postincP, postincSF, postincDF, postincXF,
2707
+ postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI,
2708
+ caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE
2710
+struct binary_operator
2712
+ enum bytecode_opcode opcode;
2713
+ enum typecode arg0;
2715
+static struct conversion_recipe
2717
+ unsigned char *opcodes;
2720
+conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int)
2721
+ LAST_AND_UNUSED_TYPECODE)];
2722
+static struct conversion_recipe
2723
+deduce_conversion (from, to)
2724
+ enum typecode from, to;
2726
+ (conversion_recipe[(int) from][(int) to].
2727
+ opcodes ? 0 : (conversion_recipe[(int) from][(int) to] =
2728
+ deduce_conversion (from, to), 0));
2732
+bc_expand_binary_operation (optab, resulttype, arg0, arg1)
2733
+ struct binary_operator optab[];
2735
+ int i, besti, cost, bestcost;
2736
+ enum typecode resultcode, arg0code;
2737
+ for (i = 0; optab[i].opcode != -1; ++i)
2739
+ (conversion_recipe[(int) arg0code][(int) optab[i].arg0].
2740
+ opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] =
2741
+ deduce_conversion (arg0code, optab[i].arg0), 0));
2744
--- a/src/gcc/testsuite/gcc.target/powerpc/no-r11-1.c
2745
+++ b/src/gcc/testsuite/gcc.target/powerpc/no-r11-1.c
2747
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2748
/* { dg-skip-if "" { *-*-darwin* } { "*" } { "" } } */
2749
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
2750
/* { dg-options "-O2 -mno-pointers-to-nested-functions" } */
2753
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-fp.c
2754
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-fp.c
2756
+/* { dg-do compile { target { powerpc*-*-* } } } */
2757
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2758
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2759
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf -fno-math-errno" } */
2761
+float abs_sf (float *p)
2764
+ __asm__ ("# reg %x0" : "+v" (f));
2765
+ return __builtin_fabsf (f);
2768
+float nabs_sf (float *p)
2771
+ __asm__ ("# reg %x0" : "+v" (f));
2772
+ return - __builtin_fabsf (f);
2775
+float neg_sf (float *p)
2778
+ __asm__ ("# reg %x0" : "+v" (f));
2782
+float add_sf (float *p, float *q)
2786
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2790
+float sub_sf (float *p, float *q)
2794
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2798
+float mul_sf (float *p, float *q)
2802
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2806
+float div_sf (float *p, float *q)
2810
+ __asm__ ("# reg %x0, %x1" : "+v" (f1), "+v" (f2));
2814
+float sqrt_sf (float *p)
2817
+ __asm__ ("# reg %x0" : "+v" (f));
2818
+ return __builtin_sqrtf (f);
2822
+double abs_df (double *p)
2825
+ __asm__ ("# reg %x0" : "+v" (d));
2826
+ return __builtin_fabs (d);
2829
+double nabs_df (double *p)
2832
+ __asm__ ("# reg %x0" : "+v" (d));
2833
+ return - __builtin_fabs (d);
2836
+double neg_df (double *p)
2839
+ __asm__ ("# reg %x0" : "+v" (d));
2843
+double add_df (double *p, double *q)
2847
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2851
+double sub_df (double *p, double *q)
2855
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2859
+double mul_df (double *p, double *q)
2863
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2867
+double div_df (double *p, double *q)
2871
+ __asm__ ("# reg %x0, %x1" : "+v" (d1), "+v" (d2));
2875
+double sqrt_df (float *p)
2878
+ __asm__ ("# reg %x0" : "+v" (d));
2879
+ return __builtin_sqrt (d);
2882
+/* { dg-final { scan-assembler "xsabsdp" } } */
2883
+/* { dg-final { scan-assembler "xsadddp" } } */
2884
+/* { dg-final { scan-assembler "xsaddsp" } } */
2885
+/* { dg-final { scan-assembler "xsdivdp" } } */
2886
+/* { dg-final { scan-assembler "xsdivsp" } } */
2887
+/* { dg-final { scan-assembler "xsmuldp" } } */
2888
+/* { dg-final { scan-assembler "xsmulsp" } } */
2889
+/* { dg-final { scan-assembler "xsnabsdp" } } */
2890
+/* { dg-final { scan-assembler "xsnegdp" } } */
2891
+/* { dg-final { scan-assembler "xssqrtdp" } } */
2892
+/* { dg-final { scan-assembler "xssqrtsp" } } */
2893
+/* { dg-final { scan-assembler "xssubdp" } } */
2894
+/* { dg-final { scan-assembler "xssubsp" } } */
2895
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
2896
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
2898
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
2899
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2900
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
2901
+/* { dg-require-effective-target p8vector_hw } */
2902
+/* { dg-options "-mcpu=power8 -O2" } */
2904
+/* Check whether we get the right bits for direct move at runtime. */
2906
+#define TYPE vector int
2908
+#define VSX_REG_ATTR "wa"
2910
+#include "direct-move.h"
2911
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3-p7.c
2912
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3-p7.c
2914
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2915
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2916
+/* { dg-require-effective-target powerpc_vsx_ok } */
2917
+/* { dg-options "-O2 -mcpu=power7" } */
2918
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
2919
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
2920
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
2921
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
2922
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
2923
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
2924
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
2925
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
2926
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
2927
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
2928
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
2929
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
2930
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
2931
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
2932
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
2933
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
2934
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
2935
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
2937
+/* On power7, for 128-bit types, ORC/ANDC/EQV might not show up, since the
2938
+ vector unit doesn't support these, so the appropriate combine patterns may
2939
+ not be generated. */
2943
+#define TYPE __int128_t
2945
+typedef int v4si __attribute__ ((vector_size (16)));
2951
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c
2952
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c
2954
+/* { dg-do compile { target { powerpc*-*-* } } } */
2955
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
2956
+/* { dg-require-effective-target powerpc_p8vector_ok } */
2957
+/* { dg-options "-mcpu=power8 -O3 -ftree-vectorize -fvect-cost-model" } */
2959
+#include <altivec.h>
2961
+typedef vector long long vll_sign;
2962
+typedef vector unsigned long long vll_uns;
2963
+typedef vector bool long long vll_bool;
2965
+typedef vector int vi_sign;
2966
+typedef vector unsigned int vi_uns;
2967
+typedef vector bool int vi_bool;
2969
+typedef vector short vs_sign;
2970
+typedef vector unsigned short vs_uns;
2971
+typedef vector bool short vs_bool;
2973
+typedef vector signed char vc_sign;
2974
+typedef vector unsigned char vc_uns;
2975
+typedef vector bool char vc_bool;
2978
+vi_sign vi_pack_1 (vll_sign a, vll_sign b)
2980
+ return __builtin_altivec_vpkudum (a, b);
2983
+vi_sign vi_pack_2 (vll_sign a, vll_sign b)
2985
+ return vec_pack (a, b);
2988
+vi_sign vi_pack_3 (vll_sign a, vll_sign b)
2990
+ return vec_vpkudum (a, b);
2993
+vs_sign vs_pack_1 (vi_sign a, vi_sign b)
2995
+ return __builtin_altivec_vpkuwum (a, b);
2998
+vs_sign vs_pack_2 (vi_sign a, vi_sign b)
3000
+ return vec_pack (a, b);
3003
+vs_sign vs_pack_3 (vi_sign a, vi_sign b)
3005
+ return vec_vpkuwum (a, b);
3008
+vc_sign vc_pack_1 (vs_sign a, vs_sign b)
3010
+ return __builtin_altivec_vpkuhum (a, b);
3013
+vc_sign vc_pack_2 (vs_sign a, vs_sign b)
3015
+ return vec_pack (a, b);
3018
+vc_sign vc_pack_3 (vs_sign a, vs_sign b)
3020
+ return vec_vpkuhum (a, b);
3023
+vll_sign vll_unpack_hi_1 (vi_sign a)
3025
+ return __builtin_altivec_vupkhsw (a);
3028
+vll_sign vll_unpack_hi_2 (vi_sign a)
3030
+ return vec_unpackh (a);
3033
+vll_sign vll_unpack_hi_3 (vi_sign a)
3035
+ return __builtin_vec_vupkhsw (a);
3038
+vll_sign vll_unpack_lo_1 (vi_sign a)
3040
+ return vec_vupklsw (a);
3043
+vll_sign vll_unpack_lo_2 (vi_sign a)
3045
+ return vec_unpackl (a);
3048
+vll_sign vll_unpack_lo_3 (vi_sign a)
3050
+ return vec_vupklsw (a);
3053
+/* { dg-final { scan-assembler-times "vpkudum" 3 } } */
3054
+/* { dg-final { scan-assembler-times "vpkuwum" 3 } } */
3055
+/* { dg-final { scan-assembler-times "vpkuhum" 3 } } */
3056
+/* { dg-final { scan-assembler-times "vupklsw" 3 } } */
3057
+/* { dg-final { scan-assembler-times "vupkhsw" 3 } } */
3058
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c
3059
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c
3061
+/* { dg-do compile { target { powerpc*-*-* } } } */
3062
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3063
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3064
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model" } */
3066
+#include <stddef.h>
3076
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
3078
+long long sign_ll[SIZE] ALIGN_ATTR;
3079
+int sign_i [SIZE] ALIGN_ATTR;
3081
+void copy_long_long_to_int (void)
3085
+ for (i = 0; i < SIZE; i++)
3086
+ sign_i[i] = sign_ll[i];
3089
+/* { dg-final { scan-assembler "vpkudum" } } */
3090
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move.h
3091
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move.h
3093
+/* Test functions for direct move support. */
3096
+extern void abort (void);
3098
+#ifndef VSX_REG_ATTR
3099
+#define VSX_REG_ATTR "wa"
3102
+void __attribute__((__noinline__))
3103
+copy (TYPE *a, TYPE *b)
3109
+void __attribute__((__noinline__))
3110
+load_gpr (TYPE *a, TYPE *b)
3113
+ __asm__ ("# gpr, reg = %0" : "+b" (c));
3119
+void __attribute__((__noinline__))
3120
+load_fpr (TYPE *a, TYPE *b)
3123
+ __asm__ ("# fpr, reg = %0" : "+d" (c));
3129
+void __attribute__((__noinline__))
3130
+load_altivec (TYPE *a, TYPE *b)
3133
+ __asm__ ("# altivec, reg = %0" : "+v" (c));
3139
+void __attribute__((__noinline__))
3140
+load_vsx (TYPE *a, TYPE *b)
3143
+ __asm__ ("# vsx, reg = %x0" : "+" VSX_REG_ATTR (c));
3148
+#ifndef NO_GPR_TO_VSX
3149
+void __attribute__((__noinline__))
3150
+load_gpr_to_vsx (TYPE *a, TYPE *b)
3154
+ __asm__ ("# gpr, reg = %0" : "+b" (c));
3156
+ __asm__ ("# vsx, reg = %x0" : "+" VSX_REG_ATTR (d));
3161
+#ifndef NO_VSX_TO_GPR
3162
+void __attribute__((__noinline__))
3163
+load_vsx_to_gpr (TYPE *a, TYPE *b)
3167
+ __asm__ ("# vsx, reg = %x0" : "+" VSX_REG_ATTR (c));
3169
+ __asm__ ("# gpr, reg = %0" : "+b" (d));
3175
+typedef void (fn_type (TYPE *, TYPE *));
3177
+struct test_struct {
3182
+const struct test_struct test_functions[] = {
3185
+ { load_gpr, "load_gpr" },
3188
+ { load_fpr, "load_fpr" },
3191
+ { load_altivec, "load_altivec" },
3194
+ { load_vsx, "load_vsx" },
3196
+#ifndef NO_GPR_TO_VSX
3197
+ { load_gpr_to_vsx, "load_gpr_to_vsx" },
3199
+#ifndef NO_VSX_TO_GPR
3200
+ { load_vsx_to_gpr, "load_vsx_to_gpr" },
3204
+/* Test a given value for each of the functions. */
3205
+void __attribute__((__noinline__))
3206
+test_value (TYPE a)
3210
+ for (i = 0; i < sizeof (test_functions) / sizeof (test_functions[0]); i++)
3214
+ test_functions[i].func (&a, &b);
3215
+ if (memcmp ((void *)&a, (void *)&b, sizeof (TYPE)) != 0)
3220
+/* Main program. */
3227
+ unsigned char bytes[sizeof (TYPE)];
3231
+ TYPE value = (TYPE)-5;
3232
+ for (i = 0; i < 12; i++)
3234
+ test_value (value);
3238
+ for (i = 0; i < 8*sizeof (TYPE); i++)
3239
+ test_value (((TYPE)1) << i);
3242
+ TYPE value = (TYPE)0;
3243
+ for (i = 0; i < 10; i++)
3245
+ test_value (value);
3246
+ test_value (~ value);
3250
+ for (i = 0; i < 8*sizeof (TYPE); i++)
3251
+ test_value (((TYPE)1) << i);
3254
+ TYPE value = (TYPE)-5;
3255
+ for (i = 0; i < 12; i++)
3257
+ test_value (value);
3261
+ test_value ((TYPE)3.1415926535);
3262
+ test_value ((TYPE)1.23456);
3263
+ test_value ((TYPE)(-0.0));
3264
+ test_value ((TYPE)NAN);
3265
+ test_value ((TYPE)+INFINITY);
3266
+ test_value ((TYPE)-INFINITY);
3269
+ for (j = 0; j < 10; j++)
3271
+ for (i = 0; i < sizeof (TYPE); i++)
3272
+ u.bytes[i] = (unsigned char) (random () >> 4);
3274
+ test_value (u.value);
3281
--- a/src/gcc/testsuite/gcc.target/powerpc/sd-vsx.c
3282
+++ b/src/gcc/testsuite/gcc.target/powerpc/sd-vsx.c
3284
+/* { dg-do compile { target { powerpc*-*-* } } } */
3285
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3286
+/* { dg-require-effective-target powerpc_vsx_ok } */
3287
+/* { dg-options "-O2 -mcpu=power7 -mhard-dfp" } */
3288
+/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */
3289
+/* { dg-final { scan-assembler-times "stfiwx" 1 } } */
3290
+/* { dg-final { scan-assembler-not "lfd" } } */
3291
+/* { dg-final { scan-assembler-not "stfd" } } */
3292
+/* { dg-final { scan-assembler-times "dctdp" 2 } } */
3293
+/* { dg-final { scan-assembler-times "dadd" 1 } } */
3294
+/* { dg-final { scan-assembler-times "drsp" 1 } } */
3296
+/* Test that power7 can directly load/store SDmode variables without using a
3300
+void inc_dec32 (void)
3302
+ a += (_Decimal32) 1.0;
3304
--- a/src/gcc/testsuite/gcc.target/powerpc/pr58673-2.c
3305
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr58673-2.c
3307
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3308
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3309
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3310
+/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */
3312
+#include <stddef.h>
3313
+#include <stdlib.h>
3315
+#include <string.h>
3317
+typedef long unsigned int size_t;
3318
+typedef struct _IO_FILE FILE;
3319
+typedef float real;
3320
+typedef real rvec[3];
3321
+typedef real matrix[3][3];
3322
+typedef real tensor[3][3];
3325
+ F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC,
3326
+ F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ,
3327
+ F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES,
3328
+ F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE,
3329
+ F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD,
3330
+ F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES,
3331
+ F_DVDL, F_DVDLKIN, F_NRE
3341
+ real rA, krA, rB, krB;
3348
+ t_iparams *iparams;
3373
+ eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy,
3374
+ eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory =
3375
+ eoObsNR, eoInter, eoUseVirial, eoNR
3377
+extern char *eoNames[eoNR];
3391
+ real act_value[eoObsNR];
3392
+ real av_value[eoObsNR];
3393
+ real ref_value[eoObsNR];
3394
+ int bObsUsed[eoObsNR];
3395
+ int nLJ, nBU, nQ, nIP;
3400
+pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile,
3403
+ static FILE *prop;
3404
+ static FILE **out = ((void *) 0);
3405
+ static FILE **qq = ((void *) 0);
3406
+ static FILE **ip = ((void *) 0);
3413
+ if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0))
3414
+ && (ip == ((void *) 0)))
3416
+ for (i = j = 0; (i < eoObsNR); i++)
3418
+ if (tcr->bObsUsed[i])
3422
+ (__builtin_constant_p (eoNames[i])
3423
+ && ((size_t) (const void *) ((eoNames[i]) + 1) -
3424
+ (size_t) (const void *) (eoNames[i]) ==
3425
+ 1) ? (((const char *) (eoNames[i]))[0] ==
3426
+ '\0' ? (char *) calloc ((size_t) 1,
3445
+ )): __strdup (eoNames[i])));
3448
+ (__builtin_constant_p (buf)
3449
+ && ((size_t) (const void *) ((buf) + 1) -
3450
+ (size_t) (const void *) (buf) ==
3451
+ 1) ? (((const char *) (buf))[0] ==
3452
+ '\0' ? (char *) calloc ((size_t) 1,
3470
+ )): __strdup (buf)));
3475
+ for (i = 0; (i < tcr->nLJ); i++)
3477
+ if (tcr->tcLJ[i].bPrint)
3479
+ xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])),
3488
+do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t,
3489
+ int step, real ener[], t_forcerec * fr, t_inputrec * ir,
3490
+ int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver,
3491
+ int nmols, t_commrec * cr, matrix box, tensor virial,
3492
+ tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt)
3494
+ int i, j, ati, atj, atnr2, type, ftype;
3495
+ real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf;
3496
+ real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind;
3497
+ int bTest, bPrint;
3498
+ t_coupl_iparams *tip;
3501
+ pr_ff (tcr, t, idef, cr, nfile, fnm);
3503
+ for (i = 0; (i < eoObsNR); i++)
3506
+ calc_deviation (tcr->av_value[i], tcr->act_value[i],
3507
+ tcr->ref_value[i]);
3508
+ prdev[i] = tcr->ref_value[i] - tcr->act_value[i];
3511
+ pr_dev (tcr, t, prdev, cr, nfile, fnm);
3512
+ for (i = 0; (i < atnr2); i++)
3514
+ factor = dt * deviation[tip->eObs];
3518
+ if (fabs (tip->xi.harmonic.krA) > 1.2e-38)
3519
+ idef->iparams[type].harmonic.krA *=
3520
+ (1 + factor / tip->xi.harmonic.krA);
3524
--- a/src/gcc/testsuite/gcc.target/powerpc/atomic-p7.c
3525
+++ b/src/gcc/testsuite/gcc.target/powerpc/atomic-p7.c
3527
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3528
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3529
+/* { dg-require-effective-target powerpc_vsx_ok } */
3530
+/* { dg-options "-mcpu=power7 -O2" } */
3531
+/* { dg-final { scan-assembler-not "lbarx" } } */
3532
+/* { dg-final { scan-assembler-not "lharx" } } */
3533
+/* { dg-final { scan-assembler-times "lwarx" 18 } } */
3534
+/* { dg-final { scan-assembler-times "ldarx" 6 } } */
3535
+/* { dg-final { scan-assembler-not "lqarx" } } */
3536
+/* { dg-final { scan-assembler-not "stbcx" } } */
3537
+/* { dg-final { scan-assembler-not "sthcx" } } */
3538
+/* { dg-final { scan-assembler-times "stwcx" 18 } } */
3539
+/* { dg-final { scan-assembler-times "stdcx" 6 } } */
3540
+/* { dg-final { scan-assembler-not "stqcx" } } */
3541
+/* { dg-final { scan-assembler-times "bl __atomic" 6 } } */
3542
+/* { dg-final { scan-assembler-times "isync" 12 } } */
3543
+/* { dg-final { scan-assembler-times "lwsync" 8 } } */
3544
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
3545
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
3546
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
3547
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
3548
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
3550
+/* Test for the byte atomic operations on power8 using lbarx/stbcx. */
3552
+char_fetch_add_relaxed (char *ptr, int value)
3554
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3558
+char_fetch_sub_consume (char *ptr, int value)
3560
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3564
+char_fetch_and_acquire (char *ptr, int value)
3566
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3570
+char_fetch_ior_release (char *ptr, int value)
3572
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3576
+char_fetch_xor_acq_rel (char *ptr, int value)
3578
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3582
+char_fetch_nand_seq_cst (char *ptr, int value)
3584
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3587
+/* Test for the half word atomic operations on power8 using lharx/sthcx. */
3589
+short_fetch_add_relaxed (short *ptr, int value)
3591
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3595
+short_fetch_sub_consume (short *ptr, int value)
3597
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3601
+short_fetch_and_acquire (short *ptr, int value)
3603
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3607
+short_fetch_ior_release (short *ptr, int value)
3609
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3613
+short_fetch_xor_acq_rel (short *ptr, int value)
3615
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3619
+short_fetch_nand_seq_cst (short *ptr, int value)
3621
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3624
+/* Test for the word atomic operations on power8 using lwarx/stwcx. */
3626
+int_fetch_add_relaxed (int *ptr, int value)
3628
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3632
+int_fetch_sub_consume (int *ptr, int value)
3634
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3638
+int_fetch_and_acquire (int *ptr, int value)
3640
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3644
+int_fetch_ior_release (int *ptr, int value)
3646
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3650
+int_fetch_xor_acq_rel (int *ptr, int value)
3652
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3656
+int_fetch_nand_seq_cst (int *ptr, int value)
3658
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3661
+/* Test for the double word atomic operations on power8 using ldarx/stdcx. */
3663
+long_fetch_add_relaxed (long *ptr, long value)
3665
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3669
+long_fetch_sub_consume (long *ptr, long value)
3671
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3675
+long_fetch_and_acquire (long *ptr, long value)
3677
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3681
+long_fetch_ior_release (long *ptr, long value)
3683
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3687
+long_fetch_xor_acq_rel (long *ptr, long value)
3689
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3693
+long_fetch_nand_seq_cst (long *ptr, long value)
3695
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3698
+/* Test for the quad word atomic operations on power8 using ldarx/stdcx. */
3700
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
3702
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
3706
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
3708
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
3712
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
3714
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
3718
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
3720
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
3724
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
3726
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
3730
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
3732
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
3734
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-3.c
3735
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-3.c
3737
/* { dg-do compile { target { { powerpc*-*-* } && { ! powerpc*-apple-darwin* } } } } */
3738
/* { dg-require-effective-target powerpc_fprs } */
3739
/* { dg-options "-O2 -mrecip -ffast-math -mcpu=power7" } */
3740
-/* { dg-final { scan-assembler-times "xsrsqrtedp" 1 } } */
3741
+/* { dg-final { scan-assembler-times "xsrsqrtedp\|frsqrte\ " 1 } } */
3742
/* { dg-final { scan-assembler-times "xsmsub.dp\|fmsub\ " 1 } } */
3743
-/* { dg-final { scan-assembler-times "xsmuldp" 4 } } */
3744
+/* { dg-final { scan-assembler-times "xsmuldp\|fmul\ " 4 } } */
3745
/* { dg-final { scan-assembler-times "xsnmsub.dp\|fnmsub\ " 2 } } */
3746
-/* { dg-final { scan-assembler-times "frsqrtes" 1 } } */
3747
-/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
3748
-/* { dg-final { scan-assembler-times "fmuls" 4 } } */
3749
-/* { dg-final { scan-assembler-times "fnmsubs" 2 } } */
3750
+/* { dg-final { scan-assembler-times "xsrsqrtesp\|frsqrtes" 1 } } */
3751
+/* { dg-final { scan-assembler-times "xsmsub.sp\|fmsubs" 1 } } */
3752
+/* { dg-final { scan-assembler-times "xsmulsp\|fmuls" 2 } } */
3753
+/* { dg-final { scan-assembler-times "xsnmsub.sp\|fnmsubs" 1 } } */
3757
--- a/src/gcc/testsuite/gcc.target/powerpc/no-r11-2.c
3758
+++ b/src/gcc/testsuite/gcc.target/powerpc/no-r11-2.c
3760
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3761
/* { dg-skip-if "" { *-*-darwin* } { "*" } { "" } } */
3762
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
3763
/* { dg-options "-O2 -mpointers-to-nested-functions" } */
3766
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
3767
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
3769
+/* { dg-do compile { target { powerpc*-*-* } } } */
3770
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3771
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3772
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */
3774
+float load_sf (float *p)
3777
+ __asm__ ("# reg %x0" : "+v" (f));
3781
+double load_df (double *p)
3784
+ __asm__ ("# reg %x0" : "+v" (d));
3788
+double load_dfsf (float *p)
3790
+ double d = (double) *p;
3791
+ __asm__ ("# reg %x0" : "+v" (d));
3795
+void store_sf (float *p, float f)
3797
+ __asm__ ("# reg %x0" : "+v" (f));
3801
+void store_df (double *p, double d)
3803
+ __asm__ ("# reg %x0" : "+v" (d));
3807
+/* { dg-final { scan-assembler "lxsspx" } } */
3808
+/* { dg-final { scan-assembler "lxsdx" } } */
3809
+/* { dg-final { scan-assembler "stxsspx" } } */
3810
+/* { dg-final { scan-assembler "stxsdx" } } */
3811
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3-p8.c
3812
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3-p8.c
3814
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
3815
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3816
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3817
+/* { dg-options "-O2 -mcpu=power8" } */
3818
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
3819
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
3820
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
3821
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
3822
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
3823
+/* { dg-final { scan-assembler "\[ \t\]eqv " } } */
3824
+/* { dg-final { scan-assembler "\[ \t\]orc " } } */
3825
+/* { dg-final { scan-assembler "\[ \t\]nand " } } */
3826
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
3827
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
3828
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
3829
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
3830
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
3831
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
3832
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
3833
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
3834
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
3835
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
3836
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
3837
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
3838
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
3842
+#define TYPE __int128_t
3844
+typedef int v4si __attribute__ ((vector_size (16)));
3850
--- a/src/gcc/testsuite/gcc.target/powerpc/htm-xl-intrin-1.c
3851
+++ b/src/gcc/testsuite/gcc.target/powerpc/htm-xl-intrin-1.c
3853
+/* This checks the availability of the XL compiler intrinsics for
3854
+ transactional execution with the expected prototypes. */
3856
+/* { dg-do compile { target { powerpc*-*-* } } } */
3857
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3858
+/* { dg-require-effective-target powerpc_htm_ok } */
3859
+/* { dg-options "-O2 -mhtm" } */
3861
+#include <htmxlintrin.h>
3864
+foo (void *TM_buff, long *result, unsigned char *code)
3866
+ *result++ = __TM_simple_begin ();
3867
+ *result++ = __TM_begin (TM_buff);
3868
+ *result++ = __TM_end ();
3870
+ __TM_named_abort (*code);
3873
+ *result++ = __TM_is_user_abort (TM_buff);
3874
+ *result++ = __TM_is_named_user_abort (TM_buff, code);
3875
+ *result++ = __TM_is_illegal (TM_buff);
3876
+ *result++ = __TM_is_footprint_exceeded (TM_buff);
3877
+ *result++ = __TM_nesting_depth (TM_buff);
3878
+ *result++ = __TM_is_nested_too_deep (TM_buff);
3879
+ *result++ = __TM_is_conflict (TM_buff);
3880
+ *result++ = __TM_is_failure_persistent (TM_buff);
3881
+ *result++ = __TM_failure_address (TM_buff);
3882
+ *result++ = __TM_failure_code (TM_buff);
3885
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c
3886
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c
3888
+/* { dg-do compile { target { powerpc*-*-* } } } */
3889
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
3890
+/* { dg-require-effective-target powerpc_p8vector_ok } */
3891
+/* { dg-options "-mcpu=power8 -O3 -ftree-vectorize -fvect-cost-model" } */
3893
+#include <altivec.h>
3895
+typedef vector long long vll_sign;
3896
+typedef vector unsigned long long vll_uns;
3897
+typedef vector bool long long vll_bool;
3899
+typedef vector int vi_sign;
3900
+typedef vector unsigned int vi_uns;
3901
+typedef vector bool int vi_bool;
3903
+typedef vector short vs_sign;
3904
+typedef vector unsigned short vs_uns;
3905
+typedef vector bool short vs_bool;
3907
+typedef vector signed char vc_sign;
3908
+typedef vector unsigned char vc_uns;
3909
+typedef vector bool char vc_bool;
3911
+vll_sign vll_clz_1 (vll_sign a)
3913
+ return __builtin_altivec_vclzd (a);
3916
+vll_sign vll_clz_2 (vll_sign a)
3918
+ return vec_vclz (a);
3921
+vll_sign vll_clz_3 (vll_sign a)
3923
+ return vec_vclzd (a);
3926
+vll_uns vll_clz_4 (vll_uns a)
3928
+ return vec_vclz (a);
3931
+vll_uns vll_clz_5 (vll_uns a)
3933
+ return vec_vclzd (a);
3936
+vi_sign vi_clz_1 (vi_sign a)
3938
+ return __builtin_altivec_vclzw (a);
3941
+vi_sign vi_clz_2 (vi_sign a)
3943
+ return vec_vclz (a);
3946
+vi_sign vi_clz_3 (vi_sign a)
3948
+ return vec_vclzw (a);
3951
+vi_uns vi_clz_4 (vi_uns a)
3953
+ return vec_vclz (a);
3956
+vi_uns vi_clz_5 (vi_uns a)
3958
+ return vec_vclzw (a);
3961
+vs_sign vs_clz_1 (vs_sign a)
3963
+ return __builtin_altivec_vclzh (a);
3966
+vs_sign vs_clz_2 (vs_sign a)
3968
+ return vec_vclz (a);
3971
+vs_sign vs_clz_3 (vs_sign a)
3973
+ return vec_vclzh (a);
3976
+vs_uns vs_clz_4 (vs_uns a)
3978
+ return vec_vclz (a);
3981
+vs_uns vs_clz_5 (vs_uns a)
3983
+ return vec_vclzh (a);
3986
+vc_sign vc_clz_1 (vc_sign a)
3988
+ return __builtin_altivec_vclzb (a);
3991
+vc_sign vc_clz_2 (vc_sign a)
3993
+ return vec_vclz (a);
3996
+vc_sign vc_clz_3 (vc_sign a)
3998
+ return vec_vclzb (a);
4001
+vc_uns vc_clz_4 (vc_uns a)
4003
+ return vec_vclz (a);
4006
+vc_uns vc_clz_5 (vc_uns a)
4008
+ return vec_vclzb (a);
4011
+vll_sign vll_popcnt_1 (vll_sign a)
4013
+ return __builtin_altivec_vpopcntd (a);
4016
+vll_sign vll_popcnt_2 (vll_sign a)
4018
+ return vec_vpopcnt (a);
4021
+vll_sign vll_popcnt_3 (vll_sign a)
4023
+ return vec_vpopcntd (a);
4026
+vll_uns vll_popcnt_4 (vll_uns a)
4028
+ return vec_vpopcnt (a);
4031
+vll_uns vll_popcnt_5 (vll_uns a)
4033
+ return vec_vpopcntd (a);
4036
+vi_sign vi_popcnt_1 (vi_sign a)
4038
+ return __builtin_altivec_vpopcntw (a);
4041
+vi_sign vi_popcnt_2 (vi_sign a)
4043
+ return vec_vpopcnt (a);
4046
+vi_sign vi_popcnt_3 (vi_sign a)
4048
+ return vec_vpopcntw (a);
4051
+vi_uns vi_popcnt_4 (vi_uns a)
4053
+ return vec_vpopcnt (a);
4056
+vi_uns vi_popcnt_5 (vi_uns a)
4058
+ return vec_vpopcntw (a);
4061
+vs_sign vs_popcnt_1 (vs_sign a)
4063
+ return __builtin_altivec_vpopcnth (a);
4066
+vs_sign vs_popcnt_2 (vs_sign a)
4068
+ return vec_vpopcnt (a);
4071
+vs_sign vs_popcnt_3 (vs_sign a)
4073
+ return vec_vpopcnth (a);
4076
+vs_uns vs_popcnt_4 (vs_uns a)
4078
+ return vec_vpopcnt (a);
4081
+vs_uns vs_popcnt_5 (vs_uns a)
4083
+ return vec_vpopcnth (a);
4086
+vc_sign vc_popcnt_1 (vc_sign a)
4088
+ return __builtin_altivec_vpopcntb (a);
4091
+vc_sign vc_popcnt_2 (vc_sign a)
4093
+ return vec_vpopcnt (a);
4096
+vc_sign vc_popcnt_3 (vc_sign a)
4098
+ return vec_vpopcntb (a);
4101
+vc_uns vc_popcnt_4 (vc_uns a)
4103
+ return vec_vpopcnt (a);
4106
+vc_uns vc_popcnt_5 (vc_uns a)
4108
+ return vec_vpopcntb (a);
4111
+vc_uns vc_gbb_1 (vc_uns a)
4113
+ return __builtin_altivec_vgbbd (a);
4116
+vc_sign vc_gbb_2 (vc_sign a)
4118
+ return vec_vgbbd (a);
4121
+vc_uns vc_gbb_3 (vc_uns a)
4123
+ return vec_vgbbd (a);
4126
+/* { dg-final { scan-assembler-times "vclzd" 5 } } */
4127
+/* { dg-final { scan-assembler-times "vclzw" 5 } } */
4128
+/* { dg-final { scan-assembler-times "vclzh" 5 } } */
4129
+/* { dg-final { scan-assembler-times "vclzb" 5 } } */
4131
+/* { dg-final { scan-assembler-times "vpopcntd" 5 } } */
4132
+/* { dg-final { scan-assembler-times "vpopcntw" 5 } } */
4133
+/* { dg-final { scan-assembler-times "vpopcnth" 5 } } */
4134
+/* { dg-final { scan-assembler-times "vpopcntb" 5 } } */
4136
+/* { dg-final { scan-assembler-times "vgbbd" 3 } } */
4137
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3-av.c
4138
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3-av.c
4140
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
4141
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4142
+/* { dg-require-effective-target powerpc_altivec_ok } */
4143
+/* { dg-options "-O2 -mcpu=power6 -mabi=altivec -maltivec -mno-vsx" } */
4144
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
4145
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
4146
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
4147
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
4148
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
4149
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
4150
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
4151
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
4152
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
4153
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
4154
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
4155
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
4156
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
4157
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
4158
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
4159
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
4160
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
4161
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
4163
+/* On altivec, for 128-bit types, ORC/ANDC/EQV might not show up, since the
4164
+ vector unit doesn't support these, so the appropriate combine patterns may
4165
+ not be generated. */
4169
+#define TYPE __int128_t
4171
+typedef int v4si __attribute__ ((vector_size (16)));
4177
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c
4178
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c
4180
+/* { dg-do compile { target { powerpc*-*-* } } } */
4181
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4182
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4183
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4193
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
4195
+#define DO_BUILTIN(PREFIX, TYPE, CLZ, POPCNT) \
4196
+TYPE PREFIX ## _a[SIZE] ALIGN_ATTR; \
4197
+TYPE PREFIX ## _b[SIZE] ALIGN_ATTR; \
4200
+PREFIX ## _clz (void) \
4202
+ unsigned long i; \
4204
+ for (i = 0; i < SIZE; i++) \
4205
+ PREFIX ## _a[i] = CLZ (PREFIX ## _b[i]); \
4209
+PREFIX ## _popcnt (void) \
4211
+ unsigned long i; \
4213
+ for (i = 0; i < SIZE; i++) \
4214
+ PREFIX ## _a[i] = POPCNT (PREFIX ## _b[i]); \
4217
+#if !defined(DO_LONG_LONG) && !defined(DO_LONG) && !defined(DO_INT) && !defined(DO_SHORT) && !defined(DO_CHAR)
4222
+/* At the moment, only int is auto vectorized. */
4223
+DO_BUILTIN (sll, long long, __builtin_clzll, __builtin_popcountll)
4224
+DO_BUILTIN (ull, unsigned long long, __builtin_clzll, __builtin_popcountll)
4227
+#if defined(_ARCH_PPC64) && DO_LONG
4228
+DO_BUILTIN (sl, long, __builtin_clzl, __builtin_popcountl)
4229
+DO_BUILTIN (ul, unsigned long, __builtin_clzl, __builtin_popcountl)
4233
+DO_BUILTIN (si, int, __builtin_clz, __builtin_popcount)
4234
+DO_BUILTIN (ui, unsigned int, __builtin_clz, __builtin_popcount)
4238
+DO_BUILTIN (ss, short, __builtin_clz, __builtin_popcount)
4239
+DO_BUILTIN (us, unsigned short, __builtin_clz, __builtin_popcount)
4243
+DO_BUILTIN (sc, signed char, __builtin_clz, __builtin_popcount)
4244
+DO_BUILTIN (uc, unsigned char, __builtin_clz, __builtin_popcount)
4247
+/* { dg-final { scan-assembler-times "vclzw" 2 } } */
4248
+/* { dg-final { scan-assembler-times "vpopcntw" 2 } } */
4249
--- a/src/gcc/testsuite/gcc.target/powerpc/pr57949-1.c
4250
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr57949-1.c
4252
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
4253
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4254
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
4255
+/* { dg-options "-O2 -mcpu=power7 -mno-compat-align-parm" } */
4257
+/* Verify that vs is 16-byte aligned with -mcompat-align-parm. */
4259
+typedef float v4sf __attribute__ ((vector_size (16)));
4260
+struct s { long m; v4sf v; };
4264
+void pr57949 (long d1, long d2, long d3, long d4, long d5, long d6,
4265
+ long d7, long d8, long d9, struct s vs) {
4270
+/* { dg-final { scan-assembler "li \.\*,144" } } */
4271
+/* { dg-final { scan-assembler "ld \.\*,128\\(1\\)" } } */
4272
--- a/src/gcc/testsuite/gcc.target/powerpc/atomic-p8.c
4273
+++ b/src/gcc/testsuite/gcc.target/powerpc/atomic-p8.c
4275
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
4276
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4277
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4278
+/* { dg-options "-mcpu=power8 -O2" } */
4279
+/* { dg-final { scan-assembler-times "lbarx" 7 } } */
4280
+/* { dg-final { scan-assembler-times "lharx" 7 } } */
4281
+/* { dg-final { scan-assembler-times "lwarx" 7 } } */
4282
+/* { dg-final { scan-assembler-times "ldarx" 7 } } */
4283
+/* { dg-final { scan-assembler-times "lqarx" 7 } } */
4284
+/* { dg-final { scan-assembler-times "stbcx" 7 } } */
4285
+/* { dg-final { scan-assembler-times "sthcx" 7 } } */
4286
+/* { dg-final { scan-assembler-times "stwcx" 7 } } */
4287
+/* { dg-final { scan-assembler-times "stdcx" 7 } } */
4288
+/* { dg-final { scan-assembler-times "stqcx" 7 } } */
4289
+/* { dg-final { scan-assembler-not "bl __atomic" } } */
4290
+/* { dg-final { scan-assembler-times "isync" 20 } } */
4291
+/* { dg-final { scan-assembler-times "lwsync" 10 } } */
4292
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
4293
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
4294
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
4295
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
4296
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
4298
+/* Test for the byte atomic operations on power8 using lbarx/stbcx. */
4300
+char_fetch_add_relaxed (char *ptr, int value)
4302
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4306
+char_fetch_sub_consume (char *ptr, int value)
4308
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4312
+char_fetch_and_acquire (char *ptr, int value)
4314
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4318
+char_fetch_ior_release (char *ptr, int value)
4320
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4324
+char_fetch_xor_acq_rel (char *ptr, int value)
4326
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4330
+char_fetch_nand_seq_cst (char *ptr, int value)
4332
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4336
+char_val_compare_and_swap (char *p, int i, int j, char *q)
4338
+ *q = __sync_val_compare_and_swap (p, i, j);
4341
+/* Test for the half word atomic operations on power8 using lharx/sthcx. */
4343
+short_fetch_add_relaxed (short *ptr, int value)
4345
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4349
+short_fetch_sub_consume (short *ptr, int value)
4351
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4355
+short_fetch_and_acquire (short *ptr, int value)
4357
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4361
+short_fetch_ior_release (short *ptr, int value)
4363
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4367
+short_fetch_xor_acq_rel (short *ptr, int value)
4369
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4373
+short_fetch_nand_seq_cst (short *ptr, int value)
4375
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4379
+short_val_compare_and_swap (short *p, int i, int j, short *q)
4381
+ *q = __sync_val_compare_and_swap (p, i, j);
4384
+/* Test for the word atomic operations on power8 using lwarx/stwcx. */
4386
+int_fetch_add_relaxed (int *ptr, int value)
4388
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4392
+int_fetch_sub_consume (int *ptr, int value)
4394
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4398
+int_fetch_and_acquire (int *ptr, int value)
4400
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4404
+int_fetch_ior_release (int *ptr, int value)
4406
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4410
+int_fetch_xor_acq_rel (int *ptr, int value)
4412
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4416
+int_fetch_nand_seq_cst (int *ptr, int value)
4418
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4422
+int_val_compare_and_swap (int *p, int i, int j, int *q)
4424
+ *q = __sync_val_compare_and_swap (p, i, j);
4427
+/* Test for the double word atomic operations on power8 using ldarx/stdcx. */
4429
+long_fetch_add_relaxed (long *ptr, long value)
4431
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4435
+long_fetch_sub_consume (long *ptr, long value)
4437
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4441
+long_fetch_and_acquire (long *ptr, long value)
4443
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4447
+long_fetch_ior_release (long *ptr, long value)
4449
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4453
+long_fetch_xor_acq_rel (long *ptr, long value)
4455
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4459
+long_fetch_nand_seq_cst (long *ptr, long value)
4461
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4465
+long_val_compare_and_swap (long *p, long i, long j, long *q)
4467
+ *q = __sync_val_compare_and_swap (p, i, j);
4470
+/* Test for the quad word atomic operations on power8 using ldarx/stdcx. */
4472
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
4474
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
4478
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
4480
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
4484
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
4486
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
4490
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
4492
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
4496
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
4498
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
4502
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
4504
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
4508
+quad_val_compare_and_swap (__int128_t *p, __int128_t i, __int128_t j, __int128_t *q)
4510
+ *q = __sync_val_compare_and_swap (p, i, j);
4512
--- a/src/gcc/testsuite/gcc.target/powerpc/sd-pwr6.c
4513
+++ b/src/gcc/testsuite/gcc.target/powerpc/sd-pwr6.c
4515
+/* { dg-do compile { target { powerpc*-*-* } } } */
4516
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4517
+/* { dg-require-effective-target powerpc_vsx_ok } */
4518
+/* { dg-options "-O2 -mcpu=power6 -mhard-dfp" } */
4519
+/* { dg-final { scan-assembler-not "lfiwzx" } } */
4520
+/* { dg-final { scan-assembler-times "lfd" 2 } } */
4521
+/* { dg-final { scan-assembler-times "dctdp" 2 } } */
4522
+/* { dg-final { scan-assembler-times "dadd" 1 } } */
4523
+/* { dg-final { scan-assembler-times "drsp" 1 } } */
4525
+/* Test that for power6 we need to use a bounce buffer on the stack to load
4526
+ SDmode variables because the power6 does not have a way to directly load
4527
+ 32-bit values from memory. */
4530
+void inc_dec32 (void)
4532
+ a += (_Decimal32) 1.0;
4534
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-4.c
4535
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-4.c
4537
/* { dg-final { scan-assembler-times "xvnmsub.dp" 2 } } */
4538
/* { dg-final { scan-assembler-times "xvrsqrtesp" 1 } } */
4539
/* { dg-final { scan-assembler-times "xvmsub.sp" 1 } } */
4540
-/* { dg-final { scan-assembler-times "xvmulsp" 4 } } */
4541
-/* { dg-final { scan-assembler-times "xvnmsub.sp" 2 } } */
4542
+/* { dg-final { scan-assembler-times "xvmulsp" 2 } } */
4543
+/* { dg-final { scan-assembler-times "xvnmsub.sp" 1 } } */
4547
--- a/src/gcc/testsuite/gcc.target/powerpc/no-r11-3.c
4548
+++ b/src/gcc/testsuite/gcc.target/powerpc/no-r11-3.c
4550
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
4551
/* { dg-skip-if "" { *-*-darwin* } { "*" } { "" } } */
4552
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
4553
/* { dg-options "-O2 -mno-pointers-to-nested-functions" } */
4555
extern void ext_call (int (func) (void));
4556
--- a/src/gcc/testsuite/gcc.target/powerpc/crypto-builtin-1.c
4557
+++ b/src/gcc/testsuite/gcc.target/powerpc/crypto-builtin-1.c
4559
+/* { dg-do compile { target { powerpc*-*-* } } } */
4560
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4561
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4562
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4564
+typedef vector unsigned long long crypto_t;
4565
+typedef vector unsigned long long v2di_t;
4566
+typedef vector unsigned int v4si_t;
4567
+typedef vector unsigned short v8hi_t;
4568
+typedef vector unsigned char v16qi_t;
4570
+crypto_t crpyto1 (crypto_t a)
4572
+ return __builtin_crypto_vsbox (a);
4575
+crypto_t crypto2 (crypto_t a, crypto_t b)
4577
+ return __builtin_crypto_vcipher (a, b);
4580
+crypto_t crypto3 (crypto_t a, crypto_t b)
4582
+ return __builtin_crypto_vcipherlast (a, b);
4585
+crypto_t crypto4 (crypto_t a, crypto_t b)
4587
+ return __builtin_crypto_vncipher (a, b);
4590
+crypto_t crypto5 (crypto_t a, crypto_t b)
4592
+ return __builtin_crypto_vncipherlast (a, b);
4595
+v16qi_t crypto6a (v16qi_t a, v16qi_t b, v16qi_t c)
4597
+ return __builtin_crypto_vpermxor (a, b, c);
4600
+v8hi_t crypto6b (v8hi_t a, v8hi_t b, v8hi_t c)
4602
+ return __builtin_crypto_vpermxor (a, b, c);
4605
+v4si_t crypto6c (v4si_t a, v4si_t b, v4si_t c)
4607
+ return __builtin_crypto_vpermxor (a, b, c);
4610
+v2di_t crypto6d (v2di_t a, v2di_t b, v2di_t c)
4612
+ return __builtin_crypto_vpermxor (a, b, c);
4615
+v16qi_t crypto7a (v16qi_t a, v16qi_t b)
4617
+ return __builtin_crypto_vpmsumb (a, b);
4620
+v16qi_t crypto7b (v16qi_t a, v16qi_t b)
4622
+ return __builtin_crypto_vpmsum (a, b);
4625
+v8hi_t crypto7c (v8hi_t a, v8hi_t b)
4627
+ return __builtin_crypto_vpmsumh (a, b);
4630
+v8hi_t crypto7d (v8hi_t a, v8hi_t b)
4632
+ return __builtin_crypto_vpmsum (a, b);
4635
+v4si_t crypto7e (v4si_t a, v4si_t b)
4637
+ return __builtin_crypto_vpmsumw (a, b);
4640
+v4si_t crypto7f (v4si_t a, v4si_t b)
4642
+ return __builtin_crypto_vpmsum (a, b);
4645
+v2di_t crypto7g (v2di_t a, v2di_t b)
4647
+ return __builtin_crypto_vpmsumd (a, b);
4650
+v2di_t crypto7h (v2di_t a, v2di_t b)
4652
+ return __builtin_crypto_vpmsum (a, b);
4655
+v2di_t crypto8a (v2di_t a)
4657
+ return __builtin_crypto_vshasigmad (a, 0, 8);
4660
+v2di_t crypto8b (v2di_t a)
4662
+ return __builtin_crypto_vshasigma (a, 0, 8);
4665
+v4si_t crypto8c (v4si_t a)
4667
+ return __builtin_crypto_vshasigmaw (a, 1, 15);
4670
+v4si_t crypto8d (v4si_t a)
4672
+ return __builtin_crypto_vshasigma (a, 1, 15);
4675
+/* Note space is used after the instruction so that vcipherlast does not match
4677
+/* { dg-final { scan-assembler-times "vcipher " 1 } } */
4678
+/* { dg-final { scan-assembler-times "vcipherlast " 1 } } */
4679
+/* { dg-final { scan-assembler-times "vncipher " 1 } } */
4680
+/* { dg-final { scan-assembler-times "vncipherlast " 1 } } */
4681
+/* { dg-final { scan-assembler-times "vpermxor " 4 } } */
4682
+/* { dg-final { scan-assembler-times "vpmsumb " 2 } } */
4683
+/* { dg-final { scan-assembler-times "vpmsumd " 2 } } */
4684
+/* { dg-final { scan-assembler-times "vpmsumh " 2 } } */
4685
+/* { dg-final { scan-assembler-times "vpmsumw " 2 } } */
4686
+/* { dg-final { scan-assembler-times "vsbox " 1 } } */
4687
+/* { dg-final { scan-assembler-times "vshasigmad " 2 } } */
4688
+/* { dg-final { scan-assembler-times "vshasigmaw " 2 } } */
4689
--- a/src/gcc/testsuite/gcc.target/powerpc/pr42747.c
4690
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr42747.c
4693
double foo (double x) { return __builtin_sqrt (x); }
4695
-/* { dg-final { scan-assembler "xssqrtdp" } } */
4696
+/* { dg-final { scan-assembler "xssqrtdp\|fsqrt" } } */
4697
--- a/src/gcc/testsuite/gcc.target/powerpc/dfp-dd-2.c
4698
+++ b/src/gcc/testsuite/gcc.target/powerpc/dfp-dd-2.c
4700
+/* Test generation of DFP instructions for POWER6. */
4701
+/* { dg-do compile { target { powerpc*-*-linux* && powerpc_fprs } } } */
4702
+/* { dg-options "-std=gnu99 -O1 -mcpu=power6" } */
4704
+/* { dg-final { scan-assembler-times "fneg" 1 } } */
4705
+/* { dg-final { scan-assembler-times "fabs" 1 } } */
4706
+/* { dg-final { scan-assembler-times "fnabs" 1 } } */
4707
+/* { dg-final { scan-assembler-times "fmr" 0 } } */
4710
+func1 (_Decimal64 a, _Decimal64 b)
4716
+func2 (_Decimal64 a, _Decimal64 b)
4718
+ return __builtin_fabsd64 (b);
4722
+func3 (_Decimal64 a, _Decimal64 b)
4724
+ return - __builtin_fabsd64 (b);
4726
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
4727
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
4729
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
4730
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4731
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
4732
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4733
+/* { dg-options "-mcpu=power8 -O2" } */
4734
+/* { dg-final { scan-assembler "mtvsrd" } } */
4735
+/* { dg-final { scan-assembler "mfvsrd" } } */
4736
+/* { dg-final { scan-assembler "xscvdpspn" } } */
4737
+/* { dg-final { scan-assembler "xscvspdpn" } } */
4739
+/* Check code generation for direct move for float types. */
4743
+#define NO_ALTIVEC 1
4744
+#define VSX_REG_ATTR "ww"
4746
+#include "direct-move.h"
4747
--- a/src/gcc/testsuite/gcc.target/powerpc/dfp-td-2.c
4748
+++ b/src/gcc/testsuite/gcc.target/powerpc/dfp-td-2.c
4750
+/* Test generation of DFP instructions for POWER6. */
4751
+/* { dg-do compile { target { powerpc*-*-linux* && powerpc_fprs } } } */
4752
+/* { dg-options "-std=gnu99 -O1 -mcpu=power6" } */
4754
+/* { dg-final { scan-assembler-times "fneg" 1 } } */
4755
+/* { dg-final { scan-assembler-times "fabs" 1 } } */
4756
+/* { dg-final { scan-assembler-times "fnabs" 1 } } */
4757
+/* { dg-final { scan-assembler-times "fmr" 0 } } */
4759
+/* These tests verify we only generate fneg, fabs and fnabs
4760
+ instructions and no fmr's since these are done in place. */
4763
+func1 (_Decimal128 a)
4769
+func2 (_Decimal128 a)
4771
+ return __builtin_fabsd128 (a);
4775
+func3 (_Decimal128 a)
4777
+ return - __builtin_fabsd128 (a);
4779
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c
4780
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c
4782
+/* { dg-do compile { target { powerpc*-*-* } } } */
4783
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4784
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4785
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4787
+#include <altivec.h>
4798
+#define ATTR_ALIGN __attribute__((__aligned__(ALIGN)))
4801
+#define DOIT(TYPE, PREFIX) \
4802
+TYPE PREFIX ## _eqv_builtin (TYPE a, TYPE b) \
4804
+ return vec_eqv (a, b); \
4807
+TYPE PREFIX ## _eqv_arith (TYPE a, TYPE b) \
4809
+ return ~(a ^ b); \
4812
+TYPE PREFIX ## _nand_builtin (TYPE a, TYPE b) \
4814
+ return vec_nand (a, b); \
4817
+TYPE PREFIX ## _nand_arith1 (TYPE a, TYPE b) \
4819
+ return ~(a & b); \
4822
+TYPE PREFIX ## _nand_arith2 (TYPE a, TYPE b) \
4824
+ return (~a) | (~b); \
4827
+TYPE PREFIX ## _orc_builtin (TYPE a, TYPE b) \
4829
+ return vec_orc (a, b); \
4832
+TYPE PREFIX ## _orc_arith1 (TYPE a, TYPE b) \
4834
+ return (~ a) | b; \
4837
+TYPE PREFIX ## _orc_arith2 (TYPE a, TYPE b) \
4839
+ return a | (~ b); \
4842
+#define DOIT_FLOAT(TYPE, PREFIX) \
4843
+TYPE PREFIX ## _eqv_builtin (TYPE a, TYPE b) \
4845
+ return vec_eqv (a, b); \
4848
+TYPE PREFIX ## _nand_builtin (TYPE a, TYPE b) \
4850
+ return vec_nand (a, b); \
4853
+TYPE PREFIX ## _orc_builtin (TYPE a, TYPE b) \
4855
+ return vec_orc (a, b); \
4858
+typedef vector signed char sign_char_vec;
4859
+typedef vector short sign_short_vec;
4860
+typedef vector int sign_int_vec;
4861
+typedef vector long long sign_llong_vec;
4863
+typedef vector unsigned char uns_char_vec;
4864
+typedef vector unsigned short uns_short_vec;
4865
+typedef vector unsigned int uns_int_vec;
4866
+typedef vector unsigned long long uns_llong_vec;
4868
+typedef vector float float_vec;
4869
+typedef vector double double_vec;
4871
+DOIT(sign_char_vec, sign_char)
4872
+DOIT(sign_short_vec, sign_short)
4873
+DOIT(sign_int_vec, sign_int)
4874
+DOIT(sign_llong_vec, sign_llong)
4876
+DOIT(uns_char_vec, uns_char)
4877
+DOIT(uns_short_vec, uns_short)
4878
+DOIT(uns_int_vec, uns_int)
4879
+DOIT(uns_llong_vec, uns_llong)
4881
+DOIT_FLOAT(float_vec, float)
4882
+DOIT_FLOAT(double_vec, double)
4884
+/* { dg-final { scan-assembler-times "xxleqv" 18 } } */
4885
+/* { dg-final { scan-assembler-times "xxlnand" 26 } } */
4886
+/* { dg-final { scan-assembler-times "xxlorc" 26 } } */
4887
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c
4888
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c
4890
+/* { dg-do compile { target { powerpc*-*-* } } } */
4891
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4892
+/* { dg-require-effective-target powerpc_p8vector_ok } */
4893
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
4904
+#define ATTR_ALIGN __attribute__((__aligned__(ALIGN)))
4908
+#define TYPE unsigned int
4911
+TYPE in1 [SIZE] ATTR_ALIGN;
4912
+TYPE in2 [SIZE] ATTR_ALIGN;
4913
+TYPE eqv [SIZE] ATTR_ALIGN;
4914
+TYPE nand1[SIZE] ATTR_ALIGN;
4915
+TYPE nand2[SIZE] ATTR_ALIGN;
4916
+TYPE orc1 [SIZE] ATTR_ALIGN;
4917
+TYPE orc2 [SIZE] ATTR_ALIGN;
4924
+ for (i = 0; i < SIZE; i++)
4926
+ eqv[i] = ~(in1[i] ^ in2[i]);
4935
+ for (i = 0; i < SIZE; i++)
4937
+ nand1[i] = ~(in1[i] & in2[i]);
4946
+ for (i = 0; i < SIZE; i++)
4948
+ nand2[i] = (~in1[i]) | (~in2[i]);
4957
+ for (i = 0; i < SIZE; i++)
4959
+ orc1[i] = (~in1[i]) | in2[i];
4968
+ for (i = 0; i < SIZE; i++)
4970
+ orc1[i] = in1[i] | (~in2[i]);
4974
+/* { dg-final { scan-assembler-times "xxleqv" 1 } } */
4975
+/* { dg-final { scan-assembler-times "xxlnand" 2 } } */
4976
+/* { dg-final { scan-assembler-times "xxlorc" 2 } } */
4977
--- a/src/gcc/testsuite/gcc.target/powerpc/pr57949-2.c
4978
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr57949-2.c
4980
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
4981
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
4982
+/* { dg-skip-if "" { powerpc_elfv2 } { "*" } { "" } } */
4983
+/* { dg-options "-O2 -mcpu=power7" } */
4985
+/* Verify that vs is not 16-byte aligned in the absence of -mno-compat-align-parm. */
4987
+typedef float v4sf __attribute__ ((vector_size (16)));
4988
+struct s { long m; v4sf v; };
4992
+void pr57949 (long d1, long d2, long d3, long d4, long d5, long d6,
4993
+ long d7, long d8, long d9, struct s vs) {
4998
+/* { dg-final { scan-assembler "ld .\*,136\\(1\\)" } } */
4999
+/* { dg-final { scan-assembler "ld .\*,120\\(1\\)" } } */
5000
--- a/src/gcc/testsuite/gcc.target/powerpc/recip-5.c
5001
+++ b/src/gcc/testsuite/gcc.target/powerpc/recip-5.c
5003
/* { dg-options "-O3 -ftree-vectorize -mrecip=all -ffast-math -mcpu=power7 -fno-unroll-loops" } */
5004
/* { dg-final { scan-assembler-times "xvredp" 4 } } */
5005
/* { dg-final { scan-assembler-times "xvresp" 5 } } */
5006
-/* { dg-final { scan-assembler-times "xsredp" 2 } } */
5007
-/* { dg-final { scan-assembler-times "fres" 2 } } */
5008
+/* { dg-final { scan-assembler-times "xsredp\|fre\ " 2 } } */
5009
+/* { dg-final { scan-assembler-times "xsresp\|fres" 2 } } */
5010
+/* { dg-final { scan-assembler-times "xsmulsp\|fmuls" 2 } } */
5011
+/* { dg-final { scan-assembler-times "xsnmsub.sp\|fnmsubs" 2 } } */
5012
+/* { dg-final { scan-assembler-times "xsmuldp\|fmul\ " 2 } } */
5013
+/* { dg-final { scan-assembler-times "xsnmsub.dp\|fnmsub\ " 4 } } */
5014
+/* { dg-final { scan-assembler-times "xvmulsp" 7 } } */
5015
+/* { dg-final { scan-assembler-times "xvnmsub.sp" 5 } } */
5016
+/* { dg-final { scan-assembler-times "xvmuldp" 6 } } */
5017
+/* { dg-final { scan-assembler-times "xvnmsub.dp" 8 } } */
5019
#include <altivec.h>
5021
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
5022
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
5034
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
5035
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
5037
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5038
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5039
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5040
+/* { dg-require-effective-target p8vector_hw } */
5041
+/* { dg-options "-mcpu=power8 -O2" } */
5043
+/* Check whether we get the right bits for direct move at runtime. */
5047
+#define NO_ALTIVEC 1
5049
+#define VSX_REG_ATTR "ww"
5051
+#include "direct-move.h"
5052
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
5053
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
5055
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
5056
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5057
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5058
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5059
+/* { dg-options "-mcpu=power8 -O2" } */
5060
+/* { dg-final { scan-assembler "mtvsrd" } } */
5061
+/* { dg-final { scan-assembler "mfvsrd" } } */
5063
+/* Check code generation for direct move for double types. */
5065
+#define TYPE double
5067
+#define NO_ALTIVEC 1
5068
+#define VSX_REG_ATTR "ws"
5070
+#include "direct-move.h"
5071
--- a/src/gcc/testsuite/gcc.target/powerpc/dfp-td-3.c
5072
+++ b/src/gcc/testsuite/gcc.target/powerpc/dfp-td-3.c
5074
+/* Test generation of DFP instructions for POWER6. */
5075
+/* { dg-do compile { target { powerpc*-*-linux* && powerpc_fprs } } } */
5076
+/* { dg-options "-std=gnu99 -O1 -mcpu=power6" } */
5078
+/* { dg-final { scan-assembler-times "fneg" 1 } } */
5079
+/* { dg-final { scan-assembler-times "fabs" 1 } } */
5080
+/* { dg-final { scan-assembler-times "fnabs" 1 } } */
5081
+/* { dg-final { scan-assembler-times "fmr" 3 } } */
5083
+/* These tests verify we generate fneg, fabs and fnabs and
5084
+ associated fmr's since these are not done in place. */
5087
+func1 (_Decimal128 a, _Decimal128 b)
5093
+func2 (_Decimal128 a, _Decimal128 b)
5095
+ return __builtin_fabsd128 (b);
5099
+func3 (_Decimal128 a, _Decimal128 b)
5101
+ return - __builtin_fabsd128 (b);
5103
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c
5104
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c
5106
+/* { dg-do compile { target { powerpc*-*-* } } } */
5107
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5108
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5109
+/* { dg-options "-mcpu=power8 -O2" } */
5111
+vector float dbl_to_float_p8 (double x) { return __builtin_vsx_xscvdpspn (x); }
5112
+double float_to_dbl_p8 (vector float x) { return __builtin_vsx_xscvspdpn (x); }
5114
+/* { dg-final { scan-assembler "xscvdpspn" } } */
5115
+/* { dg-final { scan-assembler "xscvspdpn" } } */
5116
--- a/src/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
5117
+++ b/src/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
5119
/* { dg-final { scan-assembler "xvrspiz" } } */
5120
/* { dg-final { scan-assembler "xsrdpi" } } */
5121
/* { dg-final { scan-assembler "xsrdpic" } } */
5122
-/* { dg-final { scan-assembler "xsrdpim" } } */
5123
-/* { dg-final { scan-assembler "xsrdpip" } } */
5124
-/* { dg-final { scan-assembler "xsrdpiz" } } */
5125
+/* { dg-final { scan-assembler "xsrdpim\|frim" } } */
5126
+/* { dg-final { scan-assembler "xsrdpip\|frip" } } */
5127
+/* { dg-final { scan-assembler "xsrdpiz\|friz" } } */
5128
/* { dg-final { scan-assembler "xsmaxdp" } } */
5129
/* { dg-final { scan-assembler "xsmindp" } } */
5130
/* { dg-final { scan-assembler "xxland" } } */
5131
--- a/src/gcc/testsuite/gcc.target/powerpc/htm-builtin-1.c
5132
+++ b/src/gcc/testsuite/gcc.target/powerpc/htm-builtin-1.c
5134
+/* { dg-do compile { target { powerpc*-*-* } } } */
5135
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5136
+/* { dg-require-effective-target powerpc_htm_ok } */
5137
+/* { dg-options "-O2 -mhtm" } */
5139
+/* { dg-final { scan-assembler-times "tbegin\\." 1 } } */
5140
+/* { dg-final { scan-assembler-times "tend\\." 2 } } */
5141
+/* { dg-final { scan-assembler-times "tabort\\." 2 } } */
5142
+/* { dg-final { scan-assembler-times "tabortdc\\." 1 } } */
5143
+/* { dg-final { scan-assembler-times "tabortdci\\." 1 } } */
5144
+/* { dg-final { scan-assembler-times "tabortwc\\." 1 } } */
5145
+/* { dg-final { scan-assembler-times "tabortwci\\." 2 } } */
5146
+/* { dg-final { scan-assembler-times "tcheck\\." 1 } } */
5147
+/* { dg-final { scan-assembler-times "trechkpt\\." 1 } } */
5148
+/* { dg-final { scan-assembler-times "treclaim\\." 1 } } */
5149
+/* { dg-final { scan-assembler-times "tsr\\." 3 } } */
5150
+/* { dg-final { scan-assembler-times "mfspr" 4 } } */
5151
+/* { dg-final { scan-assembler-times "mtspr" 4 } } */
5153
+void use_builtins (long *p, char code, long *a, long *b)
5155
+ p[0] = __builtin_tbegin (0);
5156
+ p[1] = __builtin_tend (0);
5157
+ p[2] = __builtin_tendall ();
5158
+ p[3] = __builtin_tabort (0);
5159
+ p[4] = __builtin_tabort (code);
5161
+ p[5] = __builtin_tabortdc (0xf, a[5], b[5]);
5162
+ p[6] = __builtin_tabortdci (0xf, a[6], 13);
5163
+ p[7] = __builtin_tabortwc (0xf, a[7], b[7]);
5164
+ p[8] = __builtin_tabortwci (0xf, a[8], 13);
5166
+ p[9] = __builtin_tcheck (5);
5167
+ p[10] = __builtin_trechkpt ();
5168
+ p[11] = __builtin_treclaim (0);
5169
+ p[12] = __builtin_tresume ();
5170
+ p[13] = __builtin_tsuspend ();
5171
+ p[14] = __builtin_tsr (0);
5172
+ p[15] = __builtin_ttest (); /* This expands to a tabortwci. */
5175
+ p[16] = __builtin_get_texasr ();
5176
+ p[17] = __builtin_get_texasru ();
5177
+ p[18] = __builtin_get_tfhar ();
5178
+ p[19] = __builtin_get_tfiar ();
5180
+ __builtin_set_texasr (a[20]);
5181
+ __builtin_set_texasru (a[21]);
5182
+ __builtin_set_tfhar (a[22]);
5183
+ __builtin_set_tfiar (a[23]);
5185
--- a/src/gcc/testsuite/gcc.target/powerpc/bool.c
5186
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool.c
5188
+/* { dg-do compile { target { powerpc*-*-* } } } */
5189
+/* { dg-options "-O2" } */
5190
+/* { dg-final { scan-assembler "eqv" } } */
5191
+/* { dg-final { scan-assembler "nand" } } */
5192
+/* { dg-final { scan-assembler "nor" } } */
5195
+#define TYPE unsigned long
5198
+TYPE op1 (TYPE a, TYPE b) { return ~(a ^ b); } /* eqv */
5199
+TYPE op2 (TYPE a, TYPE b) { return ~(a & b); } /* nand */
5200
+TYPE op3 (TYPE a, TYPE b) { return ~(a | b); } /* nor */
5202
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-p5.c
5203
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-p5.c
5205
+/* { dg-do compile { target { powerpc*-*-* } } } */
5206
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5207
+/* { dg-require-effective-target powerpc_altivec_ok } */
5208
+/* { dg-options "-O2 -mcpu=power5 -mabi=altivec -mno-altivec -mno-vsx" } */
5209
+/* { dg-final { scan-assembler "\[ \t\]and " } } */
5210
+/* { dg-final { scan-assembler "\[ \t\]or " } } */
5211
+/* { dg-final { scan-assembler "\[ \t\]xor " } } */
5212
+/* { dg-final { scan-assembler "\[ \t\]nor " } } */
5213
+/* { dg-final { scan-assembler "\[ \t\]andc " } } */
5214
+/* { dg-final { scan-assembler "\[ \t\]eqv " } } */
5215
+/* { dg-final { scan-assembler "\[ \t\]orc " } } */
5216
+/* { dg-final { scan-assembler "\[ \t\]nand " } } */
5217
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
5218
+/* { dg-final { scan-assembler-not "\[ \t\]vandc " } } */
5219
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
5220
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
5221
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
5222
+/* { dg-final { scan-assembler-not "\[ \t\]xxland " } } */
5223
+/* { dg-final { scan-assembler-not "\[ \t\]xxlor " } } */
5224
+/* { dg-final { scan-assembler-not "\[ \t\]xxlxor " } } */
5225
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnor " } } */
5226
+/* { dg-final { scan-assembler-not "\[ \t\]xxlandc " } } */
5227
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
5228
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
5229
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
5232
+typedef int v4si __attribute__ ((vector_size (16)));
5237
--- a/src/gcc/testsuite/gcc.target/powerpc/fusion.c
5238
+++ b/src/gcc/testsuite/gcc.target/powerpc/fusion.c
5240
+/* { dg-do compile { target { powerpc*-*-* } } } */
5241
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5242
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
5243
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5244
+/* { dg-options "-mcpu=power7 -mtune=power8 -O3" } */
5246
+#define LARGE 0x12345
5248
+int fusion_uchar (unsigned char *p){ return p[LARGE]; }
5249
+int fusion_schar (signed char *p){ return p[LARGE]; }
5250
+int fusion_ushort (unsigned short *p){ return p[LARGE]; }
5251
+int fusion_short (short *p){ return p[LARGE]; }
5252
+int fusion_int (int *p){ return p[LARGE]; }
5253
+unsigned fusion_uns (unsigned *p){ return p[LARGE]; }
5255
+vector double fusion_vector (vector double *p) { return p[2]; }
5257
+/* { dg-final { scan-assembler-times "gpr load fusion" 6 } } */
5258
+/* { dg-final { scan-assembler-times "vector load fusion" 1 } } */
5259
+/* { dg-final { scan-assembler-times "lbz" 2 } } */
5260
+/* { dg-final { scan-assembler-times "extsb" 1 } } */
5261
+/* { dg-final { scan-assembler-times "lhz" 2 } } */
5262
+/* { dg-final { scan-assembler-times "extsh" 1 } } */
5263
+/* { dg-final { scan-assembler-times "lwz" 2 } } */
5264
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
5265
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
5266
@@ -107,8 +107,10 @@
5277
@@ -119,6 +121,12 @@
5281
+#ifdef __LITTLE_ENDIAN__
5282
+#define MAKE_SLOT(x, y) ((long)x | ((long)y << 32))
5284
+#define MAKE_SLOT(x, y) ((long)y | ((long)x << 32))
5287
/* Paramter passing.
5291
sp = __builtin_frame_address(0);
5294
- if (sp->slot[2].l != 0x100000002ULL
5295
- || sp->slot[4].l != 0x500000006ULL)
5296
+ if (sp->slot[2].l != MAKE_SLOT (1, 2)
5297
+ || sp->slot[4].l != MAKE_SLOT (5, 6))
5302
sp = __builtin_frame_address(0);
5305
- if (sp->slot[4].l != 0x100000002ULL
5306
- || sp->slot[6].l != 0x500000006ULL)
5307
+ if (sp->slot[4].l != MAKE_SLOT (1, 2)
5308
+ || sp->slot[6].l != MAKE_SLOT (5, 6))
5313
sp = __builtin_frame_address(0);
5316
- if (sp->slot[4].l != 0x100000002ULL
5317
- || sp->slot[6].l != 0x500000006ULL)
5318
+ if (sp->slot[4].l != MAKE_SLOT (1, 2)
5319
+ || sp->slot[6].l != MAKE_SLOT (5, 6))
5323
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c
5324
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c
5326
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
5327
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5328
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5329
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5330
+/* { dg-options "-mcpu=power8 -O2" } */
5331
+/* { dg-final { scan-assembler "mtvsrd" } } */
5332
+/* { dg-final { scan-assembler "mfvsrd" } } */
5334
+/* Check code generation for direct move for long types. */
5338
+#define NO_ALTIVEC 1
5339
+#define VSX_REG_ATTR "d"
5341
+#include "direct-move.h"
5342
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
5343
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
5345
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5346
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5347
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5348
+/* { dg-require-effective-target p8vector_hw } */
5349
+/* { dg-options "-mcpu=power8 -O2" } */
5351
+/* Check whether we get the right bits for direct move at runtime. */
5353
+#define TYPE double
5355
+#define NO_ALTIVEC 1
5357
+#define VSX_REG_ATTR "ws"
5359
+#include "direct-move.h"
5360
--- a/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c
5361
+++ b/src/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c
5363
+/* { dg-do compile { target { powerpc*-*-* } } } */
5364
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5365
+/* { dg-require-effective-target powerpc_p8vector_ok } */
5366
+/* { dg-options "-mcpu=power8 -O2" } */
5368
+#include <altivec.h>
5370
+typedef vector int v_sign;
5371
+typedef vector unsigned int v_uns;
5373
+v_sign even_sign (v_sign a, v_sign b)
5375
+ return vec_vmrgew (a, b);
5378
+v_uns even_uns (v_uns a, v_uns b)
5380
+ return vec_vmrgew (a, b);
5383
+v_sign odd_sign (v_sign a, v_sign b)
5385
+ return vec_vmrgow (a, b);
5388
+v_uns odd_uns (v_uns a, v_uns b)
5390
+ return vec_vmrgow (a, b);
5393
+/* { dg-final { scan-assembler-times "vmrgew" 2 } } */
5394
+/* { dg-final { scan-assembler-times "vmrgow" 2 } } */
5395
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2.h
5396
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2.h
5398
+/* Test various logical operations. */
5400
+TYPE arg1 (TYPE p, TYPE q) { return p & q; } /* AND */
5401
+TYPE arg2 (TYPE p, TYPE q) { return p | q; } /* OR */
5402
+TYPE arg3 (TYPE p, TYPE q) { return p ^ q; } /* XOR */
5403
+TYPE arg4 (TYPE p) { return ~ p; } /* NOR */
5404
+TYPE arg5 (TYPE p, TYPE q) { return ~(p & q); } /* NAND */
5405
+TYPE arg6 (TYPE p, TYPE q) { return ~(p | q); } /* NOR */
5406
+TYPE arg7 (TYPE p, TYPE q) { return ~(p ^ q); } /* EQV */
5407
+TYPE arg8 (TYPE p, TYPE q) { return (~p) & q; } /* ANDC */
5408
+TYPE arg9 (TYPE p, TYPE q) { return (~p) | q; } /* ORC */
5409
+TYPE arg10(TYPE p, TYPE q) { return (~p) ^ q; } /* EQV */
5410
+TYPE arg11(TYPE p, TYPE q) { return p & (~q); } /* ANDC */
5411
+TYPE arg12(TYPE p, TYPE q) { return p | (~q); } /* ORC */
5412
+TYPE arg13(TYPE p, TYPE q) { return p ^ (~q); } /* EQV */
5414
+void ptr1 (TYPE *p) { p[0] = p[1] & p[2]; } /* AND */
5415
+void ptr2 (TYPE *p) { p[0] = p[1] | p[2]; } /* OR */
5416
+void ptr3 (TYPE *p) { p[0] = p[1] ^ p[2]; } /* XOR */
5417
+void ptr4 (TYPE *p) { p[0] = ~p[1]; } /* NOR */
5418
+void ptr5 (TYPE *p) { p[0] = ~(p[1] & p[2]); } /* NAND */
5419
+void ptr6 (TYPE *p) { p[0] = ~(p[1] | p[2]); } /* NOR */
5420
+void ptr7 (TYPE *p) { p[0] = ~(p[1] ^ p[2]); } /* EQV */
5421
+void ptr8 (TYPE *p) { p[0] = ~(p[1]) & p[2]; } /* ANDC */
5422
+void ptr9 (TYPE *p) { p[0] = (~p[1]) | p[2]; } /* ORC */
5423
+void ptr10(TYPE *p) { p[0] = (~p[1]) ^ p[2]; } /* EQV */
5424
+void ptr11(TYPE *p) { p[0] = p[1] & (~p[2]); } /* ANDC */
5425
+void ptr12(TYPE *p) { p[0] = p[1] | (~p[2]); } /* ORC */
5426
+void ptr13(TYPE *p) { p[0] = p[1] ^ (~p[2]); } /* EQV */
5427
--- a/src/gcc/testsuite/gcc.target/powerpc/pr48258-1.c
5428
+++ b/src/gcc/testsuite/gcc.target/powerpc/pr48258-1.c
5430
/* { dg-do compile } */
5431
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5432
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
5433
/* { dg-require-effective-target powerpc_vsx_ok } */
5434
/* { dg-options "-O3 -mcpu=power7 -mabi=altivec -ffast-math -fno-unroll-loops" } */
5435
/* { dg-final { scan-assembler-times "xvaddsp" 3 } } */
5436
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c
5437
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c
5441
/* Wrapper to save the GPRs and FPRs and then jump to the real function. */
5443
+#define FUNC_START(NAME) \
5444
+ "\t.globl\t" NAME "\n\t" \
5445
+ ".section \".opd\",\"aw\"\n\t" \
5448
+ ".quad .L." NAME ",.TOC.@tocbase,0\n\t" \
5450
+ ".type " NAME ", @function\n" \
5451
+ ".L." NAME ":\n\t"
5453
+#define FUNC_START(NAME) \
5454
+ "\t.globl\t" NAME "\n\t" \
5457
+ "0:\taddis 2,12,(.TOC.-0b)@ha\n\t" \
5458
+ "addi 2,2,(.TOC.-0b)@l\n\t" \
5459
+ ".localentry " NAME ",.-" NAME "\n\t"
5461
#define WRAPPER(NAME) \
5462
-__asm__ ("\t.globl\t" #NAME "_asm\n\t" \
5463
- ".section \".opd\",\"aw\"\n\t" \
5465
- #NAME "_asm:\n\t" \
5466
- ".quad .L." #NAME "_asm,.TOC.@tocbase,0\n\t" \
5468
- ".type " #NAME "_asm, @function\n" \
5469
- ".L." #NAME "_asm:\n\t" \
5470
+__asm__ (FUNC_START (#NAME "_asm") \
5471
"ld 11,gparms@got(2)\n\t" \
5482
unsigned long slot[100];
5485
--- a/src/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c
5486
+++ b/src/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c
5488
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
5489
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5490
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
5491
+/* { dg-require-effective-target p8vector_hw } */
5492
+/* { dg-options "-mcpu=power8 -O2" } */
5494
+/* Check whether we get the right bits for direct move at runtime. */
5498
+#define NO_ALTIVEC 1
5500
+#define VSX_REG_ATTR "d"
5502
+#include "direct-move.h"
5503
--- a/src/gcc/testsuite/gcc.target/powerpc/vsx-float0.c
5504
+++ b/src/gcc/testsuite/gcc.target/powerpc/vsx-float0.c
5506
+/* { dg-do compile { target { powerpc*-*-* } } } */
5507
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5508
+/* { dg-require-effective-target powerpc_vsx_ok } */
5509
+/* { dg-options "-O2 -mcpu=power7" } */
5510
+/* { dg-final { scan-assembler "xxlxor" } } */
5512
+/* Test that we generate xxlor to clear a SFmode register. */
5514
+float sum (float *p, unsigned long n)
5516
+ float sum = 0.0f; /* generate xxlxor instead of load */
5522
--- a/src/gcc/testsuite/gcc.target/powerpc/ppc-target-1.c
5523
+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc-target-1.c
5525
/* { dg-final { scan-assembler-times "fabs" 3 } } */
5526
/* { dg-final { scan-assembler-times "fnabs" 3 } } */
5527
/* { dg-final { scan-assembler-times "fsel" 3 } } */
5528
-/* { dg-final { scan-assembler-times "fcpsgn" 3 } } */
5529
-/* { dg-final { scan-assembler-times "xscpsgndp" 1 } } */
5530
+/* { dg-final { scan-assembler-times "fcpsgn\|xscpsgndp" 4 } } */
5532
double normal1 (double, double);
5533
double power5 (double, double) __attribute__((__target__("cpu=power5")));
5534
--- a/src/gcc/testsuite/gcc.target/powerpc/bool3.h
5535
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool3.h
5537
+/* Test forcing 128-bit logical types into GPR registers. */
5539
+#if defined(NO_ASM)
5540
+#define FORCE_REG1(X)
5541
+#define FORCE_REG2(X,Y)
5544
+#if defined(USE_ALTIVEC)
5545
+#define REG_CLASS "+v"
5546
+#define PRINT_REG1 "# altivec reg %0"
5547
+#define PRINT_REG2 "# altivec reg %0, %1"
5549
+#elif defined(USE_FPR)
5550
+#define REG_CLASS "+d"
5551
+#define PRINT_REG1 "# fpr reg %0"
5552
+#define PRINT_REG2 "# fpr reg %0, %1"
5554
+#elif defined(USE_VSX)
5555
+#define REG_CLASS "+wa"
5556
+#define PRINT_REG1 "# vsx reg %x0"
5557
+#define PRINT_REG2 "# vsx reg %x0, %x1"
5560
+#define REG_CLASS "+r"
5561
+#define PRINT_REG1 "# gpr reg %0"
5562
+#define PRINT_REG2 "# gpr reg %0, %1"
5565
+#define FORCE_REG1(X) __asm__ (PRINT_REG1 : REG_CLASS (X))
5566
+#define FORCE_REG2(X,Y) __asm__ (PRINT_REG2 : REG_CLASS (X), REG_CLASS (Y))
5569
+void ptr1 (TYPE *p)
5575
+ FORCE_REG2 (a, b);
5576
+ c = a & b; /* AND */
5581
+void ptr2 (TYPE *p)
5587
+ FORCE_REG2 (a, b);
5588
+ c = a | b; /* OR */
5593
+void ptr3 (TYPE *p)
5599
+ FORCE_REG2 (a, b);
5600
+ c = a ^ b; /* XOR */
5605
+void ptr4 (TYPE *p)
5616
+void ptr5 (TYPE *p)
5622
+ FORCE_REG2 (a, b);
5623
+ c = ~(a & b); /* NAND */
5628
+void ptr6 (TYPE *p)
5634
+ FORCE_REG2 (a, b);
5635
+ c = ~(a | b); /* AND */
5640
+void ptr7 (TYPE *p)
5646
+ FORCE_REG2 (a, b);
5647
+ c = ~(a ^ b); /* EQV */
5652
+void ptr8 (TYPE *p)
5658
+ FORCE_REG2 (a, b);
5659
+ c = (~a) & b; /* ANDC */
5664
+void ptr9 (TYPE *p)
5670
+ FORCE_REG2 (a, b);
5671
+ c = (~a) | b; /* ORC */
5676
+void ptr10 (TYPE *p)
5682
+ FORCE_REG2 (a, b);
5683
+ c = (~a) ^ b; /* EQV */
5688
+void ptr11 (TYPE *p)
5694
+ FORCE_REG2 (a, b);
5695
+ c = a & (~b); /* ANDC */
5700
+void ptr12 (TYPE *p)
5706
+ FORCE_REG2 (a, b);
5707
+ c = a | (~b); /* ORC */
5712
+void ptr13 (TYPE *p)
5718
+ FORCE_REG2 (a, b);
5719
+ c = a ^ (~b); /* AND */
5723
--- a/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
5724
+++ b/src/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
5726
return __builtin_shuffle(x, (V){ 4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7, });
5731
- return __builtin_shuffle(x, y,
5732
- (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
5738
- return __builtin_shuffle(x, y,
5739
- (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 });
5744
return __builtin_shuffle(x, y,
5746
/* { dg-final { scan-assembler "vspltb" } } */
5747
/* { dg-final { scan-assembler "vsplth" } } */
5748
/* { dg-final { scan-assembler "vspltw" } } */
5749
-/* { dg-final { scan-assembler "vpkuhum" } } */
5750
-/* { dg-final { scan-assembler "vpkuwum" } } */
5751
--- a/src/gcc/testsuite/gcc.target/powerpc/bool2-p7.c
5752
+++ b/src/gcc/testsuite/gcc.target/powerpc/bool2-p7.c
5754
+/* { dg-do compile { target { powerpc*-*-* } } } */
5755
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
5756
+/* { dg-require-effective-target powerpc_vsx_ok } */
5757
+/* { dg-options "-O2 -mcpu=power7" } */
5758
+/* { dg-final { scan-assembler-not "\[ \t\]and " } } */
5759
+/* { dg-final { scan-assembler-not "\[ \t\]or " } } */
5760
+/* { dg-final { scan-assembler-not "\[ \t\]xor " } } */
5761
+/* { dg-final { scan-assembler-not "\[ \t\]nor " } } */
5762
+/* { dg-final { scan-assembler-not "\[ \t\]eqv " } } */
5763
+/* { dg-final { scan-assembler-not "\[ \t\]andc " } } */
5764
+/* { dg-final { scan-assembler-not "\[ \t\]orc " } } */
5765
+/* { dg-final { scan-assembler-not "\[ \t\]nand " } } */
5766
+/* { dg-final { scan-assembler-not "\[ \t\]vand " } } */
5767
+/* { dg-final { scan-assembler-not "\[ \t\]vor " } } */
5768
+/* { dg-final { scan-assembler-not "\[ \t\]vxor " } } */
5769
+/* { dg-final { scan-assembler-not "\[ \t\]vnor " } } */
5770
+/* { dg-final { scan-assembler "\[ \t\]xxland " } } */
5771
+/* { dg-final { scan-assembler "\[ \t\]xxlor " } } */
5772
+/* { dg-final { scan-assembler "\[ \t\]xxlxor " } } */
5773
+/* { dg-final { scan-assembler "\[ \t\]xxlnor " } } */
5774
+/* { dg-final { scan-assembler "\[ \t\]xxlandc " } } */
5775
+/* { dg-final { scan-assembler-not "\[ \t\]xxleqv " } } */
5776
+/* { dg-final { scan-assembler-not "\[ \t\]xxlorc " } } */
5777
+/* { dg-final { scan-assembler-not "\[ \t\]xxlnand " } } */
5780
+typedef int v4si __attribute__ ((vector_size (16)));
5785
--- a/src/gcc/testsuite/ChangeLog.ibm
5786
+++ b/src/gcc/testsuite/ChangeLog.ibm
5788
+2014-01-14 Michael Meissner <meissner@linux.vnet.ibm.com>
5790
+ Backport from mainline
5792
+ 2013-10-23 Pat Haugen <pthaugen@us.ibm.com>
5794
+ * gcc.target/powerpc/direct-move.h: Fix header for executable tests.
5796
+2013-12-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5798
+ Backport from mainline r205638
5799
+ 2013-12-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5801
+ * gcc.dg/vect/costmodel/ppc/costmodel-slp-34.c: Skip for little
5804
+2013-11-27 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5806
+ Backport from mainline r205464
5807
+ 2013-11-27 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5809
+ * gfortran.dg/nan_7.f90: Disable for little endian PowerPC.
5811
+2013-11-22 Michael Meissner <meissner@linux.vnet.ibm.com>
5813
+ Backport from mainline
5814
+ 2013-11-22 Michael Meissner <meissner@linux.vnet.ibm.com>
5817
+ * gcc.target/powerpc/direct-move.h (VSX_REG_ATTR): Allow test to
5818
+ specify an appropriate register class for VSX operations.
5819
+ (load_vsx): Use it.
5820
+ (load_gpr_to_vsx): Likewise.
5821
+ (load_vsx_to_gpr): Likewise.
5822
+ * gcc.target/powerpc/direct-move-vint1.c: Use an appropriate
5823
+ register class for VSX registers that the type can handle. Remove
5824
+ checks for explicit number of instructions generated, just check
5825
+ if the instruction is generated.
5826
+ * gcc.target/powerpc/direct-move-vint2.c: Likewise.
5827
+ * gcc.target/powerpc/direct-move-float1.c: Likewise.
5828
+ * gcc.target/powerpc/direct-move-float2.c: Likewise.
5829
+ * gcc.target/powerpc/direct-move-double1.c: Likewise.
5830
+ * gcc.target/powerpc/direct-move-double2.c: Likewise.
5831
+ * gcc.target/powerpc/direct-move-long1.c: Likewise.
5832
+ * gcc.target/powerpc/direct-move-long2.c: Likewise.
5834
+ * gcc.target/powerpc/bool3-av.c: Limit to 64-bit mode for now.
5835
+ * gcc.target/powerpc/bool3-p7.c: Likewise.
5836
+ * gcc.target/powerpc/bool3-p8.c: Likewise.
5838
+ * gcc.target/powerpc/p8vector-ldst.c: Just check that the
5839
+ appropriate instructions are generated, don't check the count.
5841
+ 2013-11-12 Michael Meissner <meissner@linux.vnet.ibm.com>
5844
+ * gcc.target/powerpc/pr59054.c: New test.
5846
+2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5848
+ Backport from mainline r205146
5849
+ 2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5851
+ * gcc.target/powerpc/pr48258-1.c: Skip for little endian.
5853
+2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5855
+ Backport from mainline r205106:
5857
+ 2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5859
+ * gcc.target/powerpc/darwin-longlong.c (msw): Make endian-safe.
5861
+2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5863
+ Backport from mainline r205046:
5865
+ 2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5867
+ * gcc.target/powerpc/ppc64-abi-2.c (MAKE_SLOT): New macro to
5868
+ construct parameter slot value in endian-independent way.
5869
+ (fcevv, fciievv, fcvevv): Use it.
5871
+2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5873
+ Backport from mainline r204862
5874
+ 2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5876
+ * gcc.dg/vmx/3b-15.c: Revise for little endian.
5878
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5880
+ Backport from mainline r204808:
5882
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5884
+ * gcc.target/powerpc/ppc64-abi-1.c (stack_frame_t): Remove
5885
+ compiler and linker field if _CALL_ELF == 2.
5886
+ * gcc.target/powerpc/ppc64-abi-2.c (stack_frame_t): Likewise.
5887
+ * gcc.target/powerpc/ppc64-abi-dfp-1.c (stack_frame_t): Likewise.
5888
+ * gcc.dg/stack-usage-1.c (SIZE): Update value for _CALL_ELF == 2.
5890
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5892
+ * gcc.target/powerpc/ppc64-abi-dfp-1.c (FUNC_START): New macro.
5893
+ (WRAPPER): Use it.
5894
+ * gcc.target/powerpc/no-r11-1.c: Skip on powerpc_elfv2.
5895
+ * gcc.target/powerpc/no-r11-2.c: Skip on powerpc_elfv2.
5896
+ * gcc.target/powerpc/no-r11-3.c: Skip on powerpc_elfv2.
5898
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5900
+ * lib/target-supports.exp (check_effective_target_powerpc_elfv2):
5902
+ * gcc.target/powerpc/pr57949-1.c: Disable for powerpc_elfv2.
5903
+ * gcc.target/powerpc/pr57949-2.c: Likewise.
5905
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5907
+ Backport from mainline r204799:
5909
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5911
+ * g++.dg/eh/ppc64-sighandle-cr.C: New test.
5913
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5915
+ Backport from mainline r201750.
5916
+ Note: Default setting of -mcompat-align-parm inverted!
5918
+ 2013-08-14 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5921
+ * gcc.target/powerpc/pr57949-1.c: New.
5922
+ * gcc.target/powerpc/pr57949-2.c: New.
5924
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5926
+ Backport from mainline r201040 and r201929:
5928
+ 2013-08-22 Michael Meissner <meissner@linux.vnet.ibm.com>
5930
+ * gcc.target/powerpc/pr57744.c: Declare abort.
5932
+ 2013-07-18 Pat Haugen <pthaugen@us.ibm.com>
5934
+ * gcc.target/powerpc/pr57744.c: Fix typo.
5936
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5938
+ Backport from mainline r204321
5939
+ 2013-11-02 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
5941
+ * gcc.dg/vmx/vec-set.c: New.
5943
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5945
+ Backport from mainline r204138
5946
+ 2013-10-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5948
+ * gcc.dg/vmx/gcc-bug-i.c: Add little endian variant.
5949
+ * gcc.dg/vmx/eg-5.c: Likewise.
5951
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5953
+ Backport from mainline r203930
5954
+ 2013-10-22 Bill Schmidt <wschmidt@vnet.ibm.com>
5956
+ * gcc.target/powerpc/altivec-perm-1.c: Move the two vector pack
5958
+ * gcc.target/powerpc/altivec-perm-3.c: ...this new test, which is
5959
+ restricted to big-endian targets.
5961
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5963
+ Backport from mainline r203246
5964
+ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5966
+ * gcc.target/powerpc/pr43154.c: Skip for ppc64 little endian.
5967
+ * gcc.target/powerpc/fusion.c: Likewise.
5969
+2013-10-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5971
+ Backport from mainline
5972
+ 2013-04-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
5975
+ * gcc.target/powerpc/recip-1.c: Modify expected output.
5976
+ * gcc.target/powerpc/recip-3.c: Likewise.
5977
+ * gcc.target/powerpc/recip-4.c: Likewise.
5978
+ * gcc.target/powerpc/recip-5.c: Add expected output for iterations.
5980
+2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
5982
+ Back port from mainline
5983
+ 2013-10-03 Michael Meissner <meissner@linux.vnet.ibm.com>
5985
+ * gcc.target/powerpc/p8vector-fp.c: New test for floating point
5986
+ scalar operations when using -mupper-regs-sf and -mupper-regs-df.
5987
+ * gcc.target/powerpc/ppc-target-1.c: Update tests to allow either
5988
+ VSX scalar operations or the traditional floating point form of
5990
+ * gcc.target/powerpc/ppc-target-2.c: Likewise.
5991
+ * gcc.target/powerpc/recip-3.c: Likewise.
5992
+ * gcc.target/powerpc/recip-5.c: Likewise.
5993
+ * gcc.target/powerpc/pr72747.c: Likewise.
5994
+ * gcc.target/powerpc/vsx-builtin-3.c: Likewise.
5996
+ Back port from mainline
5997
+ 2013-09-27 Michael Meissner <meissner@linux.vnet.ibm.com>
5999
+ * gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf
6000
+ and -mupper-regs-df.
6002
+ Back port from mainline
6003
+ 2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
6006
+ * gcc.target/powerpc/pr58673-1.c: New file to test whether
6007
+ -mquad-word + -mno-vsx-timode causes errors.
6008
+ * gcc.target/powerpc/pr58673-2.c: Likewise.
6010
+2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
6012
+ Back port from mainline
6013
+ 2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
6015
+ * gcc.target/powerpc/dfp-dd-2.c: New test.
6016
+ * gcc.target/powerpc/dfp-td-2.c: Likewise.
6017
+ * gcc.target/powerpc/dfp-td-3.c: Likewise.
6019
+2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
6021
+ Backport from trunk.
6022
+ 2013-07-23 Michael Meissner <meissner@linux.vnet.ibm.com>
6024
+ * gcc.target/powerpc/bool2.h: New file, test the code generation
6025
+ of logical operations for power5, altivec, power7, and power8 systems.
6026
+ * gcc.target/powerpc/bool2-p5.c: Likewise.
6027
+ * gcc.target/powerpc/bool2-av.c: Likewise.
6028
+ * gcc.target/powerpc/bool2-p7.c: Likewise.
6029
+ * gcc.target/powerpc/bool2-p8.c: Likewise.
6030
+ * gcc.target/powerpc/bool3.h: Likewise.
6031
+ * gcc.target/powerpc/bool3-av.c: Likewise.
6032
+ * gcc.target/powerpc/bool2-p7.c: Likewise.
6033
+ * gcc.target/powerpc/bool2-p8.c: Likewise.
6035
+2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
6037
+ Backport from trunk.
6038
+ 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
6040
+ * gcc.target/powerpc/fusion.c: New file, test power8 fusion support.
6042
+2013-08-05 Michael Meissner <meissner@linux.vnet.ibm.com>
6044
+ Back port from mainline:
6045
+ 2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6046
+ Pat Haugen <pthaugen@us.ibm.com>
6047
+ Peter Bergner <bergner@vnet.ibm.com>
6049
+ * lib/target-supports.exp (check_p8vector_hw_available) Add power8
6051
+ (check_effective_target_powerpc_p8vector_ok): Likewise.
6052
+ (is-effective-target): Likewise.
6053
+ (check_vect_support_and_set_flags): Likewise.
6055
+2013-08-04 Peter Bergner <bergner@vnet.ibm.com>
6057
+ Back port from mainline
6058
+ 2013-08-01 Fabien Chêne <fabien@gcc.gnu.org>
6059
+ Peter Bergner <bergner@vnet.ibm.com>
6062
+ * g++.dg/overload/using3.C: New.
6063
+ * g++.dg/overload/using2.C: Adjust.
6064
+ * g++.dg/lookup/using9.C: Likewise.
6066
+2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
6068
+ Back port from mainline
6069
+ 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
6071
+ * gcc.target/powerpc/fusion.c: New file, test power8 fusion
6074
+2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
6076
+ Back port from mainline
6077
+ 2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
6079
+ * lib/target-supports.exp (check_effective_target_powerpc_htm_ok): New
6080
+ function to test if HTM is available.
6081
+ * gcc.target/powerpc/htm-xl-intrin-1.c: New test.
6082
+ * gcc.target/powerpc/htm-builtin-1.c: New test.
6084
+2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
6086
+ Back port from the trunk
6087
+ 2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
6090
+ * gcc.target/powerpc/pr57744.c: New test to make sure lqarx and
6091
+ stqcx. get even registers.
6093
+2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
6095
+ Back port from the trunk
6097
+ 2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
6098
+ Pat Haugen <pthaugen@us.ibm.com>
6099
+ Peter Bergner <bergner@vnet.ibm.com>
6101
+ * gcc.target/powerpc/atomic-p7.c: New file, add tests for atomic
6102
+ load/store instructions on power7, power8.
6103
+ * gcc.target/powerpc/atomic-p8.c: Likewise.
6105
+2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
6107
+ Back port from the trunk
6109
+ 2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
6110
+ Pat Haugen <pthaugen@us.ibm.com>
6111
+ Peter Bergner <bergner@vnet.ibm.com>
6113
+ * gcc.target/powerpc/atomic-p7.c: New file, add tests for atomic
6114
+ load/store instructions on power7, power8.
6115
+ * gcc.target/powerpc/atomic-p8.c: Likewise.
6117
+ Back port from the trunk
6119
+ 2013-06-10 Michael Meissner <meissner@linux.vnet.ibm.com>
6120
+ Pat Haugen <pthaugen@us.ibm.com>
6121
+ Peter Bergner <bergner@vnet.ibm.com>
6123
+ * gcc.target/powerpc/direct-move-vint1.c: New tests for power8
6124
+ direct move instructions.
6125
+ * gcc.target/powerpc/direct-move-vint2.c: Likewise.
6126
+ * gcc.target/powerpc/direct-move.h: Likewise.
6127
+ * gcc.target/powerpc/direct-move-float1.c: Likewise.
6128
+ * gcc.target/powerpc/direct-move-float2.c: Likewise.
6129
+ * gcc.target/powerpc/direct-move-double1.c: Likewise.
6130
+ * gcc.target/powerpc/direct-move-double2.c: Likewise.
6131
+ * gcc.target/powerpc/direct-move-long1.c: Likewise.
6132
+ * gcc.target/powerpc/direct-move-long2.c: Likewise.
6134
+2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6136
+ Backport from the trunk
6138
+ 2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6139
+ Pat Haugen <pthaugen@us.ibm.com>
6140
+ Peter Bergner <bergner@vnet.ibm.com>
6142
+ * gcc.target/powerpc/p8vector-builtin-1.c: New test to test
6143
+ power8 builtin functions.
6144
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c: Likewise.
6145
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c: Likewise.
6146
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c: Likewise.
6147
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c: Likewise.
6148
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c: Likewise.
6149
+ * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c: Likewise.
6150
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c: New
6151
+ tests to test power8 auto-vectorization.
6152
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c: Likewise.
6153
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c: Likewise.
6154
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c: Likewise.
6155
+ * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c: Likewise.
6157
+ * gcc.target/powerpc/crypto-builtin-1.c: Use effective target
6158
+ powerpc_p8vector_ok instead of powerpc_vsx_ok.
6160
+ * gcc.target/powerpc/bool.c: New file, add eqv, nand, nor tests.
6162
+ * lib/target-supports.exp (check_p8vector_hw_available) Add power8
6164
+ (check_effective_target_powerpc_p8vector_ok): Likewise.
6165
+ (is-effective-target): Likewise.
6166
+ (check_vect_support_and_set_flags): Likewise.
6168
+2013-06-06 Peter Bergner <bergner@vnet.ibm.com>
6170
+ Backport from trunk
6172
+ 2013-05-22 Michael Meissner <meissner@linux.vnet.ibm.com>
6173
+ Pat Haugen <pthaugen@us.ibm.com>
6174
+ Peter Bergner <bergner@vnet.ibm.com>
6176
+ * gcc.target/powerpc/crypto-builtin-1.c: New file, test for power8
6179
+2013-05-06 Michael Meissner <meissner@linux.vnet.ibm.com>
6181
+ Backport from trunk
6182
+ 2013-05-03 Michael Meissner <meissner@linux.vnet.ibm.com>
6185
+ * gcc.target/powerpc/pr57150.c: New file.
6187
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
6189
+ Backport from mainline
6190
+ 2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
6192
+ * gcc.target/powerpc/mmfpgpr.c: New test.
6193
+ * gcc.target/powerpc/sd-vsx.c: Likewise.
6194
+ * gcc.target/powerpc/sd-pwr6.c: Likewise.
6195
+ * gcc.target/powerpc/vsx-float0.c: Likewise.
6197
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
6199
+ Clone branch from gcc-4_8-branch, subversion id 196835.
6200
--- a/src/gcc/testsuite/lib/target-supports.exp
6201
+++ b/src/gcc/testsuite/lib/target-supports.exp
6202
@@ -1311,6 +1311,32 @@
6206
+# Return 1 if the target supports executing power8 vector instructions, 0
6207
+# otherwise. Cache the result.
6209
+proc check_p8vector_hw_available { } {
6210
+ return [check_cached_effective_target p8vector_hw_available {
6211
+ # Some simulators are known to not support VSX/power8 instructions.
6212
+ # For now, disable on Darwin
6213
+ if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} {
6216
+ set options "-mpower8-vector"
6217
+ check_runtime_nocache p8vector_hw_available {
6221
+ asm volatile ("xxlorc vs0,vs0,vs0");
6223
+ asm volatile ("xxlorc 0,0,0");
6232
# Return 1 if the target supports executing VSX instructions, 0
6233
# otherwise. Cache the result.
6235
@@ -2672,6 +2698,33 @@
6239
+# Return 1 if this is a PowerPC target supporting -mpower8-vector
6241
+proc check_effective_target_powerpc_p8vector_ok { } {
6242
+ if { ([istarget powerpc*-*-*]
6243
+ && ![istarget powerpc-*-linux*paired*])
6244
+ || [istarget rs6000-*-*] } {
6245
+ # AltiVec is not supported on AIX before 5.3.
6246
+ if { [istarget powerpc*-*-aix4*]
6247
+ || [istarget powerpc*-*-aix5.1*]
6248
+ || [istarget powerpc*-*-aix5.2*] } {
6251
+ return [check_no_compiler_messages powerpc_p8vector_ok object {
6254
+ asm volatile ("xxlorc vs0,vs0,vs0");
6256
+ asm volatile ("xxlorc 0,0,0");
6260
+ } "-mpower8-vector"]
6266
# Return 1 if this is a PowerPC target supporting -mvsx
6268
proc check_effective_target_powerpc_vsx_ok { } {
6269
@@ -2699,6 +2752,27 @@
6273
+# Return 1 if this is a PowerPC target supporting -mhtm
6275
+proc check_effective_target_powerpc_htm_ok { } {
6276
+ if { ([istarget powerpc*-*-*]
6277
+ && ![istarget powerpc-*-linux*paired*])
6278
+ || [istarget rs6000-*-*] } {
6279
+ # HTM is not supported on AIX yet.
6280
+ if { [istarget powerpc*-*-aix*] } {
6283
+ return [check_no_compiler_messages powerpc_htm_ok object {
6285
+ asm volatile ("tbegin. 0");
6294
# Return 1 if this is a PowerPC target supporting -mcpu=cell.
6296
proc check_effective_target_powerpc_ppu_ok { } {
6297
@@ -2794,6 +2868,22 @@
6301
+# Return 1 if this is a PowerPC target using the ELFv2 ABI.
6303
+proc check_effective_target_powerpc_elfv2 { } {
6304
+ if { [istarget powerpc*-*-*] } {
6305
+ return [check_no_compiler_messages powerpc_elfv2 object {
6306
+ #if _CALL_ELF != 2
6307
+ #error not ELF v2 ABI
6317
# Return 1 if this is a SPU target with a toolchain that
6318
# supports automatic overlay generation.
6320
@@ -4499,6 +4589,7 @@
6322
"vmx_hw" { set selected [check_vmx_hw_available] }
6323
"vsx_hw" { set selected [check_vsx_hw_available] }
6324
+ "p8vector_hw" { set selected [check_p8vector_hw_available] }
6325
"ppc_recip_hw" { set selected [check_ppc_recip_hw_available] }
6326
"named_sections" { set selected [check_named_sections_available] }
6327
"gc_sections" { set selected [check_gc_sections_available] }
6328
@@ -4520,6 +4611,7 @@
6330
"vmx_hw" { return 1 }
6331
"vsx_hw" { return 1 }
6332
+ "p8vector_hw" { return 1 }
6333
"ppc_recip_hw" { return 1 }
6334
"named_sections" { return 1 }
6335
"gc_sections" { return 1 }
6336
@@ -5077,7 +5169,9 @@
6339
lappend DEFAULT_VECTCFLAGS "-maltivec"
6340
- if [check_vsx_hw_available] {
6341
+ if [check_p8vector_hw_available] {
6342
+ lappend DEFAULT_VECTCFLAGS "-mpower8-vector" "-mno-allow-movmisalign"
6343
+ } elseif [check_vsx_hw_available] {
6344
lappend DEFAULT_VECTCFLAGS "-mvsx" "-mno-allow-movmisalign"
6347
--- a/src/gcc/testsuite/gfortran.dg/nan_7.f90
6348
+++ b/src/gcc/testsuite/gfortran.dg/nan_7.f90
6350
! { dg-options "-fno-range-check" }
6351
! { dg-require-effective-target fortran_real_16 }
6352
! { dg-require-effective-target fortran_integer_16 }
6353
+! { dg-skip-if "" { "powerpc*le-*-*" } { "*" } { "" } }
6354
! PR47293 NAN not correctly read
6355
character(len=200) :: str
6357
--- a/src/gcc/testsuite/gcc.dg/vmx/3b-15.c
6358
+++ b/src/gcc/testsuite/gcc.dg/vmx/3b-15.c
6360
vector unsigned char
6361
f (vector unsigned char a, vector unsigned char b, vector unsigned char c)
6363
+#ifdef __BIG_ENDIAN__
6364
return vec_perm(a,b,c);
6366
+ return vec_perm(b,a,c);
6372
8,9,10,11,12,13,14,15}),
6373
((vector unsigned char){70,71,72,73,74,75,76,77,
6374
78,79,80,81,82,83,84,85}),
6375
+#ifdef __BIG_ENDIAN__
6376
((vector unsigned char){0x1,0x14,0x18,0x10,0x16,0x15,0x19,0x1a,
6377
0x1c,0x1c,0x1c,0x12,0x8,0x1d,0x1b,0xe})),
6379
+ ((vector unsigned char){0x1e,0xb,0x7,0xf,0x9,0xa,0x6,0x5,
6380
+ 0x3,0x3,0x3,0xd,0x17,0x2,0x4,0x11})),
6382
((vector unsigned char){1,74,78,70,76,75,79,80,82,82,82,72,8,83,81,14})),
6385
--- a/src/gcc/testsuite/gcc.dg/vmx/vec-set.c
6386
+++ b/src/gcc/testsuite/gcc.dg/vmx/vec-set.c
6388
+#include "harness.h"
6393
+ return (vector short){m, 0, 0, 0, 0, 0, 0, 0};
6398
+ check (vec_all_eq (vec_set (7),
6399
+ ((vector short){7, 0, 0, 0, 0, 0, 0, 0})),
6402
--- a/src/gcc/testsuite/gcc.dg/vmx/gcc-bug-i.c
6403
+++ b/src/gcc/testsuite/gcc.dg/vmx/gcc-bug-i.c
6405
#define DO_INLINE __attribute__ ((always_inline))
6406
#define DONT_INLINE __attribute__ ((noinline))
6408
+#ifdef __LITTLE_ENDIAN__
6409
+static inline DO_INLINE int inline_me(vector signed short data)
6411
+ union {vector signed short v; signed short s[8];} u;
6413
+ unsigned char x1, x2;
6417
+ x1 = (x >> 8) & 0xff;
6419
+ return ((x2 << 8) | x1);
6422
static inline DO_INLINE int inline_me(vector signed short data)
6424
union {vector signed short v; signed short s[8];} u;
6430
static DONT_INLINE int foo(vector signed short data)
6432
--- a/src/gcc/testsuite/gcc.dg/vmx/eg-5.c
6433
+++ b/src/gcc/testsuite/gcc.dg/vmx/eg-5.c
6435
/* Set result to a vector of f32 0's */
6436
vector float result = ((vector float){0.,0.,0.,0.});
6438
+#ifdef __LITTLE_ENDIAN__
6439
+ result = vec_madd (c0, vec_splat (v, 3), result);
6440
+ result = vec_madd (c1, vec_splat (v, 2), result);
6441
+ result = vec_madd (c2, vec_splat (v, 1), result);
6442
+ result = vec_madd (c3, vec_splat (v, 0), result);
6444
result = vec_madd (c0, vec_splat (v, 0), result);
6445
result = vec_madd (c1, vec_splat (v, 1), result);
6446
result = vec_madd (c2, vec_splat (v, 2), result);
6447
result = vec_madd (c3, vec_splat (v, 3), result);
6452
--- a/src/gcc/testsuite/gcc.dg/stack-usage-1.c
6453
+++ b/src/gcc/testsuite/gcc.dg/stack-usage-1.c
6456
#elif defined (__powerpc64__) || defined (__ppc64__) || defined (__POWERPC64__) \
6457
|| defined (__PPC64__)
6459
+# if _CALL_ELF == 2
6464
#elif defined (__powerpc__) || defined (__PPC__) || defined (__ppc__) \
6465
|| defined (__POWERPC__) || defined (PPC) || defined (_IBMR2)
6466
# if defined (__ALTIVEC__)
6467
--- a/src/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-34.c
6468
+++ b/src/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-34.c
6470
/* { dg-require-effective-target vect_int } */
6471
+/* { dg-skip-if "cost too high" { powerpc*le-*-* } { "*" } { "" } } */
6474
#include "../../tree-vect.h"
6475
--- a/src/gcc/testsuite/g++.dg/lookup/using9.C
6476
+++ b/src/gcc/testsuite/g++.dg/lookup/using9.C
6479
f(1); // { dg-error "ambiguous" }
6480
// { dg-message "candidate" "candidate note" { target *-*-* } 22 }
6481
- void f(int); // { dg-error "previous using declaration" }
6482
+ void f(int); // { dg-error "previous declaration" }
6488
- using B::f; // { dg-error "already declared" }
6489
+ using B::f; // { dg-error "previous declaration" }
6491
--- a/src/gcc/testsuite/g++.dg/eh/ppc64-sighandle-cr.C
6492
+++ b/src/gcc/testsuite/g++.dg/eh/ppc64-sighandle-cr.C
6494
+// { dg-do run { target { powerpc64*-*-linux* } } }
6495
+// { dg-options "-fexceptions -fnon-call-exceptions" }
6497
+#include <signal.h>
6498
+#include <stdlib.h>
6501
+#define SET_CR(R,V) __asm__ __volatile__ ("mtcrf %0,%1" : : "n" (1<<(7-R)), "r" (V<<(4*(7-R))) : "cr" #R)
6502
+#define GET_CR(R) ({ int tmp; __asm__ __volatile__ ("mfcr %0" : "=r" (tmp)); (tmp >> 4*(7-R)) & 15; })
6504
+void sighandler (int signo, siginfo_t * si, void * uc)
6513
+float test (float a, float b) __attribute__ ((__noinline__));
6514
+float test (float a, float b)
6517
+ asm ("mtcrf %1,%2" : "=f" (x) : "n" (1 << (7-3)), "r" (0), "0" (b) : "cr3");
6523
+ struct sigaction sa;
6526
+ sa.sa_sigaction = sighandler;
6527
+ sa.sa_flags = SA_SIGINFO;
6529
+ status = sigaction (SIGFPE, & sa, NULL);
6531
+ feenableexcept (FE_DIVBYZERO);
6541
+ return GET_CR(2) != 6 || GET_CR(3) != 9 || GET_CR(4) != 12;
6548
--- a/src/gcc/testsuite/g++.dg/overload/using3.C
6549
+++ b/src/gcc/testsuite/g++.dg/overload/using3.C
6551
+// { dg-do compile }
6560
+ void f(int); // { dg-message "previous" }
6565
+ using a::f; // { dg-error "conflicts" }
6567
--- a/src/gcc/testsuite/g++.dg/overload/using2.C
6568
+++ b/src/gcc/testsuite/g++.dg/overload/using2.C
6570
extern "C" void exit (int) throw ();
6571
extern "C" void *malloc (__SIZE_TYPE__) throw () __attribute__((malloc));
6573
- void abort (void) throw ();
6574
+ void abort (void) throw (); // { dg-message "previous" }
6575
void _exit (int) throw (); // { dg-error "conflicts" "conflicts" }
6576
// { dg-message "void _exit" "_exit" { target *-*-* } 49 }
6579
// { dg-message "void C1" "C1" { target *-*-* } 53 }
6581
extern "C" void c2 (void) throw ();
6582
- void C2 (void) throw ();
6583
+ void C2 (void) throw (); // { dg-message "previous" }
6585
int C3 (int) throw ();
6588
-using std::abort; // { dg-error "already declared" }
6589
+using std::abort; // { dg-error "conflicts" }
6591
-using std::C2; // { dg-error "already declared" }
6592
+using std::C2; // { dg-error "conflicts" }
6594
using std::c3; using other::c3;
6595
using std::C3; using other::C3;
6596
--- a/src/gcc/cp/ChangeLog.ibm
6597
+++ b/src/gcc/cp/ChangeLog.ibm
6599
+2013-08-04 Peter Bergner <bergner@vnet.ibm.com>
6601
+ Back port from mainline
6602
+ 2013-08-01 Fabien Chêne <fabien@gcc.gnu.org>
6605
+ * cp-tree.h: Check OVL_USED with OVERLOAD_CHECK.
6606
+ * name-lookup.c (do_nonmember_using_decl): Make sure we have an
6607
+ OVERLOAD before calling OVL_USED. Call diagnose_name_conflict
6608
+ instead of issuing an error without mentioning the conflicting
6610
--- a/src/gcc/cp/cp-tree.h
6611
+++ b/src/gcc/cp/cp-tree.h
6613
/* If set, this was imported in a using declaration.
6614
This is not to confuse with being used somewhere, which
6615
is not important for this node. */
6616
-#define OVL_USED(NODE) TREE_USED (NODE)
6617
+#define OVL_USED(NODE) TREE_USED (OVERLOAD_CHECK (NODE))
6618
/* If set, this OVERLOAD was created for argument-dependent lookup
6619
and can be freed afterward. */
6620
#define OVL_ARG_DEPENDENT(NODE) TREE_LANG_FLAG_0 (OVERLOAD_CHECK (NODE))
6621
--- a/src/gcc/cp/name-lookup.c
6622
+++ b/src/gcc/cp/name-lookup.c
6623
@@ -2268,8 +2268,7 @@
6624
&& compparms (TYPE_ARG_TYPES (TREE_TYPE (fn)),
6625
TYPE_ARG_TYPES (TREE_TYPE (decl)))
6626
&& ! decls_match (fn, decl))
6627
- error ("%q#D conflicts with previous using declaration %q#D",
6629
+ diagnose_name_conflict (decl, fn);
6631
dup = duplicate_decls (decl, fn, is_friend);
6632
/* If DECL was a redeclaration of FN -- even an invalid
6633
@@ -2501,7 +2500,7 @@
6634
if (new_fn == old_fn)
6635
/* The function already exists in the current namespace. */
6637
- else if (OVL_USED (tmp1))
6638
+ else if (TREE_CODE (tmp1) == OVERLOAD && OVL_USED (tmp1))
6639
continue; /* this is a using decl */
6640
else if (compparms (TYPE_ARG_TYPES (TREE_TYPE (new_fn)),
6641
TYPE_ARG_TYPES (TREE_TYPE (old_fn))))
6642
@@ -2516,7 +2515,7 @@
6646
- error ("%qD is already declared in this scope", name);
6647
+ diagnose_name_conflict (new_fn, old_fn);
6651
--- a/src/gcc/builtins.def
6652
+++ b/src/gcc/builtins.def
6654
DEF_LIB_BUILTIN (BUILT_IN_FABS, "fabs", BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
6655
DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSF, "fabsf", BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
6656
DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSL, "fabsl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
6657
+DEF_GCC_BUILTIN (BUILT_IN_FABSD32, "fabsd32", BT_FN_DFLOAT32_DFLOAT32, ATTR_CONST_NOTHROW_LEAF_LIST)
6658
+DEF_GCC_BUILTIN (BUILT_IN_FABSD64, "fabsd64", BT_FN_DFLOAT64_DFLOAT64, ATTR_CONST_NOTHROW_LEAF_LIST)
6659
+DEF_GCC_BUILTIN (BUILT_IN_FABSD128, "fabsd128", BT_FN_DFLOAT128_DFLOAT128, ATTR_CONST_NOTHROW_LEAF_LIST)
6660
DEF_C99_BUILTIN (BUILT_IN_FDIM, "fdim", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
6661
DEF_C99_BUILTIN (BUILT_IN_FDIMF, "fdimf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO)
6662
DEF_C99_BUILTIN (BUILT_IN_FDIML, "fdiml", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
6663
--- a/src/gcc/expr.h
6664
+++ b/src/gcc/expr.h
6669
-extern void locate_and_pad_parm (enum machine_mode, tree, int, int, tree,
6670
- struct args_size *,
6671
+extern void locate_and_pad_parm (enum machine_mode, tree, int, int, int,
6672
+ tree, struct args_size *,
6673
struct locate_and_pad_arg_data *);
6675
/* Return the CODE_LABEL rtx for a LABEL_DECL, creating it if necessary. */
6676
--- a/src/gcc/function.c
6677
+++ b/src/gcc/function.c
6678
@@ -2507,6 +2507,7 @@
6681
locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,
6682
+ all->reg_parm_stack_space,
6683
entry_parm ? data->partial : 0, current_function_decl,
6684
&all->stack_args_size, &data->locate);
6686
@@ -3485,11 +3486,7 @@
6687
/* Adjust function incoming argument size for alignment and
6690
-#ifdef REG_PARM_STACK_SPACE
6691
- crtl->args.size = MAX (crtl->args.size,
6692
- REG_PARM_STACK_SPACE (fndecl));
6695
+ crtl->args.size = MAX (crtl->args.size, all.reg_parm_stack_space);
6696
crtl->args.size = CEIL_ROUND (crtl->args.size,
6697
PARM_BOUNDARY / BITS_PER_UNIT);
6699
@@ -3693,6 +3690,9 @@
6700
IN_REGS is nonzero if the argument will be passed in registers. It will
6701
never be set if REG_PARM_STACK_SPACE is not defined.
6703
+ REG_PARM_STACK_SPACE is the number of bytes of stack space reserved
6704
+ for arguments which are passed in registers.
6706
FNDECL is the function in which the argument was defined.
6708
There are two types of rounding that are done. The first, controlled by
6709
@@ -3713,19 +3713,16 @@
6712
locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs,
6713
- int partial, tree fndecl ATTRIBUTE_UNUSED,
6714
+ int reg_parm_stack_space, int partial,
6715
+ tree fndecl ATTRIBUTE_UNUSED,
6716
struct args_size *initial_offset_ptr,
6717
struct locate_and_pad_arg_data *locate)
6720
enum direction where_pad;
6721
unsigned int boundary, round_boundary;
6722
- int reg_parm_stack_space = 0;
6723
int part_size_in_regs;
6725
-#ifdef REG_PARM_STACK_SPACE
6726
- reg_parm_stack_space = REG_PARM_STACK_SPACE (fndecl);
6728
/* If we have found a stack parm before we reach the end of the
6729
area reserved for registers, skip that area. */
6731
@@ -3743,7 +3740,6 @@
6732
initial_offset_ptr->constant = reg_parm_stack_space;
6735
-#endif /* REG_PARM_STACK_SPACE */
6737
part_size_in_regs = (reg_parm_stack_space == 0 ? partial : 0);
6739
@@ -3806,11 +3802,7 @@
6741
locate->slot_offset.constant += part_size_in_regs;
6744
-#ifdef REG_PARM_STACK_SPACE
6745
- || REG_PARM_STACK_SPACE (fndecl) > 0
6748
+ if (!in_regs || reg_parm_stack_space > 0)
6749
pad_to_arg_alignment (&locate->slot_offset, boundary,
6750
&locate->alignment_pad);
6752
@@ -3830,11 +3822,7 @@
6753
pad_below (&locate->offset, passed_mode, sizetree);
6755
#else /* !ARGS_GROW_DOWNWARD */
6757
-#ifdef REG_PARM_STACK_SPACE
6758
- || REG_PARM_STACK_SPACE (fndecl) > 0
6761
+ if (!in_regs || reg_parm_stack_space > 0)
6762
pad_to_arg_alignment (initial_offset_ptr, boundary,
6763
&locate->alignment_pad);
6764
locate->slot_offset = *initial_offset_ptr;
6765
@@ -5093,6 +5081,7 @@
6766
amount. BLKmode results are handled using the group load/store
6768
if (TYPE_MODE (TREE_TYPE (decl_result)) != BLKmode
6769
+ && REG_P (real_decl_rtl)
6770
&& targetm.calls.return_in_msb (TREE_TYPE (decl_result)))
6772
emit_move_insn (gen_rtx_REG (GET_MODE (decl_rtl),
6773
--- a/src/gcc/ChangeLog.ibm
6774
+++ b/src/gcc/ChangeLog.ibm
6776
+2014-01-16 Michael Meissner <meissner@linux.vnet.ibm.com>
6778
+ Back port from mainline
6779
+ 2014-01-16 Michael Meissner <meissner@linux.vnet.ibm.com>
6782
+ * config/rs6000/rs6000.md (reload_vsx_from_gprsf): Add little
6783
+ endian support, remove tests for WORDS_BIG_ENDIAN.
6784
+ (p8_mfvsrd_3_<mode>): Likewise.
6785
+ (reload_gpr_from_vsx<mode>): Likewise.
6786
+ (reload_gpr_from_vsxsf): Likewise.
6787
+ (p8_mfvsrd_4_disf): Likewise.
6789
+2014-01-16 Peter Bergner <bergner@vnet.ibm.com>
6791
+ Merge up to 206665.
6792
+ * REVISION: Update subversion id.
6794
+2014-01-13 Peter Bergner <bergner@vnet.ibm.com>
6796
+ Merge up to 206579.
6797
+ * REVISION: Update subversion id.
6799
+2014-01-08 Peter Bergner <bergner@vnet.ibm.com>
6801
+ Merge up to 206404.
6802
+ * REVISION: Update subversion id.
6804
+2013-12-10 Peter Bergner <bergner@vnet.ibm.com>
6806
+ Merge up to 205847.
6807
+ * REVISION: Update subversion id.
6809
+2013-12-03 Peter Bergner <bergner@vnet.ibm.com>
6811
+ Backport from mainline
6812
+ 2013-12-03 Peter Bergner <bergner@vnet.ibm.com>
6814
+ * config/rs6000/htmintrin.h (_TEXASR_INSTRUCTION_FETCH_CONFLICT): Fix
6815
+ typo in macro name.
6816
+ (_TEXASRU_INSTRUCTION_FETCH_CONFLICT): Likewise.
6818
+2013-11-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6820
+ Backport from mainline r205333
6821
+ 2013-11-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6823
+ * config/rs6000/rs6000.c (rs6000_expand_vec_perm_const_1): Correct
6824
+ for little endian.
6826
+2013-11-23 Alan Modra <amodra@gmail.com>
6828
+ Apply mainline r205299.
6829
+ * config/rs6000/vsx.md (fusion peepholes): Disable when !TARGET_VSX.
6831
+2013-11-22 Michael Meissner <meissner@linux.vnet.ibm.com>
6833
+ Backport from mainline
6834
+ 2013-11-12 Michael Meissner <meissner@linux.vnet.ibm.com>
6837
+ * config/rs6000/rs6000.md (movdi_internal32): Eliminate
6838
+ constraints that would allow DImode into the traditional Altivec
6839
+ registers, but cause undesirable code generation when loading 0 as
6841
+ (movdi_internal64): Likewise.
6842
+ (cmp<mode>_fpr): Do not use %x for CR register output.
6843
+ (extendsfdf2_fpr): Fix constraints when -mallow-upper-df and
6844
+ -mallow-upper-sf debug switches are used.
6846
+2013-11-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6848
+ Backport from mainline r205241
6849
+ 2013-11-21 Bill Schmidt <wschmidt@vnet.ibm.com>
6851
+ * config/rs6000/vector.md (vec_pack_trunc_v2df): Revert previous
6852
+ little endian change.
6853
+ (vec_pack_sfix_trunc_v2df): Likewise.
6854
+ (vec_pack_ufix_trunc_v2df): Likewise.
6855
+ * config/rs6000/rs6000.c (rs6000_expand_interleave): Correct
6856
+ double checking of endianness.
6858
+2013-11-21 Peter Bergner <bergner@vnet.ibm.com>
6860
+ Backport from mainline r205233.
6861
+ 2013-11-21 Peter Bergner <bergner@vnet.ibm.com>
6863
+ * doc/extend.texi: Document htm builtins.
6865
+2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6867
+ Backport from mainline r205146
6868
+ 2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6870
+ * config/rs6000/vsx.md (vsx_set_<mode>): Adjust for little endian.
6871
+ (vsx_extract_<mode>): Likewise.
6872
+ (*vsx_extract_<mode>_one_le): New LE variant on
6873
+ *vsx_extract_<mode>_zero.
6874
+ (vsx_extract_v4sf): Adjust for little endian.
6876
+2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6878
+ Backport from mainline r205123:
6880
+ 2013-11-20 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6882
+ * config/rs6000/rs6000.c (rs6000_cannot_change_mode_class): Do not
6883
+ allow subregs of TDmode in FPRs of smaller size in little-endian.
6884
+ (rs6000_split_multireg_move): When splitting an access to TDmode
6885
+ in FPRs, do not use simplify_gen_subreg.
6887
+2013-11-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6889
+ Backport from mainline r205080
6890
+ 2013-11-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6892
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Adjust
6893
+ V16QI vector splat case for little endian.
6895
+2013-11-20 Alan Modra <amodra@gmail.com>
6897
+ Apply mainline r205060.
6898
+ * config/rs6000/sysv4.h (CC1_ENDIAN_LITTLE_SPEC): Define as empty.
6899
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Default
6900
+ to strict alignment on older processors when little-endian.
6901
+ * config/rs6000/linux64.h (PROCESSOR_DEFAULT64): Default to power8
6904
+2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6906
+ Backport from mainline r205045:
6908
+ 2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6910
+ * config/rs6000/vector.md ("mov<mode>"): Do not call
6911
+ rs6000_emit_le_vsx_move to move into or out of GPRs.
6912
+ * config/rs6000/rs6000.c (rs6000_emit_le_vsx_move): Assert
6913
+ source and destination are not GPR hard regs.
6915
+2013-11-18 Peter Bergner <bergner@vnet.ibm.com>
6917
+ Merge up to 204974.
6918
+ * REVISION: Update subversion id.
6920
+2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6922
+ Backport from mainline r204927:
6924
+ 2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6926
+ * config/rs6000/rs6000.c (rs6000_emit_move): Use low word of
6927
+ sdmode_stack_slot also in little-endian mode.
6929
+2013-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6931
+ Backport from mainline r204920
6932
+ 2011-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6934
+ * config/rs6000/rs6000.c (rs6000_frame_related): Add split_reg
6935
+ parameter and use it in REG_FRAME_RELATED_EXPR note.
6936
+ (emit_frame_save): Call rs6000_frame_related with extra NULL_RTX
6938
+ (rs6000_emit_prologue): Likewise, but for little endian VSX
6939
+ stores, pass the source register of the store instead.
6941
+2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6943
+ Backport from mainline r204862
6944
+ 2013-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
6946
+ * config/rs6000/altivec.md (UNSPEC_VPERM_X, UNSPEC_VPERM_UNS_X):
6948
+ (altivec_vperm_<mode>): Revert earlier little endian change.
6949
+ (*altivec_vperm_<mode>_internal): Remove.
6950
+ (altivec_vperm_<mode>_uns): Revert earlier little endian change.
6951
+ (*altivec_vperm_<mode>_uns_internal): Remove.
6952
+ * config/rs6000/vector.md (vec_realign_load_<mode>): Revise
6955
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6957
+ Backport from mainline r204842:
6959
+ 2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6961
+ * doc/invoke.texi (-mabi=elfv1, -mabi=elfv2): Document.
6963
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6965
+ Backport from mainline r204809:
6967
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6969
+ * config/rs6000/sysv4le.h (LINUX64_DEFAULT_ABI_ELFv2): Define.
6971
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6973
+ Backport from mainline r204808:
6975
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6976
+ Alan Modra <amodra@gmail.com>
6978
+ * config/rs6000/rs6000.h (RS6000_SAVE_AREA): Handle ABI_ELFv2.
6979
+ (RS6000_SAVE_TOC): Remove.
6980
+ (RS6000_TOC_SAVE_SLOT): New macro.
6981
+ * config/rs6000/rs6000.c (rs6000_parm_offset): New function.
6982
+ (rs6000_parm_start): Use it.
6983
+ (rs6000_function_arg_advance_1): Likewise.
6984
+ (rs6000_emit_prologue): Use RS6000_TOC_SAVE_SLOT.
6985
+ (rs6000_emit_epilogue): Likewise.
6986
+ (rs6000_call_aix): Likewise.
6987
+ (rs6000_output_function_prologue): Do not save/restore r11
6988
+ around calling _mcount for ABI_ELFv2.
6990
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
6991
+ Alan Modra <amodra@gmail.com>
6993
+ * config/rs6000/rs6000-protos.h (rs6000_reg_parm_stack_space):
6995
+ * config/rs6000/rs6000.h (RS6000_REG_SAVE): Remove.
6996
+ (REG_PARM_STACK_SPACE): Call rs6000_reg_parm_stack_space.
6997
+ * config/rs6000/rs6000.c (rs6000_parm_needs_stack): New function.
6998
+ (rs6000_function_parms_need_stack): Likewise.
6999
+ (rs6000_reg_parm_stack_space): Likewise.
7000
+ (rs6000_function_arg): Do not replace BLKmode by Pmode when
7001
+ returning a register argument.
7003
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7004
+ Michael Gschwind <mkg@us.ibm.com>
7006
+ * config/rs6000/rs6000.h (FP_ARG_MAX_RETURN): New macro.
7007
+ (ALTIVEC_ARG_MAX_RETURN): Likewise.
7008
+ (FUNCTION_VALUE_REGNO_P): Use them.
7009
+ * config/rs6000/rs6000.c (TARGET_RETURN_IN_MSB): Define.
7010
+ (rs6000_return_in_msb): New function.
7011
+ (rs6000_return_in_memory): Handle ELFv2 homogeneous aggregates.
7012
+ Handle aggregates of up to 16 bytes for ELFv2.
7013
+ (rs6000_function_value): Handle ELFv2 homogeneous aggregates.
7015
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7016
+ Michael Gschwind <mkg@us.ibm.com>
7018
+ * config/rs6000/rs6000.h (AGGR_ARG_NUM_REG): Define.
7019
+ * config/rs6000/rs6000.c (rs6000_aggregate_candidate): New function.
7020
+ (rs6000_discover_homogeneous_aggregate): Likewise.
7021
+ (rs6000_function_arg_boundary): Handle homogeneous aggregates.
7022
+ (rs6000_function_arg_advance_1): Likewise.
7023
+ (rs6000_function_arg): Likewise.
7024
+ (rs6000_arg_partial_bytes): Likewise.
7025
+ (rs6000_psave_function_arg): Handle BLKmode arguments.
7027
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7028
+ Michael Gschwind <mkg@us.ibm.com>
7030
+ * config/rs6000/rs6000.h (AGGR_ARG_NUM_REG): Define.
7031
+ * config/rs6000/rs6000.c (rs6000_aggregate_candidate): New function.
7032
+ (rs6000_discover_homogeneous_aggregate): Likewise.
7033
+ (rs6000_function_arg_boundary): Handle homogeneous aggregates.
7034
+ (rs6000_function_arg_advance_1): Likewise.
7035
+ (rs6000_function_arg): Likewise.
7036
+ (rs6000_arg_partial_bytes): Likewise.
7037
+ (rs6000_psave_function_arg): Handle BLKmode arguments.
7039
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7041
+ * config/rs6000/rs6000.c (machine_function): New member
7043
+ (rs6000_emit_prologue): Set r2_setup_needed if necessary.
7044
+ (rs6000_output_mi_thunk): Set r2_setup_needed.
7045
+ (rs6000_output_function_prologue): Output global entry point
7046
+ prologue and local entry point marker if needed for ABI_ELFv2.
7047
+ Output -mprofile-kernel code here.
7048
+ (output_function_profiler): Do not output -mprofile-kernel
7049
+ code here; moved to rs6000_output_function_prologue.
7050
+ (rs6000_file_start): Output ".abiversion 2" for ABI_ELFv2.
7052
+ (rs6000_emit_move): Do not handle dot symbols for ABI_ELFv2.
7053
+ (rs6000_output_function_entry): Likewise.
7054
+ (rs6000_assemble_integer): Likewise.
7055
+ (rs6000_elf_encode_section_info): Likewise.
7056
+ (rs6000_elf_declare_function_name): Do not create dot symbols
7057
+ or .opd section for ABI_ELFv2.
7059
+ (rs6000_trampoline_size): Update for ABI_ELFv2 trampolines.
7060
+ (rs6000_trampoline_init): Likewise.
7061
+ (rs6000_elf_file_end): Call file_end_indicate_exec_stack
7064
+ (rs6000_call_aix): Handle ELFv2 indirect calls. Do not check
7065
+ for function descriptors in ABI_ELFv2.
7067
+ * config/rs6000/rs6000.md ("*call_indirect_aix<mode>"): Support
7068
+ on ABI_AIX only, not ABI_ELFv2.
7069
+ ("*call_value_indirect_aix<mode>"): Likewise.
7070
+ ("*call_indirect_elfv2<mode>"): New pattern.
7071
+ ("*call_value_indirect_elfv2<mode>"): Likewise.
7073
+ * config/rs6000/predicates.md ("symbol_ref_operand"): Do not
7074
+ check for function descriptors in ABI_ELFv2.
7075
+ ("current_file_function_operand"): Likewise.
7077
+ * config/rs6000/ppc-asm.h [__powerpc64__ && _CALL_ELF == 2]:
7079
+ (FUNC_NAME): Define ELFv2 variant.
7080
+ (JUMP_TARGET): Likewise.
7081
+ (FUNC_START): Likewise.
7082
+ (HIDDEN_FUNC): Likewise.
7083
+ (FUNC_END): Likewise.
7085
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7087
+ * config.gcc [powerpc*-*-* | rs6000-*-*]: Support --with-abi=elfv1
7088
+ and --with-abi=elfv2.
7089
+ * config/rs6000/option-defaults.h (OPTION_DEFAULT_SPECS): Add "abi".
7090
+ * config/rs6000/rs6000.opt (mabi=elfv1): New option.
7091
+ (mabi=elfv2): Likewise.
7092
+ * config/rs6000/rs6000-opts.h (enum rs6000_abi): Add ABI_ELFv2.
7093
+ * config/rs6000/linux64.h (DEFAULT_ABI): Do not hard-code to AIX_ABI
7094
+ if !RS6000_BI_ARCH.
7095
+ (ELFv2_ABI_CHECK): New macro.
7096
+ (SUBSUBTARGET_OVERRIDE_OPTIONS): Use it to decide whether to set
7097
+ rs6000_current_abi to ABI_AIX or ABI_ELFv2.
7098
+ (GLIBC_DYNAMIC_LINKER64): Support ELFv2 ld.so version.
7099
+ * config/rs6000/rs6000-c.c (rs6000_cpu_cpp_builtins): Predefine
7100
+ _CALL_ELF and __STRUCT_PARM_ALIGN__ if appropriate.
7102
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Handle ABI_ELFv2.
7103
+ (debug_stack_info): Likewise.
7104
+ (rs6000_file_start): Treat ABI_ELFv2 the same as ABI_AIX.
7105
+ (rs6000_legitimize_tls_address): Likewise.
7106
+ (rs6000_conditional_register_usage): Likewise.
7107
+ (rs6000_emit_move): Likewise.
7108
+ (init_cumulative_args): Likewise.
7109
+ (rs6000_function_arg_advance_1): Likewise.
7110
+ (rs6000_function_arg): Likewise.
7111
+ (rs6000_arg_partial_bytes): Likewise.
7112
+ (rs6000_output_function_entry): Likewise.
7113
+ (rs6000_assemble_integer): Likewise.
7114
+ (rs6000_savres_strategy): Likewise.
7115
+ (rs6000_stack_info): Likewise.
7116
+ (rs6000_function_ok_for_sibcall): Likewise.
7117
+ (rs6000_emit_load_toc_table): Likewise.
7118
+ (rs6000_savres_routine_name): Likewise.
7119
+ (ptr_regno_for_savres): Likewise.
7120
+ (rs6000_emit_prologue): Likewise.
7121
+ (rs6000_emit_epilogue): Likewise.
7122
+ (rs6000_output_function_epilogue): Likewise.
7123
+ (output_profile_hook): Likewise.
7124
+ (output_function_profiler): Likewise.
7125
+ (rs6000_trampoline_size): Likewise.
7126
+ (rs6000_trampoline_init): Likewise.
7127
+ (rs6000_elf_output_toc_section_asm_op): Likewise.
7128
+ (rs6000_elf_encode_section_info): Likewise.
7129
+ (rs6000_elf_reloc_rw_mask): Likewise.
7130
+ (rs6000_elf_declare_function_name): Likewise.
7131
+ (rs6000_function_arg_boundary): Treat ABI_ELFv2 the same as ABI_AIX,
7132
+ except that rs6000_compat_align_parm is always assumed false.
7133
+ (rs6000_gimplify_va_arg): Likewise.
7134
+ (rs6000_call_aix): Update comment.
7135
+ (rs6000_sibcall_aix): Likewise.
7136
+ * config/rs6000/rs6000.md ("tls_gd_aix<TLSmode:tls_abi_suffix>"):
7137
+ Treat ABI_ELFv2 the same as ABI_AIX.
7138
+ ("*tls_gd_call_aix<TLSmode:tls_abi_suffix>"): Likewise.
7139
+ ("tls_ld_aix<TLSmode:tls_abi_suffix>"): Likewise.
7140
+ ("*tls_ld_call_aix<TLSmode:tls_abi_suffix>"): Likewise.
7141
+ ("load_toc_aix_si"): Likewise.
7142
+ ("load_toc_aix_di"): Likewise.
7143
+ ("call"): Likewise.
7144
+ ("call_value"): Likewise.
7145
+ ("*call_local_aix<mode>"): Likewise.
7146
+ ("*call_value_local_aix<mode>"): Likewise.
7147
+ ("*call_nonlocal_aix<mode>"): Likewise.
7148
+ ("*call_value_nonlocal_aix<mode>"): Likewise.
7149
+ ("*call_indirect_aix<mode>"): Likewise.
7150
+ ("*call_value_indirect_aix<mode>"): Likewise.
7151
+ ("sibcall"): Likewise.
7152
+ ("sibcall_value"): Likewise.
7153
+ ("*sibcall_aix<mode>"): Likewise.
7154
+ ("*sibcall_value_aix<mode>"): Likewise.
7155
+ * config/rs6000/predicates.md ("symbol_ref_operand"): Likewise.
7156
+ ("current_file_function_operand"): Likewise.
7158
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7160
+ Backport from mainline r204807:
7162
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7164
+ * config/rs6000/rs6000.c (rs6000_arg_partial_bytes): Simplify logic
7165
+ by making use of the fact that for vector / floating point arguments
7166
+ passed both in VRs/FPRs and in the fixed parameter area, the partial
7167
+ bytes mechanism is in fact not used.
7169
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7171
+ Backport from mainline r204806:
7173
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7175
+ * config/rs6000/rs6000.c (rs6000_psave_function_arg): New function.
7176
+ (rs6000_finish_function_arg): Likewise.
7177
+ (rs6000_function_arg): Use rs6000_psave_function_arg and
7178
+ rs6000_finish_function_arg to handle both vector and floating
7179
+ point arguments that are also passed in GPRs / the stack.
7181
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7183
+ Backport from mainline r204805:
7185
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7187
+ * config/rs6000/rs6000.c (USE_FP_FOR_ARG_P): Remove TYPE argument.
7188
+ (USE_ALTIVEC_FOR_ARG_P): Likewise.
7189
+ (rs6000_darwin64_record_arg_advance_recurse): Update uses.
7190
+ (rs6000_function_arg_advance_1): Likewise.
7191
+ (rs6000_darwin64_record_arg_recurse): Likewise.
7192
+ (rs6000_function_arg): Likewise.
7193
+ (rs6000_arg_partial_bytes): Likewise.
7195
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7197
+ Backport from mainline r204804:
7199
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7201
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Replace
7202
+ "DEFAULT_ABI != ABI_AIX" test by testing for ABI_V4 or ABI_DARWIN.
7203
+ (rs6000_savres_strategy): Likewise.
7204
+ (rs6000_return_addr): Likewise.
7205
+ (rs6000_emit_load_toc_table): Replace "DEFAULT_ABI != ABI_AIX" by
7206
+ testing for ABI_V4 (since ABI_DARWIN is impossible here).
7207
+ (rs6000_emit_prologue): Likewise.
7208
+ (legitimate_lo_sum_address_p): Simplify DEFAULT_ABI test.
7209
+ (rs6000_elf_declare_function_name): Remove duplicated test.
7210
+ * config/rs6000/rs6000.md ("load_toc_v4_PIC_1"): Explicitly test
7211
+ for ABI_V4 (instead of "DEFAULT_ABI != ABI_AIX" test).
7212
+ ("load_toc_v4_PIC_1_normal"): Likewise.
7213
+ ("load_toc_v4_PIC_1_476"): Likewise.
7214
+ ("load_toc_v4_PIC_1b"): Likewise.
7215
+ ("load_toc_v4_PIC_1b_normal"): Likewise.
7216
+ ("load_toc_v4_PIC_1b_476"): Likewise.
7217
+ ("load_toc_v4_PIC_2"): Likewise.
7218
+ ("load_toc_v4_PIC_3b"): Likewise.
7219
+ ("load_toc_v4_PIC_3c"): Likewise.
7220
+ * config/rs6000/rs6000.h (RS6000_REG_SAVE): Simplify DEFAULT_ABI test.
7221
+ (RS6000_SAVE_AREA): Likewise.
7222
+ (FP_ARG_MAX_REG): Likewise.
7223
+ (RETURN_ADDRESS_OFFSET): Likewise.
7224
+ * config/rs6000/sysv.h (TARGET_TOC): Test for ABI_V4 instead
7226
+ (SUBTARGET_OVERRIDE_OPTIONS): Likewise.
7227
+ (MINIMAL_TOC_SECTION_ASM_OP): Likewise.
7229
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7231
+ Backport from mainline r204803:
7233
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7235
+ * config/rs6000/rs6000.c (rs6000_call_indirect_aix): Rename to ...
7236
+ (rs6000_call_aix): ... this. Handle both direct and indirect calls.
7237
+ Create call insn directly instead of via various gen_... routines.
7238
+ Mention special registers used by the call in CALL_INSN_FUNCTION_USAGE.
7239
+ (rs6000_sibcall_aix): New function.
7240
+ * config/rs6000/rs6000.md (TOC_SAVE_OFFSET_32BIT): Remove.
7241
+ (TOC_SAVE_OFFSET_64BIT): Likewise.
7242
+ (AIX_FUNC_DESC_TOC_32BIT): Likewise.
7243
+ (AIX_FUNC_DESC_TOC_64BIT): Likewise.
7244
+ (AIX_FUNC_DESC_SC_32BIT): Likewise.
7245
+ (AIX_FUNC_DESC_SC_64BIT): Likewise.
7246
+ ("call" expander): Call rs6000_call_aix.
7247
+ ("call_value" expander): Likewise.
7248
+ ("call_indirect_aix<ptrsize>"): Replace this pattern ...
7249
+ ("call_indirect_aix<ptrsize>_nor11"): ... and this pattern ...
7250
+ ("*call_indirect_aix<mode>"): ... by this insn pattern.
7251
+ ("call_value_indirect_aix<ptrsize>"): Replace this pattern ...
7252
+ ("call_value_indirect_aix<ptrsize>_nor11"): ... and this pattern ...
7253
+ ("*call_value_indirect_aix<mode>"): ... by this insn pattern.
7254
+ ("*call_nonlocal_aix32", "*call_nonlocal_aix64"): Replace by ...
7255
+ ("*call_nonlocal_aix<mode>"): ... this pattern.
7256
+ ("*call_value_nonlocal_aix32", "*call_value_nonlocal_aix64"): Replace
7257
+ ("*call_value_nonlocal_aix<mode>"): ... by this pattern.
7258
+ ("*call_local_aix<mode>"): New insn pattern.
7259
+ ("*call_value_local_aix<mode>"): Likewise.
7260
+ ("sibcall" expander): Call rs6000_sibcall_aix.
7261
+ ("sibcall_value" expander): Likewise. Move earlier in file.
7262
+ ("*sibcall_nonlocal_aix<mode>"): Replace by ...
7263
+ ("*sibcall_aix<mode>"): ... this pattern.
7264
+ ("*sibcall_value_nonlocal_aix<mode>"): Replace by ...
7265
+ ("*sibcall_value_aix<mode>"): ... this pattern.
7266
+ * config/rs6000/rs6000-protos.h (rs6000_call_indirect_aix): Remove.
7267
+ (rs6000_call_aix): Add prototype.
7268
+ (rs6000_sibcall_aix): Likewise.
7270
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7272
+ Backport from mainline r204799:
7274
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7276
+ * config/rs6000/rs6000.c (rs6000_emit_prologue): Do not place a
7277
+ RTX_FRAME_RELATED_P marker on the UNSPEC_MOVESI_FROM_CR insn.
7278
+ Instead, add USEs of all modified call-saved CR fields to the
7279
+ insn storing the result to the stack slot, and provide an
7280
+ appropriate REG_FRAME_RELATED_EXPR for that insn.
7281
+ * config/rs6000/rs6000.md ("*crsave"): New insn pattern.
7282
+ * config/rs6000/predicates.md ("crsave_operation"): New predicate.
7284
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7286
+ Backport from mainline r204798:
7288
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7289
+ Alan Modra <amodra@gmail.com>
7291
+ * function.c (assign_parms): Use all.reg_parm_stack_space instead
7292
+ of re-evaluating REG_PARM_STACK_SPACE target macro.
7293
+ (locate_and_pad_parm): New parameter REG_PARM_STACK_SPACE. Use it
7294
+ instead of evaluating target macro REG_PARM_STACK_SPACE every time.
7295
+ (assign_parm_find_entry_rtl): Update call.
7296
+ * calls.c (initialize_argument_information): Update call.
7297
+ (emit_library_call_value_1): Likewise.
7298
+ * expr.h (locate_and_pad_parm): Update prototype.
7300
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7302
+ Backport from mainline r204797:
7304
+ 2013-11-14 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7306
+ * calls.c (store_unaligned_arguments_into_pseudos): Skip PARALLEL
7309
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7311
+ Backport from mainline r197003:
7313
+ 2013-03-23 Eric Botcazou <ebotcazou@adacore.com>
7315
+ * calls.c (expand_call): Add missing guard to code handling return
7316
+ of non-BLKmode structures in MSB.
7317
+ * function.c (expand_function_end): Likewise.
7319
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
7321
+ Backport from mainline r201750.
7322
+ Note: Default setting of -mcompat-align-parm inverted!
7324
+ 2013-08-14 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7327
+ * doc/invoke.texi: Add documentation of mcompat-align-parm
7329
+ * config/rs6000/rs6000.opt: Add mcompat-align-parm option.
7330
+ * config/rs6000/rs6000.c (rs6000_function_arg_boundary): For AIX
7331
+ and Linux, correct BLKmode alignment when 128-bit alignment is
7332
+ required and compatibility flag is not set.
7333
+ (rs6000_gimplify_va_arg): For AIX and Linux, honor specified
7334
+ alignment for zero-size arguments when compatibility flag is not
7337
+2013-11-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7339
+ * configure: Regenerate.
7341
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7343
+ Backport from mainline r204441
7344
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7346
+ * config/rs6000/rs6000.c (rs6000_option_override_internal):
7347
+ Remove restriction against use of VSX instructions when generating
7348
+ code for little endian mode.
7350
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7352
+ Backport from mainline r204440
7353
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7355
+ * config/rs6000/altivec.md (mulv4si3): Ensure we generate vmulouh
7356
+ for both big and little endian.
7357
+ (mulv8hi3): Swap input operands for merge high and merge low
7358
+ instructions for little endian.
7360
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7362
+ Backport from mainline r204439
7363
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7365
+ * config/rs6000/altivec.md (vec_widen_umult_even_v16qi): Change
7366
+ define_insn to define_expand that uses even patterns for big
7367
+ endian and odd patterns for little endian.
7368
+ (vec_widen_smult_even_v16qi): Likewise.
7369
+ (vec_widen_umult_even_v8hi): Likewise.
7370
+ (vec_widen_smult_even_v8hi): Likewise.
7371
+ (vec_widen_umult_odd_v16qi): Likewise.
7372
+ (vec_widen_smult_odd_v16qi): Likewise.
7373
+ (vec_widen_umult_odd_v8hi): Likewise.
7374
+ (vec_widen_smult_odd_v8hi): Likewise.
7375
+ (altivec_vmuleub): New define_insn.
7376
+ (altivec_vmuloub): Likewise.
7377
+ (altivec_vmulesb): Likewise.
7378
+ (altivec_vmulosb): Likewise.
7379
+ (altivec_vmuleuh): Likewise.
7380
+ (altivec_vmulouh): Likewise.
7381
+ (altivec_vmulesh): Likewise.
7382
+ (altivec_vmulosh): Likewise.
7384
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7386
+ Backport from mainline r204395
7387
+ 2013-11-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7389
+ * config/rs6000/vector.md (vec_pack_sfix_trunc_v2df): Adjust for
7391
+ (vec_pack_ufix_trunc_v2df): Likewise.
7393
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7395
+ Backport from mainline r204363
7396
+ 2013-11-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7398
+ * config/rs6000/altivec.md (vec_widen_umult_hi_v16qi): Swap
7399
+ arguments to merge instruction for little endian.
7400
+ (vec_widen_umult_lo_v16qi): Likewise.
7401
+ (vec_widen_smult_hi_v16qi): Likewise.
7402
+ (vec_widen_smult_lo_v16qi): Likewise.
7403
+ (vec_widen_umult_hi_v8hi): Likewise.
7404
+ (vec_widen_umult_lo_v8hi): Likewise.
7405
+ (vec_widen_smult_hi_v8hi): Likewise.
7406
+ (vec_widen_smult_lo_v8hi): Likewise.
7408
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7410
+ Backport from mainline r204350
7411
+ 2013-11-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7413
+ * config/rs6000/vsx.md (*vsx_le_perm_store_<mode> for VSX_D):
7414
+ Replace the define_insn_and_split with a define_insn and two
7415
+ define_splits, with the split after reload re-permuting the source
7416
+ register to its original value.
7417
+ (*vsx_le_perm_store_<mode> for VSX_W): Likewise.
7418
+ (*vsx_le_perm_store_v8hi): Likewise.
7419
+ (*vsx_le_perm_store_v16qi): Likewise.
7421
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7423
+ Backport from mainline r204321
7424
+ 2013-11-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7426
+ * config/rs6000/vector.md (vec_pack_trunc_v2df): Adjust for
7429
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7431
+ Backport from mainline r204321
7432
+ 2013-11-02 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
7434
+ * config/rs6000/rs6000.c (rs6000_expand_vector_set): Adjust for
7437
+2013-11-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7439
+ Backport from mainline r203980
7440
+ 2013-10-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7442
+ * config/rs6000/altivec.md (mulv8hi3): Adjust for little endian.
7444
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7446
+ Backport from mainline r203930
7447
+ 2013-10-22 Bill Schmidt <wschmidt@vnet.ibm.com>
7449
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Reverse
7450
+ meaning of merge-high and merge-low masks for little endian; avoid
7451
+ use of vector-pack masks for little endian for mismatched modes.
7453
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7455
+ Backport from mainline r203877
7456
+ 2013-10-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7458
+ * config/rs6000/altivec.md (vec_unpacku_hi_v16qi): Adjust for
7460
+ (vec_unpacku_hi_v8hi): Likewise.
7461
+ (vec_unpacku_lo_v16qi): Likewise.
7462
+ (vec_unpacku_lo_v8hi): Likewise.
7464
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7466
+ Backport from mainline r203863
7467
+ 2013-10-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7469
+ * config/rs6000/rs6000.c (vspltis_constant): Make sure we check
7470
+ all elements for both endian flavors.
7472
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7474
+ Backport from mainline r203714
7475
+ 2013-10-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7477
+ * gcc/config/rs6000/vector.md (vec_unpacks_hi_v4sf): Correct for
7479
+ (vec_unpacks_lo_v4sf): Likewise.
7480
+ (vec_unpacks_float_hi_v4si): Likewise.
7481
+ (vec_unpacks_float_lo_v4si): Likewise.
7482
+ (vec_unpacku_float_hi_v4si): Likewise.
7483
+ (vec_unpacku_float_lo_v4si): Likewise.
7485
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7487
+ Backport from mainline r203713
7488
+ 2013-10-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7490
+ * config/rs6000/vsx.md (vsx_concat_<mode>): Adjust output for LE.
7491
+ (vsx_concat_v2sf): Likewise.
7493
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7495
+ Backport from mainline r203458
7496
+ 2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7498
+ * config/rs6000/vsx.md (*vsx_le_perm_load_v2di): Generalize to
7499
+ handle vector float as well.
7500
+ (*vsx_le_perm_load_v4si): Likewise.
7501
+ (*vsx_le_perm_store_v2di): Likewise.
7502
+ (*vsx_le_perm_store_v4si): Likewise.
7504
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7506
+ Backport from mainline r203457
7507
+ 2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7509
+ * config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
7510
+ directly to circumvent subtract from splat{31} workaround.
7511
+ * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
7513
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
7514
+ * config/rs6000/altivec.md (define_c_enum "unspec"): Add
7515
+ UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
7516
+ (altivec_vperm_<mode>): Convert to define_insn_and_split to
7517
+ separate big and little endian logic.
7518
+ (*altivec_vperm_<mode>_internal): New define_insn.
7519
+ (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
7520
+ separate big and little endian logic.
7521
+ (*altivec_vperm_<mode>_uns_internal): New define_insn.
7522
+ (vec_permv16qi): Add little endian logic.
7524
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7526
+ Backport from mainline r203247
7527
+ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7529
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const_le): New.
7530
+ (altivec_expand_vec_perm_const): Call it.
7532
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7534
+ Backport from mainline r203246
7535
+ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7537
+ * config/rs6000/vector.md (mov<mode>): Emit permuted move
7538
+ sequences for LE VSX loads and stores at expand time.
7539
+ * config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_move): New
7541
+ * config/rs6000/rs6000.c (rs6000_const_vec): New.
7542
+ (rs6000_gen_le_vsx_permute): New.
7543
+ (rs6000_gen_le_vsx_load): New.
7544
+ (rs6000_gen_le_vsx_store): New.
7545
+ (rs6000_gen_le_vsx_move): New.
7546
+ * config/rs6000/vsx.md (*vsx_le_perm_load_v2di): New.
7547
+ (*vsx_le_perm_load_v4si): New.
7548
+ (*vsx_le_perm_load_v8hi): New.
7549
+ (*vsx_le_perm_load_v16qi): New.
7550
+ (*vsx_le_perm_store_v2di): New.
7551
+ (*vsx_le_perm_store_v4si): New.
7552
+ (*vsx_le_perm_store_v8hi): New.
7553
+ (*vsx_le_perm_store_v16qi): New.
7554
+ (*vsx_xxpermdi2_le_<mode>): New.
7555
+ (*vsx_xxpermdi4_le_<mode>): New.
7556
+ (*vsx_xxpermdi8_le_V8HI): New.
7557
+ (*vsx_xxpermdi16_le_V16QI): New.
7558
+ (*vsx_lxvd2x2_le_<mode>): New.
7559
+ (*vsx_lxvd2x4_le_<mode>): New.
7560
+ (*vsx_lxvd2x8_le_V8HI): New.
7561
+ (*vsx_lxvd2x16_le_V16QI): New.
7562
+ (*vsx_stxvd2x2_le_<mode>): New.
7563
+ (*vsx_stxvd2x4_le_<mode>): New.
7564
+ (*vsx_stxvd2x8_le_V8HI): New.
7565
+ (*vsx_stxvd2x16_le_V16QI): New.
7567
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7569
+ Backport from mainline r201235
7570
+ 2013-07-24 Bill Schmidt <wschmidt@linux.ibm.com>
7571
+ Anton Blanchard <anton@au1.ibm.com>
7573
+ * config/rs6000/altivec.md (altivec_vpkpx): Handle little endian.
7574
+ (altivec_vpks<VI_char>ss): Likewise.
7575
+ (altivec_vpks<VI_char>us): Likewise.
7576
+ (altivec_vpku<VI_char>us): Likewise.
7577
+ (altivec_vpku<VI_char>um): Likewise.
7579
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7581
+ Backport from mainline r201208
7582
+ 2013-07-24 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
7583
+ Anton Blanchard <anton@au1.ibm.com>
7585
+ * config/rs6000/vector.md (vec_realign_load_<mode>): Reorder input
7586
+ operands to vperm for little endian.
7587
+ * config/rs6000/rs6000.c (rs6000_expand_builtin): Use lvsr instead
7588
+ of lvsl to create the control mask for a vperm for little endian.
7590
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7592
+ Backport from mainline r201195
7593
+ 2013-07-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7594
+ Anton Blanchard <anton@au1.ibm.com>
7596
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Reverse
7597
+ two operands for little-endian.
7599
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7601
+ Backport from mainline r201193
7602
+ 2013-07-23 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7603
+ Anton Blanchard <anton@au1.ibm.com>
7605
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Correct
7606
+ selection of field for vector splat in little endian mode.
7608
+2013-11-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7610
+ Backport from mainline r201149
7611
+ 2013-07-22 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
7612
+ Anton Blanchard <anton@au1.ibm.com>
7614
+ * config/rs6000/rs6000.c (rs6000_expand_vector_init): Fix
7615
+ endianness when selecting field to splat.
7617
+2013-10-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7619
+ Backport from mainline
7620
+ 2013-04-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
7623
+ * config/rs6000/rs6000.c (rs6000_emit_swdiv_high_precision): Remove.
7624
+ (rs6000_emit_swdiv_low_precision): Remove.
7625
+ (rs6000_emit_swdiv): Rewrite to handle between one and four
7626
+ iterations of Newton-Raphson generally; modify required number of
7627
+ iterations for some cases.
7628
+ * config/rs6000/rs6000.h (RS6000_RECIP_HIGH_PRECISION_P): Remove.
7630
+2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
7632
+ Backport from mainline
7633
+ 2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
7635
+ * config/rs6000/rs6000.c (enum rs6000_reload_reg_type): Add new
7636
+ fields to the reg_addr array that describes the valid addressing
7637
+ mode for any register, general purpose registers, floating point
7638
+ registers, and Altivec registers.
7639
+ (FIRST_RELOAD_REG_CLASS): Likewise.
7640
+ (LAST_RELOAD_REG_CLASS): Likewise.
7641
+ (struct reload_reg_map_type): Likewise.
7642
+ (reload_reg_map_type): Likewise.
7643
+ (RELOAD_REG_VALID): Likewise.
7644
+ (RELOAD_REG_MULTIPLE): Likewise.
7645
+ (RELOAD_REG_INDEXED): Likewise.
7646
+ (RELOAD_REG_OFFSET): Likewise.
7647
+ (RELOAD_REG_PRE_INCDEC): Likewise.
7648
+ (RELOAD_REG_PRE_MODIFY): Likewise.
7649
+ (reg_addr): Likewise.
7650
+ (mode_supports_pre_incdec_p): New helper functions to say whether
7651
+ a given mode supports PRE_INC, PRE_DEC, and PRE_MODIFY.
7652
+ (mode_supports_pre_modify_p): Likewise.
7653
+ (rs6000_debug_vector_unit): Rearrange the -mdebug=reg output to
7654
+ print the valid address mode bits for each mode.
7655
+ (rs6000_debug_print_mode): Likewise.
7656
+ (rs6000_debug_reg_global): Likewise.
7657
+ (rs6000_setup_reg_addr_masks): New function to set up the address
7658
+ mask bits for each type.
7659
+ (rs6000_init_hard_regno_mode_ok): Use memset to clear arrays.
7660
+ Call rs6000_setup_reg_addr_masks to set up the address mask bits.
7661
+ (rs6000_legitimate_address_p): Use mode_supports_pre_incdec_p and
7662
+ mode_supports_pre_modify_p to determine if PRE_INC, PRE_DEC, and
7663
+ PRE_MODIFY are supported.
7664
+ (rs6000_output_move_128bit): Change to use {src,dest}_vmx_p for altivec
7665
+ registers, instead of {src,dest}_av_p.
7666
+ (rs6000_print_options_internal): Tweak the debug output slightly.
7668
+ Backport from mainline
7669
+ 2013-10-03 Michael Meissner <meissner@linux.vnet.ibm.com>
7671
+ * config/rs6000/rs6000-builtin.def (XSRDPIM): Use floatdf2,
7672
+ ceildf2, btruncdf2, instead of vsx_* name.
7674
+ * config/rs6000/vsx.md (vsx_add<mode>3): Change arithmetic
7675
+ iterators to only do V2DF and V4SF here. Move the DF code to
7676
+ rs6000.md where it is combined with SF mode. Replace <VSv> with
7677
+ just 'v' since only vector operations are handled with these insns
7678
+ after moving the DF support to rs6000.md.
7679
+ (vsx_sub<mode>3): Likewise.
7680
+ (vsx_mul<mode>3): Likewise.
7681
+ (vsx_div<mode>3): Likewise.
7682
+ (vsx_fre<mode>2): Likewise.
7683
+ (vsx_neg<mode>2): Likewise.
7684
+ (vsx_abs<mode>2): Likewise.
7685
+ (vsx_nabs<mode>2): Likewise.
7686
+ (vsx_smax<mode>3): Likewise.
7687
+ (vsx_smin<mode>3): Likewise.
7688
+ (vsx_sqrt<mode>2): Likewise.
7689
+ (vsx_rsqrte<mode>2): Likewise.
7690
+ (vsx_fms<mode>4): Likewise.
7691
+ (vsx_nfma<mode>4): Likewise.
7692
+ (vsx_copysign<mode>3): Likewise.
7693
+ (vsx_btrunc<mode>2): Likewise.
7694
+ (vsx_floor<mode>2): Likewise.
7695
+ (vsx_ceil<mode>2): Likewise.
7696
+ (vsx_smaxsf3): Delete scalar ops that were moved to rs6000.md.
7697
+ (vsx_sminsf3): Likewise.
7698
+ (vsx_fmadf4): Likewise.
7699
+ (vsx_fmsdf4): Likewise.
7700
+ (vsx_nfmadf4): Likewise.
7701
+ (vsx_nfmsdf4): Likewise.
7702
+ (vsx_cmpdf_internal1): Likewise.
7704
+ * config/rs6000/rs6000.h (TARGET_SF_SPE): Define macros to make it
7705
+ simpler to select whether a target has SPE or traditional floating
7706
+ point support in iterators.
7707
+ (TARGET_DF_SPE): Likewise.
7708
+ (TARGET_SF_FPR): Likewise.
7709
+ (TARGET_DF_FPR): Likewise.
7710
+ (TARGET_SF_INSN): Macros to say whether floating point support
7711
+ exists for a given operation for expanders.
7712
+ (TARGET_DF_INSN): Likewise.
7714
+ * config/rs6000/rs6000.c (Ftrad): New mode attributes to allow
7715
+ combining of SF/DF mode operations, using both traditional and VSX
7723
+ (abs<mode>2): Combine SF/DF modes using traditional floating point
7724
+ instructions. Add support for using the upper DF registers with
7725
+ VSX support, and SF registers with power8-vector support. Update
7726
+ expanders for operations supported by both the SPE and traditional
7727
+ floating point units.
7728
+ (abs<mode>2_fpr): Likewise.
7729
+ (nabs<mode>2): Likewise.
7730
+ (nabs<mode>2_fpr): Likewise.
7731
+ (neg<mode>2): Likewise.
7732
+ (neg<mode>2_fpr): Likewise.
7733
+ (add<mode>3): Likewise.
7734
+ (add<mode>3_fpr): Likewise.
7735
+ (sub<mode>3): Likewise.
7736
+ (sub<mode>3_fpr): Likewise.
7737
+ (mul<mode>3): Likewise.
7738
+ (mul<mode>3_fpr): Likewise.
7739
+ (div<mode>3): Likewise.
7740
+ (div<mode>3_fpr): Likewise.
7741
+ (sqrt<mode>3): Likewise.
7742
+ (sqrt<mode>3_fpr): Likewise.
7743
+ (fre<Fs>): Likewise.
7744
+ (rsqrt<mode>2): Likewise.
7745
+ (cmp<mode>_fpr): Likewise.
7746
+ (smax<mode>3): Likewise.
7747
+ (smin<mode>3): Likewise.
7748
+ (smax<mode>3_vsx): Likewise.
7749
+ (smin<mode>3_vsx): Likewise.
7750
+ (negsf2): Delete SF operations that are merged with DF.
7751
+ (abssf2): Likewise.
7752
+ (addsf3): Likewise.
7753
+ (subsf3): Likewise.
7754
+ (mulsf3): Likewise.
7755
+ (divsf3): Likewise.
7757
+ (fmasf4_fpr): Likewise.
7758
+ (fmssf4_fpr): Likewise.
7759
+ (nfmasf4_fpr): Likewise.
7760
+ (nfmssf4_fpr): Likewise.
7761
+ (sqrtsf2): Likewise.
7762
+ (rsqrtsf_internal1): Likewise.
7763
+ (smaxsf3): Likewise.
7764
+ (sminsf3): Likewise.
7765
+ (cmpsf_internal1): Likewise.
7766
+ (copysign<mode>3_fcpsgn): Add VSX/power8-vector support.
7767
+ (negdf2): Delete DF operations that are merged with SF.
7768
+ (absdf2): Likewise.
7769
+ (nabsdf2): Likewise.
7770
+ (adddf3): Likewise.
7771
+ (subdf3): Likewise.
7772
+ (muldf3): Likewise.
7773
+ (divdf3): Likewise.
7775
+ (rsqrtdf_internal1): Likewise.
7776
+ (fmadf4_fpr): Likewise.
7777
+ (fmsdf4_fpr): Likewise.
7778
+ (nfmadf4_fpr): Likewise.
7779
+ (nfmsdf4_fpr): Likewise.
7780
+ (sqrtdf2): Likewise.
7781
+ (smaxdf3): Likewise.
7782
+ (smindf3): Likewise.
7783
+ (cmpdf_internal1): Likewise.
7784
+ (lrint<mode>di2): Use TARGET_<MODE>_FPR macro.
7785
+ (btrunc<mode>2): Delete separate expander, and combine with the
7786
+ insn and add VSX instruction support. Use TARGET_<MODE>_FPR.
7787
+ (btrunc<mode>2_fpr): Likewise.
7788
+ (ceil<mode>2): Likewise.
7789
+ (ceil<mode>2_fpr): Likewise.
7790
+ (floor<mode>2): Likewise.
7791
+ (floor<mode>2_fpr): Likewise.
7792
+ (fma<mode>4_fpr): Combine SF and DF fused multiply/add support.
7793
+ Add support for using the upper registers with VSX and
7794
+ power8-vector. Move insns to be closer to the define_expands. On
7795
+ VSX systems, prefer the traditional form of FMA over the VSX
7796
+ version, since the traditional form allows the target not to
7797
+ overlap with the inputs.
7798
+ (fms<mode>4_fpr): Likewise.
7799
+ (nfma<mode>4_fpr): Likewise.
7800
+ (nfms<mode>4_fpr): Likewise.
7802
+ Backport from mainline
7803
+ 2013-09-27 Michael Meissner <meissner@linux.vnet.ibm.com>
7805
+ * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow
7806
+ DFmode, DImode, and SFmode in the upper VSX registers based on the
7807
+ -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS
7808
+ if -mpower8-vector. Combine -mvsx-timode handling with the rest
7809
+ of the VSX register handling.
7811
+ * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX regsters.
7812
+ (f32_sv): Likewise.
7813
+ (zero_extendsidi2_lfiwzx): Add support for loading into the
7814
+ Altivec registers with -mpower8-vector. Use wu/wv constraints to
7815
+ only do VSX memory options on Altivec registers.
7816
+ (extendsidi2_lfiwax): Likewise.
7817
+ (extendsfdf2_fpr): Likewise.
7818
+ (mov<mode>_hardfloat, SF/SD modes): Likewise.
7819
+ (mov<mode>_hardfloat32, DF/DD modes): Likewise.
7820
+ (mov<mode>_hardfloat64, DF/DD modes): Likewise.
7821
+ (movdi_internal64): Likewise.
7823
+ Backport from mainline
7824
+ 2013-09-23 Michael Meissner <meissner@linux.vnet.ibm.com>
7826
+ * config/rs6000/rs6000.c (rs6000_vector_reload): Delete, combine
7827
+ reload helper function arrays into a single array reg_addr.
7828
+ (reload_fpr_gpr): Likewise.
7829
+ (reload_gpr_vsx): Likewise.
7830
+ (reload_vsx_gpr): Likewise.
7831
+ (struct rs6000_reg_addr): Likewise.
7832
+ (reg_addr): Likewise.
7833
+ (rs6000_debug_reg_global): Change rs6000_vector_reload,
7834
+ reload_fpr_gpr, reload_gpr_vsx, reload_vsx_gpr uses to reg_addr.
7835
+ (rs6000_init_hard_regno_mode_ok): Likewise.
7836
+ (rs6000_secondary_reload_direct_move): Likewise.
7837
+ (rs6000_secondary_reload): Likewise.
7839
+ * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add new
7840
+ constraints: wu, ww, and wy. Repurpose wv constraint added during
7841
+ power8 changes. Put wg constraint in alphabetical order.
7843
+ * config/rs6000/rs6000.opt (-mvsx-scalar-float): New debug switch
7844
+ for future work to add ISA 2.07 VSX single precision support.
7845
+ (-mvsx-scalar-double): Change default from -1 to 1, update
7846
+ documentation comment.
7847
+ (-mvsx-scalar-memory): Rename debug switch to -mupper-regs-df.
7848
+ (-mupper-regs-df): New debug switch to control whether DF values
7849
+ can go in the traditional Altivec registers.
7850
+ (-mupper-regs-sf): New debug switch to control whether SF values
7851
+ can go in the traditional Altivec registers.
7853
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print wu, ww,
7854
+ and wy constraints.
7855
+ (rs6000_init_hard_regno_mode_ok): Use ssize_t instead of int for
7856
+ loop variables. Rename -mvsx-scalar-memory to -mupper-regs-df.
7857
+ Add new constraints, wu/ww/wy. Repurpose wv constraint.
7858
+ (rs6000_debug_legitimate_address_p): Print if we are running
7859
+ before, during, or after reload.
7860
+ (rs6000_secondary_reload): Add a comment.
7861
+ (rs6000_opt_masks): Add -mupper-regs-df, -mupper-regs-sf.
7863
+ * config/rs6000/constraints.md (wa constraint): Sort w<x>
7864
+ constraints. Update documentation string.
7865
+ (wd constraint): Likewise.
7866
+ (wf constraint): Likewise.
7867
+ (wg constraint): Likewise.
7868
+ (wn constraint): Likewise.
7869
+ (ws constraint): Likewise.
7870
+ (wt constraint): Likewise.
7871
+ (wx constraint): Likewise.
7872
+ (wz constraint): Likewise.
7873
+ (wu constraint): New constraint for ISA 2.07 SFmode scalar
7875
+ (ww constraint): Likewise.
7876
+ (wy constraint): Likewise.
7877
+ (wv constraint): Repurpose ISA 2.07 constraint that was not used in
7878
+ the previous submissions.
7879
+ * doc/md.texi (PowerPC and IBM RS6000): Likewise.
7881
+ Backport from mainline
7882
+ 2013-10-17 Michael Meissner <meissner@linux.vnet.ibm.com>
7885
+ * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
7886
+ restrict TImode addresses to single indirect registers if both
7887
+ -mquad-memory and -mvsx-timode are used.
7888
+ (rs6000_output_move_128bit): Use quad_load_store_p to determine if
7889
+ we should emit load/store quad. Remove using %y for quad memory
7892
+ * config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
7893
+ constraints to allow load/store quad on machines where TImode is
7894
+ not allowed in VSX registers. Use 'n' instead of 'F' constraint
7895
+ for TImode to load integer constants.
7897
+2013-10-02 Michael Meissner <meissner@linux.vnet.ibm.com>
7899
+ Backport from mainline
7900
+ 2013-10-02 Michael Meissner <meissner@linux.vnet.ibm.com>
7903
+ * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Turn off
7904
+ setting -mvsx-timode by default until the underlying problem is
7906
+ (RS6000_CPU, power7 defaults): Likewise.
7908
+2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
7910
+ Backport from mainline
7911
+ 2013-08-19 Peter Bergner <bergner@vnet.ibm.com>
7912
+ Jakub Jelinek <jakub@redhat.com>
7914
+ * builtins.def (BUILT_IN_FABSD32): New DFP ABS builtin.
7915
+ (BUILT_IN_FABSD64): Likewise.
7916
+ (BUILT_IN_FABSD128): Likewise.
7917
+ * builtins.c (expand_builtin): Add support for
7918
+ new DFP ABS builtins.
7919
+ (fold_builtin_1): Likewise.
7920
+ * config/rs6000/dfp.md
7921
+ (*negtd2_fpr): Handle
7922
+ non-overlapping destination
7923
+ and source operands.
7929
+2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
7931
+ Backport from trunk
7932
+ 2013-08-16 Michael Meissner <meissner@linux.vnet.ibm.com>
7935
+ * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the
7936
+ memory rtx to contain ZERO_EXTEND and SIGN_EXTEND.
7938
+ * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands
7939
+ array instead of each individual operand as a separate argument.
7940
+ (emit_fusion_gpr_load): Likewise.
7941
+ (expand_fusion_gpr_load): Add new function declaration.
7943
+ * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling
7944
+ signature to have the operands passed as an array, instead of as
7945
+ separate arguments. Allow ZERO_EXTEND to be in the memory
7946
+ address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not
7947
+ depend on the register live/dead flags when peepholes are run.
7948
+ (expand_fusion_gpr_load): New function to be called from the
7949
+ peephole2 pass, to change the register that addis sets to be the
7951
+ (emit_fusion_gpr_load): Change the calling signature to have the
7952
+ operands passed as an array, instead of as separate arguments.
7953
+ Allow ZERO_EXTEND to be in the memory address, and also
7954
+ SIGN_EXTEND if -mpower8-fusion-sign.
7956
+ * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused
7957
+ unspec enumeration.
7958
+ (power8 fusion peephole/peephole2): Rework the fusion peepholes to
7959
+ adjust the register addis loads up in the peephole2 pass. Do not
7960
+ depend on the register live/dead state when the peephole pass is
7963
+ Backport from trunk
7964
+ 2013-07-23 Michael Meissner <meissner@linux.vnet.ibm.com>
7966
+ * config/rs6000/vector.md (xor<mode>3): Move 128-bit boolean
7967
+ expanders to rs6000.md.
7968
+ (ior<mode>3): Likewise.
7969
+ (and<mode>3): Likewise.
7970
+ (one_cmpl<mode>2): Likewise.
7971
+ (nor<mode>3): Likewise.
7972
+ (andc<mode>3): Likewise.
7973
+ (eqv<mode>3): Likewise.
7974
+ (nand<mode>3): Likewise.
7975
+ (orc<mode>3): Likewise.
7977
+ * config/rs6000/rs6000-protos.h (rs6000_split_logical): New
7980
+ * config/rs6000/rs6000.c (rs6000_split_logical_inner): Add support
7981
+ to split multi-word logical operations.
7982
+ (rs6000_split_logical_di): Likewise.
7983
+ (rs6000_split_logical): Likewise.
7985
+ * config/rs6000/vsx.md (VSX_L2): Delete, no longer used.
7986
+ (vsx_and<mode>3_32bit): Move 128-bit logical insns to rs6000.md,
7987
+ and allow TImode operations in 32-bit.
7988
+ (vsx_and<mode>3_64bit): Likewise.
7989
+ (vsx_ior<mode>3_32bit): Likewise.
7990
+ (vsx_ior<mode>3_64bit): Likewise.
7991
+ (vsx_xor<mode>3_32bit): Likewise.
7992
+ (vsx_xor<mode>3_64bit): Likewise.
7993
+ (vsx_one_cmpl<mode>2_32bit): Likewise.
7994
+ (vsx_one_cmpl<mode>2_64bit): Likewise.
7995
+ (vsx_nor<mode>3_32bit): Likewise.
7996
+ (vsx_nor<mode>3_64bit): Likewise.
7997
+ (vsx_andc<mode>3_32bit): Likewise.
7998
+ (vsx_andc<mode>3_64bit): Likewise.
7999
+ (vsx_eqv<mode>3_32bit): Likewise.
8000
+ (vsx_eqv<mode>3_64bit): Likewise.
8001
+ (vsx_nand<mode>3_32bit): Likewise.
8002
+ (vsx_nand<mode>3_64bit): Likewise.
8003
+ (vsx_orc<mode>3_32bit): Likewise.
8004
+ (vsx_orc<mode>3_64bit): Likewise.
8006
+ * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Always allow vector
8007
+ logical types in GPRs.
8009
+ * config/rs6000/altivec.md (altivec_and<mode>3): Move 128-bit
8010
+ logical insns to rs6000.md, and allow TImode operations in
8012
+ (altivec_ior<mode>3): Likewise.
8013
+ (altivec_xor<mode>3): Likewise.
8014
+ (altivec_one_cmpl<mode>2): Likewise.
8015
+ (altivec_nor<mode>3): Likewise.
8016
+ (altivec_andc<mode>3): Likewise.
8018
+ * config/rs6000/rs6000.md (BOOL_128): New mode iterators and mode
8019
+ attributes for moving the 128-bit logical operations into
8021
+ (BOOL_REGS_OUTPUT): Likewise.
8022
+ (BOOL_REGS_OP1): Likewise.
8023
+ (BOOL_REGS_OP2): Likewise.
8024
+ (BOOL_REGS_UNARY): Likewise.
8025
+ (BOOL_REGS_AND_CR0): Likewise.
8026
+ (one_cmpl<mode>2): Add support for DI logical operations on
8027
+ 32-bit, splitting the operations to 32-bit.
8028
+ (anddi3): Likewise.
8029
+ (iordi3): Likewise.
8030
+ (xordi3): Likewise.
8031
+ (and<mode>3, 128-bit types): Rewrite 2013-06-06 logical operator
8032
+ changes to combine the 32/64-bit code, allow logical operations on
8033
+ TI mode in 32-bit, and to use similar match_operator patterns like
8034
+ scalar mode uses. Combine the Altivec and VSX code for logical
8035
+ operations, and move it here.
8036
+ (ior<mode>3, 128-bit types): Likewise.
8037
+ (xor<mode>3, 128-bit types): Likewise.
8038
+ (one_cmpl<mode>3, 128-bit types): Likewise.
8039
+ (nor<mode>3, 128-bit types): Likewise.
8040
+ (andc<mode>3, 128-bit types): Likewise.
8041
+ (eqv<mode>3, 128-bit types): Likewise.
8042
+ (nand<mode>3, 128-bit types): Likewise.
8043
+ (orc<mode>3, 128-bit types): Likewise.
8044
+ (and<mode>3_internal): Likewise.
8045
+ (bool<mode>3_internal): Likewise.
8046
+ (boolc<mode>3_internal1): Likewise.
8047
+ (boolc<mode>3_internal2): Likewise.
8048
+ (boolcc<mode>3_internal1): Likewise.
8049
+ (boolcc<mode>3_internal2): Likewise.
8050
+ (eqv<mode>3_internal1): Likewise.
8051
+ (eqv<mode>3_internal2): Likewise.
8052
+ (one_cmpl1<mode>3_internal): Likewise.
8054
+2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
8056
+ Backport from mainline
8057
+ 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
8059
+ * config/rs6000/predicates.md (fusion_gpr_addis): New predicates
8060
+ to support power8 load fusion.
8061
+ (fusion_gpr_mem_load): Likewise.
8063
+ * config/rs6000/rs6000-modes.def (PTImode): Update a comment.
8065
+ * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New
8066
+ declarations for power8 load fusion.
8067
+ (emit_fusion_gpr_load): Likewise.
8069
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): If
8070
+ tuning for power8, turn on fusion mode by default. Turn on sign
8071
+ extending fusion mode if normal fusion mode is on, and we are at
8073
+ (fusion_gpr_load_p): New function, return true if we can fuse an
8074
+ addis instruction with a dependent load to a GPR.
8075
+ (emit_fusion_gpr_load): Emit the instructions for power8 load
8078
+ * config/rs6000/vsx.md (VSX_M2): New iterator for fusion
8080
+ (VSX load fusion peepholes): New peepholes to fuse together an
8081
+ addi instruction with a VSX load instruction.
8083
+ * config/rs6000/rs6000.md (GPR load fusion peepholes): New
8084
+ peepholes to fuse an addis instruction with a load to a GPR base
8085
+ register. If we are supporting sign extending fusions, convert
8086
+ sign extending loads to zero extending loads and add an explicit
8089
+2013-07-19 Pat Haugen <pthaugen@us.ibm.com>
8091
+ Backport from mainline
8092
+ 2013-07-18 Pat Haugen <pthaugen@us.ibm.com>
8094
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag
8095
+ interaction for new Power8 flags and VSX.
8097
+2013-07-17 Peter Bergner <bergner@vnet.ibm.com>
8099
+ Backport from mainline
8100
+ 2013-07-17 Iain Sandoe <iain@codesourcery.com>
8102
+ * config/rs6000/darwin.h (REGISTER_NAMES): Add HTM registers.
8104
+2013-07-16 Peter Bergner <bergner@vnet.ibm.com>
8106
+ Merge up to 200989.
8107
+ * REVISION: Update subversion id.
8109
+2013-07-16 Peter Bergner <bergner@vnet.ibm.com>
8111
+ Backport from mainline
8112
+ 2013-07-16 Peter Bergner <bergner@vnet.ibm.com>
8114
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Do not
8115
+ enable extra ISA flags with TARGET_HTM.
8117
+ 2013-07-16 Jakub Jelinek <jakub@redhat.com>
8118
+ Peter Bergner <bergner@vnet.ibm.com>
8120
+ * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTERS): Mention HTM
8121
+ registers in the comment.
8122
+ (DWARF_FRAME_REGISTERS): Subtract also the 3 HTM registers.
8123
+ (DWARF_REG_TO_UNWIND_COLUMN): Use DWARF_FRAME_REGISTERS
8124
+ rather than FIRST_PSEUDO_REGISTERS.
8126
+2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
8128
+ Backport from mainline
8129
+ 2013-07-15 Peter Bergner <bergner@vnet.ibm.com>
8131
+ * config.gcc (powerpc*-*-*): Install htmintrin.h and htmxlintrin.h.
8132
+ * config/rs6000/t-rs6000 (MD_INCLUDES): Add htm.md.
8133
+ * config/rs6000/rs6000.opt: Add -mhtm option.
8134
+ * config/rs6000/rs6000-cpus.def (POWERPC_MASKS): Add OPTION_MASK_HTM.
8135
+ (ISA_2_7_MASKS_SERVER): Add OPTION_MASK_HTM.
8136
+ * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
8137
+ __HTM__ if the HTM instructions are available.
8138
+ * config/rs6000/predicates.md (u3bit_cint_operand, u10bit_cint_operand,
8139
+ htm_spr_reg_operand): New define_predicates.
8140
+ * config/rs6000/rs6000.md (define_attr "type"): Add htm.
8141
+ (TFHAR_REGNO, TFIAR_REGNO, TEXASR_REGNO): New define_constants.
8143
+ * config/rs6000/rs6000-builtin.def (BU_HTM_0, BU_HTM_1, BU_HTM_2,
8144
+ BU_HTM_3, BU_HTM_SPR0, BU_HTM_SPR1): Add support macros for defining
8145
+ HTM builtin functions.
8146
+ * config/rs6000/rs6000.c (RS6000_BUILTIN_H): New macro.
8147
+ (rs6000_reg_names, alt_reg_names): Add HTM SPR register names.
8148
+ (rs6000_init_hard_regno_mode_ok): Add support for HTM instructions.
8149
+ (rs6000_builtin_mask_calculate): Likewise.
8150
+ (rs6000_option_override_internal): Likewise.
8151
+ (bdesc_htm): Add new HTM builtin support.
8152
+ (htm_spr_num): New function.
8153
+ (htm_spr_regno): Likewise.
8154
+ (rs6000_htm_spr_icode): Likewise.
8155
+ (htm_expand_builtin): Likewise.
8156
+ (htm_init_builtins): Likewise.
8157
+ (rs6000_expand_builtin): Add support for HTM builtin functions.
8158
+ (rs6000_init_builtins): Likewise.
8159
+ (rs6000_invalid_builtin, rs6000_opt_mask): Add support for -mhtm option.
8160
+ * config/rs6000/rs6000.h (ASM_CPU_SPEC): Add support for -mhtm.
8161
+ (TARGET_HTM, MASK_HTM): Define macros.
8162
+ (FIRST_PSEUDO_REGISTER): Adjust for new HTM SPR registers.
8163
+ (FIXED_REGISTERS): Likewise.
8164
+ (CALL_USED_REGISTERS): Likewise.
8165
+ (CALL_REALLY_USED_REGISTERS): Likewise.
8166
+ (REG_ALLOC_ORDER): Likewise.
8167
+ (enum reg_class): Likewise.
8168
+ (REG_CLASS_NAMES): Likewise.
8169
+ (REG_CLASS_CONTENTS): Likewise.
8170
+ (REGISTER_NAMES): Likewise.
8171
+ (ADDITIONAL_REGISTER_NAMES): Likewise.
8172
+ (RS6000_BTC_SPR, RS6000_BTC_VOID, RS6000_BTC_32BIT, RS6000_BTC_64BIT,
8173
+ RS6000_BTC_MISC_MASK, RS6000_BTM_HTM): New macros.
8174
+ (RS6000_BTM_COMMON): Add RS6000_BTM_HTM.
8175
+ * config/rs6000/htm.md: New file.
8176
+ * config/rs6000/htmintrin.h: New file.
8177
+ * config/rs6000/htmxlintrin.h: New file.
8179
+2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
8181
+ Back port from the trunk
8182
+ 2013-06-28 Michael Meissner <meissner@linux.vnet.ibm.com>
8185
+ * config/rs6000/rs6000.h (MODES_TIEABLE_P): Do not allow PTImode
8186
+ to tie with any other modes. Eliminate Altivec vector mode tests,
8187
+ since these are a subset of ALTIVEC or VSX vector modes. Simplify
8188
+ code, to return 0 if testing MODE2 for a condition, if we've
8189
+ already tested MODE1 for the same condition.
8191
+2013-06-28 Pat Haugen <pthaugen@us.ibm.com>
8193
+ * config/rs6000/rs6000.md (define_insn ""): Fix insn type.
8195
+2013-06-26 Pat Haugen <pthaugen@us.ibm.com>
8197
+ Back port from the trunk
8198
+ 2013-06-26 Michael Meissner <meissner@linux.vnet.ibm.com>
8199
+ Pat Haugen <pthaugen@us.ibm.com>
8200
+ Peter Bergner <bergner@vnet.ibm.com>
8202
+ * config/rs6000/power8.md: New.
8203
+ * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor
8204
+ setting for power8 entry.
8205
+ * config/rs6000/t-rs6000 (MD_INCLUDES): Add power8.md.
8206
+ * config/rs6000/rs6000.c (is_microcoded_insn, is_cracked_insn): Adjust
8207
+ test for Power4/Power5 only.
8208
+ (insn_must_be_first_in_group, insn_must_be_last_in_group): Add Power8
8210
+ (force_new_group): Adjust comment.
8211
+ * config/rs6000/rs6000.md: Include power8.md.
8213
+2013-06-14 Michael Meissner <meissner@linux.vnet.ibm.com>
8215
+ Back port from the trunk
8216
+ 2013-06-14 Michael Meissner <meissner@linux.vnet.ibm.com>
8219
+ * config/rs6000/rs6000.md (mov<mode>_ppc64): Call
8220
+ rs6000_output_move_128bit to handle emitting quad memory
8221
+ operations. Set attribute length to 8 bytes.
8223
+2013-06-13 Michael Meissner <meissner@linux.vnet.ibm.com>
8225
+ Back port from the trunk
8226
+ 2013-06-13 Michael Meissner <meissner@linux.vnet.ibm.com>
8228
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Move
8229
+ test for clearing quad memory on 32-bit later.
8231
+2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
8233
+ Back port from the trunk
8235
+ Backport from mainline
8236
+ 2013-06-12 Michael Meissner <meissner@linux.vnet.ibm.com>
8237
+ Pat Haugen <pthaugen@us.ibm.com>
8238
+ Peter Bergner <bergner@vnet.ibm.com>
8240
+ * config/rs6000/rs6000.c (emit_load_locked): Add support for
8241
+ power8 byte, half-word, and quad-word atomic instructions.
8242
+ (emit_store_conditional): Likewise.
8243
+ (rs6000_expand_atomic_compare_and_swap): Likewise.
8244
+ (rs6000_expand_atomic_op): Likewise.
8246
+ * config/rs6000/sync.md (larx): Add new modes for power8.
8248
+ (AINT): New mode iterator to include TImode as well as normal
8249
+ integer modes on power8.
8250
+ (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so
8251
+ that VSX registers are not considered. Use AINT mode iterator
8252
+ instead of INT1 to allow inclusion of quad word atomic operations
8254
+ (load_locked<mode>): Likewise.
8255
+ (store_conditional<mode>): Likewise.
8256
+ (atomic_compare_and_swap<mode>): Likewise.
8257
+ (atomic_exchange<mode>): Likewise.
8258
+ (atomic_nand<mode>): Likewise.
8259
+ (atomic_fetch_<fetchop_name><mode>): Likewise.
8260
+ (atomic_nand_fetch<mode>): Likewise.
8261
+ (mem_thread_fence): Use gen_loadsync_<mode> instead of enumerating
8263
+ (ATOMIC): On power8, add QImode, HImode modes.
8264
+ (load_locked<QHI:mode>_si): Variants of load_locked for QI/HI
8265
+ modes that promote to SImode.
8266
+ (load_lockedti): Convert TImode arguments to PTImode, so that we
8267
+ get a guaranteed even/odd register pair.
8268
+ (load_lockedpti): Likewise.
8269
+ (store_conditionalti): Likewise.
8270
+ (store_conditionalpti): Likewise.
8272
+ * config/rs6000/rs6000.md (QHI): New mode iterator for power8
8273
+ atomic load/store instructions.
8276
+2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
8278
+ Back port from the trunk
8280
+ 2013-06-11 Michael Meissner <meissner@linux.vnet.ibm.com>
8281
+ Pat Haugen <pthaugen@us.ibm.com>
8282
+ Peter Bergner <bergner@vnet.ibm.com>
8284
+ * config/rs6000/rs6000.c (emit_load_locked): Add support for
8285
+ power8 byte, half-word, and quad-word atomic instructions.
8286
+ (emit_store_conditional): Likewise.
8287
+ (rs6000_expand_atomic_compare_and_swap): Likewise.
8288
+ (rs6000_expand_atomic_op): Likewise.
8290
+ * config/rs6000/sync.md (larx): Add new modes for power8.
8292
+ (AINT): New mode iterator to include TImode as well as normal
8293
+ integer modes on power8.
8294
+ (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so
8295
+ that VSX registers are not considered. Use AINT mode iterator
8296
+ instead of INT1 to allow inclusion of quad word atomic operations
8298
+ (load_locked<mode>): Likewise.
8299
+ (store_conditional<mode>): Likewise.
8300
+ (atomic_compare_and_swap<mode>): Likewise.
8301
+ (atomic_exchange<mode>): Likewise.
8302
+ (atomic_nand<mode>): Likewise.
8303
+ (atomic_fetch_<fetchop_name><mode>): Likewise.
8304
+ (atomic_nand_fetch<mode>): Likewise.
8305
+ (mem_thread_fence): Use gen_loadsync_<mode> instead of enumerating
8307
+ (ATOMIC): On power8, add QImode, HImode modes.
8308
+ (load_locked<QHI:mode>_si): Variants of load_locked for QI/HI
8309
+ modes that promote to SImode.
8310
+ (load_lockedti): Convert TImode arguments to PTImode, so that we
8311
+ get a guaranteed even/odd register pair.
8312
+ (load_lockedpti): Likewise.
8313
+ (store_conditionalti): Likewise.
8314
+ (store_conditionalpti): Likewise.
8316
+ * config/rs6000/rs6000.md (QHI): New mode iterator for power8
8317
+ atomic load/store instructions.
8321
+ * config/rs6000/driver-rs6000.c (elf_platform): Make buffer static
8322
+ to allow returning address to AT_PLATFORM name.
8324
+ Back port from the trunk
8326
+ 2013-06-10 Michael Meissner <meissner@linux.vnet.ibm.com>
8327
+ Pat Haugen <pthaugen@us.ibm.com>
8328
+ Peter Bergner <bergner@vnet.ibm.com>
8330
+ * config/rs6000/vector.md (GPR move splitter): Do not split moves
8331
+ of vectors in GPRS if they are direct moves or quad word load or
8334
+ * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add
8336
+ (direct_move_p): Likewise.
8337
+ (quad_load_store_p): Likewise.
8339
+ * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register
8340
+ classes into bins based on the physical register type.
8341
+ (reg_class_to_reg_type): Likewise.
8342
+ (IS_STD_REG_TYPE): Likewise.
8343
+ (IS_FP_VECT_REG_TYPE): Likewise.
8344
+ (reload_fpr_gpr): Arrays to determine what insn to use if we can
8345
+ use direct move instructions.
8346
+ (reload_gpr_vsx): Likewise.
8347
+ (reload_vsx_gpr): Likewise.
8348
+ (rs6000_init_hard_regno_mode_ok): Precalculate the register type
8349
+ information that is a simplification of register classes. Also
8350
+ precalculate direct move reload helpers.
8351
+ (direct_move_p): New function to return true if the operation can
8352
+ be done as a direct move instruction.
8353
+ (quad_load_store_p): New function to return true if the operation
8354
+ is a quad memory operation.
8355
+ (rs6000_legitimize_address): If quad memory, only allow register
8356
+ indirect for TImode addresses.
8357
+ (rs6000_legitimate_address_p): Likewise.
8358
+ (enum reload_reg_type): Delete, replace with rs6000_reg_type.
8359
+ (rs6000_reload_register_type): Likewise.
8360
+ (register_to_reg_type): Return register type.
8361
+ (rs6000_secondary_reload_simple_move): New helper function for
8362
+ secondary reload and secondary memory needed to identify anything
8363
+ that is a simple move, and does not need reloading.
8364
+ (rs6000_secondary_reload_direct_move): New helper function for
8365
+ secondary reload to identify cases that can be done with several
8366
+ instructions via the direct move instructions.
8367
+ (rs6000_secondary_reload_move): New helper function for secondary
8368
+ reload to identify moves between register types that can be done.
8369
+ (rs6000_secondary_reload): Add support for quad memory operations
8370
+ and for direct move.
8371
+ (rs6000_secondary_memory_needed): Likewise.
8372
+ (rs6000_debug_secondary_memory_needed): Change argument names.
8373
+ (rs6000_output_move_128bit): New function to return the move to
8374
+ use for 128-bit moves, including knowing about the various
8375
+ limitations of quad memory operations.
8377
+ * config/rs6000/vsx.md (vsx_mov<mode>): Add support for quad
8378
+ memory operations. call rs6000_output_move_128bit for the actual
8379
+ instruction(s) to generate.
8380
+ (vsx_movti_64bit): Likewise.
8382
+ * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values.
8383
+ (UNSPEC_P8V_MTVSRWZ): Likewise.
8384
+ (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise.
8385
+ (UNSPEC_P8V_MTVSRD): Likewise.
8386
+ (UNSPEC_P8V_XXPERMDI): Likewise.
8387
+ (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise.
8388
+ (UNSPEC_FUSION_GPR): Likewise.
8389
+ (FMOVE128_GPR): New iterator for direct move.
8390
+ (f32_lv): New mode attribute for load/store of SFmode/SDmode
8392
+ (f32_sv): Likewise.
8393
+ (f32_dm): Likewise.
8394
+ (zero_extend<mode>di2_internal1): Add support for power8 32-bit
8395
+ loads and direct move instructions.
8396
+ (zero_extendsidi2_lfiwzx): Likewise.
8397
+ (extendsidi2_lfiwax): Likewise.
8398
+ (extendsidi2_nocell): Likewise.
8399
+ (floatsi<mode>2_lfiwax): Likewise.
8400
+ (lfiwax): Likewise.
8401
+ (floatunssi<mode>2_lfiwzx): Likewise.
8402
+ (lfiwzx): Likewise.
8403
+ (fix_trunc<mode>_stfiwx): Likewise.
8404
+ (fixuns_trunc<mode>_stfiwx): Likewise.
8405
+ (mov<mode>_hardfloat, 32-bit floating point): Likewise.
8406
+ (mov<move>_hardfloat64, 64-bit floating point): Likewise.
8407
+ (parity<mode>2_cmpb): Set length/type attr.
8408
+ (unnamed shift right patterns, mov<mode>_internal2): Change type attr
8409
+ for 'mr.' to fast_compare.
8410
+ (bpermd_<mode>): Change type attr to popcnt.
8411
+ (p8_fmrgow_<mode>): New insns for power8 direct move support.
8412
+ (p8_mtvsrwz_1): Likewise.
8413
+ (p8_mtvsrwz_2): Likewise.
8414
+ (reload_fpr_from_gpr<mode>): Likewise.
8415
+ (p8_mtvsrd_1): Likewise.
8416
+ (p8_mtvsrd_2): Likewise.
8417
+ (p8_xxpermdi_<mode>): Likewise.
8418
+ (reload_vsx_from_gpr<mode>): Likewise.
8419
+ (reload_vsx_from_gprsf): Likewise.
8420
+ (p8_mfvsrd_3_<mode>): Likewise.
8421
+ (reload_gpr_from_vsx<mode>): Likewise.
8422
+ (reload_gpr_from_vsxsf): Likewise.
8423
+ (p8_mfvsrd_4_disf): Likewise.
8424
+ (multi-word GPR splits): Do not split direct moves or quad memory
8427
+2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
8429
+ Backport from the trunk
8431
+ 2013-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
8432
+ Pat Haugen <pthaugen@us.ibm.com>
8433
+ Peter Bergner <bergner@vnet.ibm.com>
8435
+ * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions):
8436
+ Document new power8 builtins.
8438
+ * config/rs6000/vector.md (and<mode>3): Add a clobber/scratch of a
8439
+ condition code register, to allow 128-bit logical operations to be
8440
+ done in the VSX or GPR registers.
8441
+ (nor<mode>3): Use the canonical form for nor.
8442
+ (eqv<mode>3): Add expanders for power8 xxleqv, xxlnand, xxlorc,
8443
+ vclz*, and vpopcnt* vector instructions.
8444
+ (nand<mode>3): Likewise.
8445
+ (orc<mode>3): Likewise.
8446
+ (clz<mode>2): Likewise.
8447
+ (popcount<mode>2): Likewise.
8449
+ * config/rs6000/predicates.md (int_reg_operand): Rework tests so
8450
+ that only the GPRs are recognized.
8452
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
8453
+ support for new power8 builtins.
8455
+ * config/rs6000/rs6000-builtin.def (xscvspdpn): Add new power8
8456
+ builtin functions.
8457
+ (xscvdpspn): Likewise.
8459
+ (vclzb): Likewise.
8460
+ (vclzh): Likewise.
8461
+ (vclzw): Likewise.
8462
+ (vclzd): Likewise.
8463
+ (vpopcnt): Likewise.
8464
+ (vpopcntb): Likewise.
8465
+ (vpopcnth): Likewise.
8466
+ (vpopcntw): Likewise.
8467
+ (vpopcntd): Likewise.
8468
+ (vgbbd): Likewise.
8469
+ (vmrgew): Likewise.
8470
+ (vmrgow): Likewise.
8472
+ (eqv_v16qi3): Likewise.
8473
+ (eqv_v8hi3): Likewise.
8474
+ (eqv_v4si3): Likewise.
8475
+ (eqv_v2di3): Likewise.
8476
+ (eqv_v4sf3): Likewise.
8477
+ (eqv_v2df3): Likewise.
8479
+ (nand_v16qi3): Likewise.
8480
+ (nand_v8hi3): Likewise.
8481
+ (nand_v4si3): Likewise.
8482
+ (nand_v2di3): Likewise.
8483
+ (nand_v4sf3): Likewise.
8484
+ (nand_v2df3): Likewise.
8486
+ (orc_v16qi3): Likewise.
8487
+ (orc_v8hi3): Likewise.
8488
+ (orc_v4si3): Likewise.
8489
+ (orc_v2di3): Likewise.
8490
+ (orc_v4sf3): Likewise.
8491
+ (orc_v2df3): Likewise.
8493
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Only
8494
+ allow power8 quad mode in 64-bit.
8495
+ (rs6000_builtin_vectorized_function): Add support to vectorize
8496
+ ISA 2.07 count leading zeros, population count builtins.
8497
+ (rs6000_expand_vector_init): On ISA 2.07 use xscvdpspn to form
8498
+ V4SF vectors instead of xscvdpsp to avoid IEEE related traps.
8499
+ (builtin_function_type): Add vgbbd builtin function which takes an
8500
+ unsigned argument.
8501
+ (altivec_expand_vec_perm_const): Add support for new power8 merge
8504
+ * config/rs6000/vsx.md (VSX_L2): New iterator for 128-bit types,
8505
+ that does not include TImode for use with 32-bit.
8506
+ (UNSPEC_VSX_CVSPDPN): Support for power8 xscvdpspn and xscvspdpn
8508
+ (UNSPEC_VSX_CVDPSPN): Likewise.
8509
+ (vsx_xscvdpspn): Likewise.
8510
+ (vsx_xscvspdpn): Likewise.
8511
+ (vsx_xscvdpspn_scalar): Likewise.
8512
+ (vsx_xscvspdpn_directmove): Likewise.
8513
+ (vsx_and<mode>3): Split logical operations into 32-bit and
8514
+ 64-bit. Add support to do logical operations on TImode as well as
8515
+ VSX vector types. Allow logical operations to be done in either
8516
+ VSX registers or in general purpose registers in 64-bit mode. Add
8517
+ splitters if GPRs were used. For AND, add clobber of CCmode to
8518
+ allow use of ANDI on GPRs. Rewrite nor to use the canonical RTL
8520
+ (vsx_and<mode>3_32bit): Likewise.
8521
+ (vsx_and<mode>3_64bit): Likewise.
8522
+ (vsx_ior<mode>3): Likewise.
8523
+ (vsx_ior<mode>3_32bit): Likewise.
8524
+ (vsx_ior<mode>3_64bit): Likewise.
8525
+ (vsx_xor<mode>3): Likewise.
8526
+ (vsx_xor<mode>3_32bit): Likewise.
8527
+ (vsx_xor<mode>3_64bit): Likewise.
8528
+ (vsx_one_cmpl<mode>2): Likewise.
8529
+ (vsx_one_cmpl<mode>2_32bit): Likewise.
8530
+ (vsx_one_cmpl<mode>2_64bit): Likewise.
8531
+ (vsx_nor<mode>3): Likewise.
8532
+ (vsx_nor<mode>3_32bit): Likewise.
8533
+ (vsx_nor<mode>3_64bit): Likewise.
8534
+ (vsx_andc<mode>3): Likewise.
8535
+ (vsx_andc<mode>3_32bit): Likewise.
8536
+ (vsx_andc<mode>3_64bit): Likewise.
8537
+ (vsx_eqv<mode>3_32bit): Add support for power8 xxleqv, xxlnand,
8538
+ and xxlorc instructions.
8539
+ (vsx_eqv<mode>3_64bit): Likewise.
8540
+ (vsx_nand<mode>3_32bit): Likewise.
8541
+ (vsx_nand<mode>3_64bit): Likewise.
8542
+ (vsx_orc<mode>3_32bit): Likewise.
8543
+ (vsx_orc<mode>3_64bit): Likewise.
8545
+ * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Update comment.
8547
+ * config/rs6000/altivec.md (UNSPEC_VGBBD): Add power8 vgbbd
8549
+ (p8_vmrgew): Add power8 vmrgew and vmrgow instructions.
8550
+ (p8_vmrgow): Likewise.
8551
+ (altivec_and<mode>3): Add clobber of CCmode to allow AND using
8552
+ GPRs to be split under VSX.
8553
+ (p8v_clz<mode>2): Add power8 count leading zero support.
8554
+ (p8v_popcount<mode>2): Add power8 population count support.
8555
+ (p8v_vgbbd): Add power8 gather bits by bytes by doubleword
8558
+ * config/rs6000/rs6000.md (eqv<mode>3): Add support for powerpc eqv
8561
+ * config/rs6000/altivec.h (vec_eqv): Add defines to export power8
8562
+ builtin functions.
8563
+ (vec_nand): Likewise.
8564
+ (vec_vclz): Likewise.
8565
+ (vec_vclzb): Likewise.
8566
+ (vec_vclzd): Likewise.
8567
+ (vec_vclzh): Likewise.
8568
+ (vec_vclzw): Likewise.
8569
+ (vec_vgbbd): Likewise.
8570
+ (vec_vmrgew): Likewise.
8571
+ (vec_vmrgow): Likewise.
8572
+ (vec_vpopcnt): Likewise.
8573
+ (vec_vpopcntb): Likewise.
8574
+ (vec_vpopcntd): Likewise.
8575
+ (vec_vpopcnth): Likewise.
8576
+ (vec_vpopcntw): Likewise.
8578
+2013-06-06 Peter Bergner <bergner@vnet.ibm.com>
8580
+ Merge up to 199753.
8581
+ * REVISION: Update subversion id.
8583
+2013-06-06 Peter Bergner <bergner@vnet.ibm.com>
8585
+ Backport from trunk
8587
+ 2013-05-29 Michael Meissner <meissner@linux.vnet.ibm.com>
8588
+ Pat Haugen <pthaugen@us.ibm.com>
8589
+ Peter Bergner <bergner@vnet.ibm.com>
8591
+ * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI
8593
+ (VEC_A): Likewise.
8594
+ (VEC_C): Likewise.
8595
+ (vrotl<mode>3): Likewise.
8596
+ (vashl<mode>3): Likewise.
8597
+ (vlshr<mode>3): Likewise.
8598
+ (vashr<mode>3): Likewise.
8600
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
8601
+ support for power8 V2DI builtins.
8603
+ * config/rs6000/rs6000-builtin.def (abs_v2di): Add support for
8604
+ power8 V2DI builtins.
8605
+ (vupkhsw): Likewise.
8606
+ (vupklsw): Likewise.
8607
+ (vaddudm): Likewise.
8608
+ (vminsd): Likewise.
8609
+ (vmaxsd): Likewise.
8610
+ (vminud): Likewise.
8611
+ (vmaxud): Likewise.
8612
+ (vpkudum): Likewise.
8613
+ (vpksdss): Likewise.
8614
+ (vpkudus): Likewise.
8615
+ (vpksdus): Likewise.
8619
+ (vsrad): Likewise.
8620
+ (vsubudm): Likewise.
8621
+ (vcmpequd): Likewise.
8622
+ (vcmpgtsd): Likewise.
8623
+ (vcmpgtud): Likewise.
8624
+ (vcmpequd_p): Likewise.
8625
+ (vcmpgtsd_p): Likewise.
8626
+ (vcmpgtud_p): Likewise.
8627
+ (vupkhsw): Likewise.
8628
+ (vupklsw): Likewise.
8629
+ (vaddudm): Likewise.
8630
+ (vmaxsd): Likewise.
8631
+ (vmaxud): Likewise.
8632
+ (vminsd): Likewise.
8633
+ (vminud): Likewise.
8634
+ (vpksdss): Likewise.
8635
+ (vpksdus): Likewise.
8636
+ (vpkudum): Likewise.
8637
+ (vpkudus): Likewise.
8640
+ (vsrad): Likewise.
8642
+ (vsubudm): Likewise.
8644
+ * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Add
8645
+ support for power8 V2DI instructions.
8647
+ * config/rs6000/altivec.md (UNSPEC_VPKUHUM): Add support for
8648
+ power8 V2DI instructions. Combine pack and unpack insns to use an
8649
+ iterator for each mode. Check whether a particular mode supports
8650
+ Altivec instructions instead of just checking TARGET_ALTIVEC.
8651
+ (UNSPEC_VPKUWUM): Likewise.
8652
+ (UNSPEC_VPKSHSS): Likewise.
8653
+ (UNSPEC_VPKSWSS): Likewise.
8654
+ (UNSPEC_VPKUHUS): Likewise.
8655
+ (UNSPEC_VPKSHUS): Likewise.
8656
+ (UNSPEC_VPKUWUS): Likewise.
8657
+ (UNSPEC_VPKSWUS): Likewise.
8658
+ (UNSPEC_VPACK_SIGN_SIGN_SAT): Likewise.
8659
+ (UNSPEC_VPACK_SIGN_UNS_SAT): Likewise.
8660
+ (UNSPEC_VPACK_UNS_UNS_SAT): Likewise.
8661
+ (UNSPEC_VPACK_UNS_UNS_MOD): Likewise.
8662
+ (UNSPEC_VUPKHSB): Likewise.
8663
+ (UNSPEC_VUNPACK_HI_SIGN): Likewise.
8664
+ (UNSPEC_VUNPACK_LO_SIGN): Likewise.
8665
+ (UNSPEC_VUPKHSH): Likewise.
8666
+ (UNSPEC_VUPKLSB): Likewise.
8667
+ (UNSPEC_VUPKLSH): Likewise.
8669
+ (VI_char): Likewise.
8670
+ (VI_scalar): Likewise.
8671
+ (VI_unit): Likewise.
8673
+ (VP_small): Likewise.
8674
+ (VP_small_lc): Likewise.
8675
+ (VU_char): Likewise.
8676
+ (add<mode>3): Likewise.
8677
+ (altivec_vaddcuw): Likewise.
8678
+ (altivec_vaddu<VI_char>s): Likewise.
8679
+ (altivec_vadds<VI_char>s): Likewise.
8680
+ (sub<mode>3): Likewise.
8681
+ (altivec_vsubcuw): Likewise.
8682
+ (altivec_vsubu<VI_char>s): Likewise.
8683
+ (altivec_vsubs<VI_char>s): Likewise.
8684
+ (altivec_vavgs<VI_char>): Likewise.
8685
+ (altivec_vcmpbfp): Likewise.
8686
+ (altivec_eq<mode>): Likewise.
8687
+ (altivec_gt<mode>): Likewise.
8688
+ (altivec_gtu<mode>): Likewise.
8689
+ (umax<mode>3): Likewise.
8690
+ (smax<mode>3): Likewise.
8691
+ (umin<mode>3): Likewise.
8692
+ (smin<mode>3): Likewise.
8693
+ (altivec_vpkuhum): Likewise.
8694
+ (altivec_vpkuwum): Likewise.
8695
+ (altivec_vpkshss): Likewise.
8696
+ (altivec_vpkswss): Likewise.
8697
+ (altivec_vpkuhus): Likewise.
8698
+ (altivec_vpkshus): Likewise.
8699
+ (altivec_vpkuwus): Likewise.
8700
+ (altivec_vpkswus): Likewise.
8701
+ (altivec_vpks<VI_char>ss): Likewise.
8702
+ (altivec_vpks<VI_char>us): Likewise.
8703
+ (altivec_vpku<VI_char>us): Likewise.
8704
+ (altivec_vpku<VI_char>um): Likewise.
8705
+ (altivec_vrl<VI_char>): Likewise.
8706
+ (altivec_vsl<VI_char>): Likewise.
8707
+ (altivec_vsr<VI_char>): Likewise.
8708
+ (altivec_vsra<VI_char>): Likewise.
8709
+ (altivec_vsldoi_<mode>): Likewise.
8710
+ (altivec_vupkhsb): Likewise.
8711
+ (altivec_vupkhs<VU_char>): Likewise.
8712
+ (altivec_vupkls<VU_char>): Likewise.
8713
+ (altivec_vupkhsh): Likewise.
8714
+ (altivec_vupklsb): Likewise.
8715
+ (altivec_vupklsh): Likewise.
8716
+ (altivec_vcmpequ<VI_char>_p): Likewise.
8717
+ (altivec_vcmpgts<VI_char>_p): Likewise.
8718
+ (altivec_vcmpgtu<VI_char>_p): Likewise.
8719
+ (abs<mode>2): Likewise.
8720
+ (vec_unpacks_hi_v16qi): Likewise.
8721
+ (vec_unpacks_hi_v8hi): Likewise.
8722
+ (vec_unpacks_lo_v16qi): Likewise.
8723
+ (vec_unpacks_hi_<VP_small_lc>): Likewise.
8724
+ (vec_unpacks_lo_v8hi): Likewise.
8725
+ (vec_unpacks_lo_<VP_small_lc>): Likewise.
8726
+ (vec_pack_trunc_v8h): Likewise.
8727
+ (vec_pack_trunc_v4si): Likewise.
8728
+ (vec_pack_trunc_<mode>): Likewise.
8730
+ * config/rs6000/altivec.h (vec_vaddudm): Add defines for power8
8732
+ (vec_vmaxsd): Likewise.
8733
+ (vec_vmaxud): Likewise.
8734
+ (vec_vminsd): Likewise.
8735
+ (vec_vminud): Likewise.
8736
+ (vec_vpksdss): Likewise.
8737
+ (vec_vpksdus): Likewise.
8738
+ (vec_vpkudum): Likewise.
8739
+ (vec_vpkudus): Likewise.
8740
+ (vec_vrld): Likewise.
8741
+ (vec_vsld): Likewise.
8742
+ (vec_vsrad): Likewise.
8743
+ (vec_vsrd): Likewise.
8744
+ (vec_vsubudm): Likewise.
8745
+ (vec_vupkhsw): Likewise.
8746
+ (vec_vupklsw): Likewise.
8748
+ 2013-05-22 Michael Meissner <meissner@linux.vnet.ibm.com>
8749
+ Pat Haugen <pthaugen@us.ibm.com>
8750
+ Peter Bergner <bergner@vnet.ibm.com>
8752
+ * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add
8753
+ documentation for the power8 crypto builtins.
8755
+ * config/rs6000/t-rs6000 (MD_INCLUDES): Add crypto.md.
8757
+ * config/rs6000/rs6000-builtin.def (BU_P8V_AV_1): Add support
8758
+ macros for defining power8 builtin functions.
8759
+ (BU_P8V_AV_2): Likewise.
8760
+ (BU_P8V_AV_P): Likewise.
8761
+ (BU_P8V_VSX_1): Likewise.
8762
+ (BU_P8V_OVERLOAD_1): Likewise.
8763
+ (BU_P8V_OVERLOAD_2): Likewise.
8764
+ (BU_CRYPTO_1): Likewise.
8765
+ (BU_CRYPTO_2): Likewise.
8766
+ (BU_CRYPTO_3): Likewise.
8767
+ (BU_CRYPTO_OVERLOAD_1): Likewise.
8768
+ (BU_CRYPTO_OVERLOAD_2): Likewise.
8769
+ (XSCVSPDP): Fix typo, point to the correct instruction.
8770
+ (VCIPHER): Add power8 crypto builtins.
8771
+ (VCIPHERLAST): Likewise.
8772
+ (VNCIPHER): Likewise.
8773
+ (VNCIPHERLAST): Likewise.
8774
+ (VPMSUMB): Likewise.
8775
+ (VPMSUMH): Likewise.
8776
+ (VPMSUMW): Likewise.
8777
+ (VPERMXOR_V2DI): Likewise.
8778
+ (VPERMXOR_V4SI: Likewise.
8779
+ (VPERMXOR_V8HI: Likewise.
8780
+ (VPERMXOR_V16QI: Likewise.
8781
+ (VSHASIGMAW): Likewise.
8782
+ (VSHASIGMAD): Likewise.
8783
+ (VPMSUM): Likewise.
8784
+ (VPERMXOR): Likewise.
8785
+ (VSHASIGMA): Likewise.
8787
+ * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
8788
+ __CRYPTO__ if the crypto instructions are available.
8789
+ (altivec_overloaded_builtins): Add support for overloaded power8
8792
+ * config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add
8793
+ support for power8 crypto builtins.
8794
+ (builtin_function_type): Likewise.
8795
+ (altivec_init_builtins): Add support for builtins that take vector
8796
+ long long (V2DI) arguments.
8798
+ * config/rs6000/crypto.md: New file, define power8 crypto
8801
+ 2013-05-22 Michael Meissner <meissner@linux.vnet.ibm.com>
8802
+ Pat Haugen <pthaugen@us.ibm.com>
8803
+ Peter Bergner <bergner@vnet.ibm.com>
8805
+ * doc/invoke.texi (Option Summary): Add power8 options.
8806
+ (RS/6000 and PowerPC Options): Likewise.
8808
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Update to use
8809
+ constraints.md instead of rs6000.h. Reorder w* constraints. Add
8810
+ wm, wn, wr documentation.
8812
+ * gcc/config/rs6000/constraints.md (wm): New constraint for VSX
8813
+ registers if direct move instructions are enabled.
8814
+ (wn): New constraint for no registers.
8815
+ (wq): New constraint for quad word even GPR registers.
8816
+ (wr): New constraint if 64-bit instructions are enabled.
8817
+ (wv): New constraint if power8 vector instructions are enabled.
8818
+ (wQ): New constraint for quad word memory locations.
8820
+ * gcc/config/rs6000/predicates.md (const_0_to_15_operand): New
8821
+ constraint for 0..15 for crypto instructions.
8822
+ (gpc_reg_operand): If VSX allow registers in VSX registers as well
8823
+ as GPR and floating point registers.
8824
+ (int_reg_operand): New predicate to match only GPR registers.
8825
+ (base_reg_operand): New predicate to match base registers.
8826
+ (quad_int_reg_operand): New predicate to match even GPR registers
8827
+ for quad memory operations.
8828
+ (vsx_reg_or_cint_operand): New predicate to allow vector logical
8829
+ operations in both GPR and VSX registers.
8830
+ (quad_memory_operand): New predicate for quad memory operations.
8831
+ (reg_or_indexed_operand): New predicate for direct move support.
8833
+ * gcc/config/rs6000/rs6000-cpus.def (ISA_2_5_MASKS_EMBEDDED):
8834
+ Inherit from ISA_2_4_MASKS, not ISA_2_2_MASKS.
8835
+ (ISA_2_7_MASKS_SERVER): New mask for ISA 2.07 (i.e. power8).
8836
+ (POWERPC_MASKS): Add power8 options.
8837
+ (power8 cpu): Use ISA_2_7_MASKS_SERVER instead of specifying the
8840
+ * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros):
8841
+ Define _ARCH_PWR8 and __POWER8_VECTOR__ for power8.
8843
+ * gcc/config/rs6000/rs6000.opt (-mvsx-timode): Add documentation.
8844
+ (-mpower8-fusion): New power8 options.
8845
+ (-mpower8-fusion-sign): Likewise.
8846
+ (-mpower8-vector): Likewise.
8847
+ (-mcrypto): Likewise.
8848
+ (-mdirect-move): Likewise.
8849
+ (-mquad-memory): Likewise.
8851
+ * gcc/config/rs6000/rs6000.c (power8_cost): Initial definition for
8853
+ (rs6000_hard_regno_mode_ok): Make PTImode only match even GPR
8855
+ (rs6000_debug_reg_print): Print the base register class if
8857
+ (rs6000_debug_vector_unit): Add p8_vector.
8858
+ (rs6000_debug_reg_global): If -mdebug=reg, print power8 constraint
8859
+ definitions. Also print fusion state.
8860
+ (rs6000_init_hard_regno_mode_ok): Set up power8 constraints.
8861
+ (rs6000_builtin_mask_calculate): Add power8 builtin support.
8862
+ (rs6000_option_override_internal): Add support for power8.
8863
+ (rs6000_common_init_builtins): Add debugging for skipped builtins
8864
+ if -mdebug=builtin.
8865
+ (rs6000_adjust_cost): Add power8 support.
8866
+ (rs6000_issue_rate): Likewise.
8867
+ (insn_must_be_first_in_group): Likewise.
8868
+ (insn_must_be_last_in_group): Likewise.
8869
+ (force_new_group): Likewise.
8870
+ (rs6000_register_move_cost): Likewise.
8871
+ (rs6000_opt_masks): Likewise.
8873
+ * config/rs6000/rs6000.h (ASM_CPU_POWER8_SPEC): If we don't have a
8874
+ power8 capable assembler, default to power7 options.
8875
+ (TARGET_DIRECT_MOVE): Likewise.
8876
+ (TARGET_CRYPTO): Likewise.
8877
+ (TARGET_P8_VECTOR): Likewise.
8878
+ (VECTOR_UNIT_P8_VECTOR_P): Define power8 vector support.
8879
+ (VECTOR_UNIT_VSX_OR_P8_VECTOR_P): Likewise.
8880
+ (VECTOR_MEM_P8_VECTOR_P): Likewise.
8881
+ (VECTOR_MEM_VSX_OR_P8_VECTOR_P): Likewise.
8882
+ (VECTOR_MEM_ALTIVEC_OR_VSX_P): Likewise.
8883
+ (TARGET_XSCVDPSPN): Likewise.
8884
+ (TARGET_XSCVSPDPN): Likewsie.
8885
+ (TARGET_SYNC_HI_QI): Likewise.
8886
+ (TARGET_SYNC_TI): Likewise.
8887
+ (MASK_CRYPTO): Likewise.
8888
+ (MASK_DIRECT_MOVE): Likewise.
8889
+ (MASK_P8_FUSION): Likewise.
8890
+ (MASK_P8_VECTOR): Likewise.
8891
+ (REG_ALLOC_ORDER): Move fr13 to be lower in priority so that the
8892
+ TFmode temporary used by some of the direct move instructions to
8893
+ get two FP temporary registers does not force creation of a stack
8895
+ (VLOGICAL_REGNO_P): Allow vector logical operations in GPRs.
8896
+ (MODES_TIEABLE_P): Move the VSX tests above the Altivec tests so
8897
+ that any VSX registers are tieable, even if they are also an
8898
+ Altivec vector mode.
8899
+ (r6000_reg_class_enum): Add wm, wr, wv constraints.
8900
+ (RS6000_BTM_P8_VECTOR): Power8 builtin support.
8901
+ (RS6000_BTM_CRYPTO): Likewise.
8902
+ (RS6000_BTM_COMMON): Likewise.
8904
+ * config/rs6000/rs6000.md (cpu attribute): Add power8.
8905
+ * config/rs6000/rs6000-opts.h (PROCESSOR_POWER8): Likewise.
8906
+ (enum rs6000_vector): Add power8 vector support.
8908
+2013-05-06 Michael Meissner <meissner@linux.vnet.ibm.com>
8910
+ Merge up to 198656.
8911
+ * REVISION: Update subversion id.
8913
+ Backport from trunk
8914
+ 2013-05-03 Michael Meissner <meissner@linux.vnet.ibm.com>
8917
+ * config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Use DFmode
8918
+ to save TFmode registers and DImode to save TImode registers for
8919
+ caller save operations.
8920
+ (HARD_REGNO_CALL_PART_CLOBBERED): TFmode and TDmode do not need to
8921
+ mark being partially clobbered since they only use the first
8924
+ * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): TFmode
8925
+ and TDmode only use the upper 64-bits of each VSX register.
8927
+2013-04-09 Michael Meissner <meissner@linux.vnet.ibm.com>
8929
+ Merge up to 197642.
8930
+ * REVISION: Update subversion id.
8932
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
8934
+ Backport from mainline
8935
+ 2013-03-20 Pat Haugen <pthaugen@us.ibm.com>
8937
+ * config/rs6000/predicates.md (indexed_address, update_address_mem
8938
+ update_indexed_address_mem): New predicates.
8939
+ * config/rs6000/vsx.md (vsx_extract_<mode>_zero): Set correct "type"
8940
+ attribute for load/store instructions.
8941
+ * config/rs6000/dfp.md (movsd_store): Likewise.
8942
+ (movsd_load): Likewise.
8943
+ * config/rs6000/rs6000.md (zero_extend<mode>di2_internal1): Likewise.
8944
+ (unnamed HI->DI extend define_insn): Likewise.
8945
+ (unnamed SI->DI extend define_insn): Likewise.
8946
+ (unnamed QI->SI extend define_insn): Likewise.
8947
+ (unnamed QI->HI extend define_insn): Likewise.
8948
+ (unnamed HI->SI extend define_insn): Likewise.
8949
+ (unnamed HI->SI extend define_insn): Likewise.
8950
+ (extendsfdf2_fpr): Likewise.
8951
+ (movsi_internal1): Likewise.
8952
+ (movsi_internal1_single): Likewise.
8953
+ (movhi_internal): Likewise.
8954
+ (movqi_internal): Likewise.
8955
+ (movcc_internal1): Correct mnemonic for stw insn. Set correct "type"
8956
+ attribute for load/store instructions.
8957
+ (mov<mode>_hardfloat): Set correct "type" attribute for load/store
8959
+ (mov<mode>_softfloat): Likewise.
8960
+ (mov<mode>_hardfloat32): Likewise.
8961
+ (mov<mode>_hardfloat64): Likewise.
8962
+ (mov<mode>_softfloat64): Likewise.
8963
+ (movdi_internal32): Likewise.
8964
+ (movdi_internal64): Likewise.
8965
+ (probe_stack_<mode>): Likewise.
8967
+ Backport from mainline
8968
+ 2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
8970
+ * config/rs6000/vector.md (VEC_R): Add 32-bit integer, binary
8971
+ floating point, and decimal floating point to reload iterator.
8973
+ * config/rs6000/constraints.md (wl constraint): New constraints to
8974
+ return FLOAT_REGS if certain options are used to reduce the number
8975
+ of separate patterns that exist in the file.
8976
+ (wx constraint): Likewise.
8977
+ (wz constraint): Likewise.
8979
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): If
8980
+ -mdebug=reg, print wg, wl, wx, and wz constraints.
8981
+ (rs6000_init_hard_regno_mode_ok): Initialize new constraints.
8982
+ Initialize the reload functions for 64-bit binary/decimal floating
8984
+ (reg_offset_addressing_ok_p): If we are on a power7 or later, use
8985
+ LFIWZX and STFIWX to load/store 32-bit decimal types, and don't
8986
+ create the buffer on the stack to overcome not having a 32-bit
8988
+ (rs6000_emit_move): Likewise.
8989
+ (rs6000_secondary_memory_needed_rtx): Likewise.
8990
+ (rs6000_alloc_sdmode_stack_slot): Likewise.
8991
+ (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f
8992
+ via xxlxor, just like DFmode 0.0.
8994
+ * config/rs6000/rs6000.h (TARGET_NO_SDMODE_STACK): New macro,
8995
+ define as 1 if we are running on a power7 or newer.
8996
+ (enum r6000_reg_class_enum): Add new constraints.
8998
+ * config/rs6000/dfp.md (movsd): Delete, combine with binary
8999
+ floating point moves in rs6000.md. Combine power6x (mfpgpr) moves
9000
+ with other moves by using conditional constraits (wg). Use LFIWZX
9001
+ and STFIWX for loading SDmode on power7. Use xxlxor to create
9003
+ (movsd splitter): Likewise.
9004
+ (movsd_hardfloat): Likewise.
9005
+ (movsd_softfloat): Likewise.
9007
+ * config/rs6000/rs6000.md (FMOVE32): New iterators to combine
9008
+ binary and decimal floating point moves.
9009
+ (fmove_ok): New attributes to combine binary and decimal floating
9010
+ point moves, and to combine power6x (mfpgpr) moves along normal
9012
+ (real_value_to_target): Likewise.
9013
+ (f32_lr): Likewise.
9014
+ (f32_lm): Likewise.
9015
+ (f32_li): Likewise.
9016
+ (f32_sr): Likewise.
9017
+ (f32_sm): Likewise.
9018
+ (f32_si): Likewise.
9019
+ (movsf): Combine binary and decimal floating point moves. Combine
9020
+ power6x (mfpgpr) moves with other moves by using conditional
9021
+ constraits (wg). Use LFIWZX and STFIWX for loading SDmode on
9023
+ (mov<mode> for SFmode/SDmode); Likewise.
9024
+ (SFmode/SDmode splitters): Likewise.
9025
+ (movsf_hardfloat): Likewise.
9026
+ (mov<mode>_hardfloat for SFmode/SDmode): Likewise.
9027
+ (movsf_softfloat): Likewise.
9028
+ (mov<mode>_softfloat for SFmode/SDmode): Likewise.
9030
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wl,
9031
+ wx and wz constraints.
9033
+ * config/rs6000/constraints.md (wg constraint): New constraint to
9034
+ return FLOAT_REGS if -mmfpgpr (power6x) was used.
9036
+ * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wg
9039
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): If
9040
+ -mdebug=reg, print wg, wl, wx, and wz constraints.
9041
+ (rs6000_init_hard_regno_mode_ok): Initialize new constraints.
9042
+ Initialize the reload functions for 64-bit binary/decimal floating
9044
+ (reg_offset_addressing_ok_p): If we are on a power7 or later, use
9045
+ LFIWZX and STFIWX to load/store 32-bit decimal types, and don't
9046
+ create the buffer on the stack to overcome not having a 32-bit
9048
+ (rs6000_emit_move): Likewise.
9049
+ (rs6000_secondary_memory_needed_rtx): Likewise.
9050
+ (rs6000_alloc_sdmode_stack_slot): Likewise.
9051
+ (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f
9052
+ via xxlxor, just like DFmode 0.0.
9055
+ * config/rs6000/dfp.md (movdd): Delete, combine with binary
9056
+ floating point moves in rs6000.md. Combine power6x (mfpgpr) moves
9057
+ with other moves by using conditional constraits (wg). Use LFIWZX
9058
+ and STFIWX for loading SDmode on power7.
9059
+ (movdd splitters): Likewise.
9060
+ (movdd_hardfloat32): Likewise.
9061
+ (movdd_softfloat32): Likewise.
9062
+ (movdd_hardfloat64_mfpgpr): Likewise.
9063
+ (movdd_hardfloat64): Likewise.
9064
+ (movdd_softfloat64): Likewise.
9066
+ * config/rs6000/rs6000.md (FMOVE64): New iterators to combine
9067
+ 64-bit binary and decimal floating point moves.
9068
+ (FMOVE64X): Likewise.
9069
+ (movdf): Combine 64-bit binary and decimal floating point moves.
9070
+ Combine power6x (mfpgpr) moves with other moves by using
9071
+ conditional constraits (wg).
9072
+ (mov<mode> for DFmode/DDmode): Likewise.
9073
+ (DFmode/DDmode splitters): Likewise.
9074
+ (movdf_hardfloat32): Likewise.
9075
+ (mov<mode>_hardfloat32 for DFmode/DDmode): Likewise.
9076
+ (movdf_softfloat32): Likewise.
9077
+ (movdf_hardfloat64_mfpgpr): Likewise.
9078
+ (movdf_hardfloat64): Likewise.
9079
+ (mov<mode>_hardfloat64 for DFmode/DDmode): Likewise.
9080
+ (movdf_softfloat64): Likewise.
9081
+ (mov<mode>_softfloat64 for DFmode/DDmode): Likewise.
9082
+ (reload_<mode>_load): Move to later in the file so they aren't in
9083
+ the middle of the floating point move insns.
9084
+ (reload_<mode>_store): Likewise.
9086
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wg
9089
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print out wg
9090
+ constraint if -mdebug=reg.
9091
+ (rs6000_initi_hard_regno_mode_ok): Enable wg constraint if
9092
+ -mfpgpr. Enable using dd reload support if needed.
9094
+ * config/rs6000/dfp.md (movtd): Delete, combine with 128-bit
9095
+ binary and decimal floating point moves in rs6000.md.
9096
+ (movtd_internal): Likewise.
9098
+ * config/rs6000/rs6000.md (FMOVE128): Combine 128-bit binary and
9099
+ decimal floating point moves.
9100
+ (movtf): Likewise.
9101
+ (movtf_internal): Likewise.
9102
+ (mov<mode>_internal, TDmode/TFmode): Likewise.
9103
+ (movtf_softfloat): Likewise.
9104
+ (mov<mode>_softfloat, TDmode/TFmode): Likewise.
9106
+ * config/rs6000/rs6000.md (movdi_mfpgpr): Delete, combine with
9107
+ movdi_internal64, using wg constraint for move direct operations.
9108
+ (movdi_internal64): Likewise.
9110
+ * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print
9111
+ MODES_TIEABLE_P for selected modes. Print the numerical value of
9112
+ the various virtual registers. Use GPR/FPR first/last values,
9113
+ instead of hard coding the register numbers. Print which modes
9114
+ have reload functions registered.
9115
+ (rs6000_option_override_internal): If -mdebug=reg, trace the
9116
+ options settings before/after setting cpu, target and subtarget
9118
+ (rs6000_secondary_reload_trace): Improve the RTL dump for
9119
+ -mdebug=addr and for secondary reload failures in
9120
+ rs6000_secondary_reload_inner.
9121
+ (rs6000_secondary_reload_fail): Likewise.
9122
+ (rs6000_secondary_reload_inner): Likewise.
9124
+ * config/rs6000/rs6000.md (FIRST_GPR_REGNO): Add convenience
9125
+ macros for first/last GPR and FPR registers.
9126
+ (LAST_GPR_REGNO): Likewise.
9127
+ (FIRST_FPR_REGNO): Likewise.
9128
+ (LAST_FPR_REGNO): Likewise.
9130
+ * config/rs6000/vector.md (mul<mode>3): Use the combined macro
9131
+ VECTOR_UNIT_ALTIVEC_OR_VSX_P instead of separate calls to
9132
+ VECTOR_UNIT_ALTIVEC_P and VECTOR_UNIT_VSX_P.
9133
+ (vcond<mode><mode>): Likewise.
9134
+ (vcondu<mode><mode>): Likewise.
9135
+ (vector_gtu<mode>): Likewise.
9136
+ (vector_gte<mode>): Likewise.
9137
+ (xor<mode>3): Don't allow logical operations on TImode in 32-bit
9138
+ to prevent the compiler from converting DImode operations to
9140
+ (ior<mode>3): Likewise.
9141
+ (and<mode>3): Likewise.
9142
+ (one_cmpl<mode>2): Likewise.
9143
+ (nor<mode>3): Likewise.
9144
+ (andc<mode>3): Likewise.
9146
+ * config/rs6000/constraints.md (wt constraint): New constraint
9147
+ that returns VSX_REGS if TImode is allowed in VSX registers.
9149
+ * config/rs6000/predicates.md (easy_fp_constant): 0.0f is an easy
9150
+ constant under VSX.
9152
+ * config/rs6000/rs6000-modes.def (PTImode): Define, PTImode is
9153
+ similar to TImode, but it is restricted to being in the GPRs.
9155
+ * config/rs6000/rs6000.opt (-mvsx-timode): New switch to allow
9156
+ TImode to occupy a single VSX register.
9158
+ * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Default to
9159
+ -mvsx-timode for power7/power8.
9160
+ (power7 cpu): Likewise.
9161
+ (power8 cpu): Likewise.
9163
+ * config/rs6000/rs6000.c (rs6000_hard_regno_nregs_internal): Make
9164
+ sure that TFmode/TDmode take up two registers if they are ever
9165
+ allowed in the upper VSX registers.
9166
+ (rs6000_hard_regno_mode_ok): If -mvsx-timode, allow TImode in VSX
9168
+ (rs6000_init_hard_regno_mode_ok): Likewise.
9169
+ (rs6000_debug_reg_global): Add debugging for PTImode and wt
9170
+ constraint. Print if LRA is turned on.
9171
+ (rs6000_option_override_internal): Give an error if -mvsx-timode
9172
+ and VSX is not enabled.
9173
+ (invalid_e500_subreg): Handle PTImode, restricting it to GPRs. If
9174
+ -mvsx-timode, restrict TImode to reg+reg addressing, and PTImode
9175
+ to reg+offset addressing. Use PTImode when checking offset
9176
+ addresses for validity.
9177
+ (reg_offset_addressing_ok_p): Likewise.
9178
+ (rs6000_legitimate_offset_address_p): Likewise.
9179
+ (rs6000_legitimize_address): Likewise.
9180
+ (rs6000_legitimize_reload_address): Likewise.
9181
+ (rs6000_legitimate_address_p): Likewise.
9182
+ (rs6000_eliminate_indexed_memrefs): Likewise.
9183
+ (rs6000_emit_move): Likewise.
9184
+ (rs6000_secondary_reload): Likewise.
9185
+ (rs6000_secondary_reload_inner): Handle PTImode. Allow 64-bit
9186
+ reloads to fpr registers to continue to use reg+offset addressing,
9187
+ but 64-bit reloads to altivec registers need reg+reg addressing.
9188
+ Drop test for PRE_MODIFY, since VSX loads/stores no longer support
9189
+ it. Treat LO_SUM like a PLUS operation.
9190
+ (rs6000_secondary_reload_class): If type is 64-bit, prefer to use
9191
+ FLOAT_REGS instead of VSX_RGS to allow use of reg+offset
9193
+ (rs6000_cannot_change_mode_class): Do not allow TImode in VSX
9194
+ registers to share a register with a smaller sized type, since VSX
9195
+ puts scalars in the upper 64-bits.
9196
+ (print_operand): Add support for PTImode.
9197
+ (rs6000_register_move_cost): Use VECTOR_MEM_VSX_P instead of
9198
+ VECTOR_UNIT_VSX_P to catch types that can be loaded in VSX
9199
+ registers, but don't have arithmetic support.
9200
+ (rs6000_memory_move_cost): Add test for VSX.
9201
+ (rs6000_opt_masks): Add -mvsx-timode.
9203
+ * config/rs6000/vsx.md (VSm): Change to use 64-bit aligned moves
9206
+ (VSr): Use wt constraint for TImode.
9207
+ (VSv): Drop TImode support.
9208
+ (vsx_movti): Delete, replace with versions for 32-bit and 64-bit.
9209
+ (vsx_movti_64bit): Likewise.
9210
+ (vsx_movti_32bit): Likewise.
9211
+ (vec_store_<mode>): Use VSX iterator instead of vector iterator.
9212
+ (vsx_and<mode>3): Delete use of '?' constraint on inputs, just put
9213
+ one '?' on the appropriate output constraint. Do not allow TImode
9214
+ logical operations on 32-bit systems.
9215
+ (vsx_ior<mode>3): Likewise.
9216
+ (vsx_xor<mode>3): Likewise.
9217
+ (vsx_one_cmpl<mode>2): Likewise.
9218
+ (vsx_nor<mode>3): Likewise.
9219
+ (vsx_andc<mode>3): Likewise.
9220
+ (vsx_concat_<mode>): Likewise.
9221
+ (vsx_xxpermdi_<mode>): Fix thinko for non V2DF/V2DI modes.
9223
+ * config/rs6000/rs6000.h (MASK_VSX_TIMODE): Map from
9224
+ OPTION_MASK_VSX_TIMODE.
9225
+ (enum rs6000_reg_class_enum): Add RS6000_CONSTRAINT_wt.
9226
+ (STACK_SAVEAREA_MODE): Use PTImode instead of TImode.
9228
+ * config/rs6000/rs6000.md (INT mode attribute): Add PTImode.
9229
+ (TI2 iterator): New iterator for TImode, PTImode.
9230
+ (wd mode attribute): Add values for vector types.
9231
+ (movti_string): Replace TI move operations with operations for
9232
+ TImode and PTImode. Add support for TImode being allowed in VSX
9234
+ (mov<mode>_string, TImode/PTImode): Likewise.
9235
+ (movti_ppc64): Likewise.
9236
+ (mov<mode>_ppc64, TImode/PTImode): Likewise.
9237
+ (TI mode splitters): Likewise.
9239
+ * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wt
9242
+2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
9244
+ Clone branch from gcc-4_8-branch, subversion id 196835.
9245
+ * REVISION: New file, track subversion id.
9247
--- a/src/gcc/calls.c
9248
+++ b/src/gcc/calls.c
9251
for (i = 0; i < num_actuals; i++)
9252
if (args[i].reg != 0 && ! args[i].pass_on_stack
9253
+ && GET_CODE (args[i].reg) != PARALLEL
9254
&& args[i].mode == BLKmode
9255
&& MEM_P (args[i].value)
9256
&& (MEM_ALIGN (args[i].value)
9257
@@ -1327,6 +1328,7 @@
9261
+ reg_parm_stack_space,
9262
args[i].pass_on_stack ? 0 : args[i].partial,
9263
fndecl, args_size, &args[i].locate);
9264
#ifdef BLOCK_REG_PADDING
9265
@@ -3171,7 +3173,9 @@
9266
group load/store machinery below. */
9267
if (!structure_value_addr
9268
&& !pcc_struct_value
9269
+ && TYPE_MODE (rettype) != VOIDmode
9270
&& TYPE_MODE (rettype) != BLKmode
9272
&& targetm.calls.return_in_msb (rettype))
9274
if (shift_return_value (TYPE_MODE (rettype), false, valreg))
9275
@@ -3734,7 +3738,8 @@
9277
argvec[count].reg != 0,
9279
- 0, NULL_TREE, &args_size, &argvec[count].locate);
9280
+ reg_parm_stack_space, 0,
9281
+ NULL_TREE, &args_size, &argvec[count].locate);
9283
if (argvec[count].reg == 0 || argvec[count].partial != 0
9284
|| reg_parm_stack_space > 0)
9285
@@ -3821,7 +3826,7 @@
9287
argvec[count].reg != 0,
9289
- argvec[count].partial,
9290
+ reg_parm_stack_space, argvec[count].partial,
9291
NULL_TREE, &args_size, &argvec[count].locate);
9292
args_size.constant += argvec[count].locate.size.constant;
9293
gcc_assert (!argvec[count].locate.size.var);
9294
--- a/src/gcc/REVISION
9295
+++ b/src/gcc/REVISION
9297
+[ibm/gcc-4_8-branch merged from gcc-4_8-branch, revision 206665]
9298
--- a/src/gcc/config.gcc
9299
+++ b/src/gcc/config.gcc
9304
- extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h"
9305
+ extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h"
9306
need_64bit_hwint=yes
9308
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[345678]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|Xe6500)
9309
@@ -3509,7 +3509,7 @@
9312
powerpc*-*-* | rs6000-*-*)
9313
- supported_defaults="cpu cpu_32 cpu_64 float tune tune_32 tune_64"
9314
+ supported_defaults="abi cpu cpu_32 cpu_64 float tune tune_32 tune_64"
9316
for which in cpu cpu_32 cpu_64 tune tune_32 tune_64; do
9317
eval "val=\$with_$which"
9318
@@ -3546,6 +3546,16 @@
9323
+ case "$with_abi" in
9324
+ "" | elfv1 | elfv2 )
9328
+ echo "Unknown ABI used in --with-abi=$with_abi"
9335
--- a/src/gcc/config/rs6000/power8.md
9336
+++ b/src/gcc/config/rs6000/power8.md
9338
+;; Scheduling description for IBM POWER8 processor.
9339
+;; Copyright (C) 2013 Free Software Foundation, Inc.
9341
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
9343
+;; This file is part of GCC.
9345
+;; GCC is free software; you can redistribute it and/or modify it
9346
+;; under the terms of the GNU General Public License as published
9347
+;; by the Free Software Foundation; either version 3, or (at your
9348
+;; option) any later version.
9350
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
9351
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
9352
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
9353
+;; License for more details.
9355
+;; You should have received a copy of the GNU General Public License
9356
+;; along with GCC; see the file COPYING3. If not see
9357
+;; <http://www.gnu.org/licenses/>.
9359
+(define_automaton "power8fxu,power8lsu,power8vsu,power8misc")
9361
+(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu")
9362
+(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu")
9363
+(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu")
9364
+(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu")
9365
+(define_cpu_unit "bpu_power8,cru_power8" "power8misc")
9366
+(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\
9367
+ du5_power8,du6_power8" "power8misc")
9370
+; Dispatch group reservations
9371
+(define_reservation "DU_any_power8"
9372
+ "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\
9375
+; 2-way Cracked instructions go in slots 0-1
9376
+; (can also have a second in slots 3-4 if insns are adjacent)
9377
+(define_reservation "DU_cracked_power8"
9378
+ "du0_power8+du1_power8")
9380
+; Insns that are first in group
9381
+(define_reservation "DU_first_power8"
9384
+; Insns that are first and last in group
9385
+(define_reservation "DU_both_power8"
9386
+ "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\
9387
+ du5_power8+du6_power8")
9389
+; Dispatch slots are allocated in order conforming to program order.
9390
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\
9391
+ du5_power8,du6_power8")
9392
+(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\
9394
+(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8")
9395
+(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8")
9396
+(absence_set "du4_power8" "du5_power8,du6_power8")
9397
+(absence_set "du5_power8" "du6_power8")
9400
+; Execution unit reservations
9401
+(define_reservation "FXU_power8"
9402
+ "fxu0_power8|fxu1_power8")
9404
+(define_reservation "LU_power8"
9405
+ "lu0_power8|lu1_power8")
9407
+(define_reservation "LSU_power8"
9408
+ "lsu0_power8|lsu1_power8")
9410
+(define_reservation "LU_or_LSU_power8"
9411
+ "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8")
9413
+(define_reservation "VSU_power8"
9414
+ "vsu0_power8|vsu1_power8")
9418
+(define_insn_reservation "power8-load" 3
9419
+ (and (eq_attr "type" "load")
9420
+ (eq_attr "cpu" "power8"))
9421
+ "DU_any_power8,LU_or_LSU_power8")
9423
+(define_insn_reservation "power8-load-update" 3
9424
+ (and (eq_attr "type" "load_u,load_ux")
9425
+ (eq_attr "cpu" "power8"))
9426
+ "DU_cracked_power8,LU_or_LSU_power8+FXU_power8")
9428
+(define_insn_reservation "power8-load-ext" 3
9429
+ (and (eq_attr "type" "load_ext")
9430
+ (eq_attr "cpu" "power8"))
9431
+ "DU_cracked_power8,LU_or_LSU_power8,FXU_power8")
9433
+(define_insn_reservation "power8-load-ext-update" 3
9434
+ (and (eq_attr "type" "load_ext_u,load_ext_ux")
9435
+ (eq_attr "cpu" "power8"))
9436
+ "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8")
9438
+(define_insn_reservation "power8-fpload" 5
9439
+ (and (eq_attr "type" "fpload,vecload")
9440
+ (eq_attr "cpu" "power8"))
9441
+ "DU_any_power8,LU_power8")
9443
+(define_insn_reservation "power8-fpload-update" 5
9444
+ (and (eq_attr "type" "fpload_u,fpload_ux")
9445
+ (eq_attr "cpu" "power8"))
9446
+ "DU_cracked_power8,LU_power8+FXU_power8")
9448
+(define_insn_reservation "power8-store" 5 ; store-forwarding latency
9449
+ (and (eq_attr "type" "store,store_u")
9450
+ (eq_attr "cpu" "power8"))
9451
+ "DU_any_power8,LSU_power8+LU_power8")
9453
+(define_insn_reservation "power8-store-update-indexed" 5
9454
+ (and (eq_attr "type" "store_ux")
9455
+ (eq_attr "cpu" "power8"))
9456
+ "DU_cracked_power8,LSU_power8+LU_power8")
9458
+(define_insn_reservation "power8-fpstore" 5
9459
+ (and (eq_attr "type" "fpstore")
9460
+ (eq_attr "cpu" "power8"))
9461
+ "DU_any_power8,LSU_power8+VSU_power8")
9463
+(define_insn_reservation "power8-fpstore-update" 5
9464
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
9465
+ (eq_attr "cpu" "power8"))
9466
+ "DU_any_power8,LSU_power8+VSU_power8")
9468
+(define_insn_reservation "power8-vecstore" 5
9469
+ (and (eq_attr "type" "vecstore")
9470
+ (eq_attr "cpu" "power8"))
9471
+ "DU_cracked_power8,LSU_power8+VSU_power8")
9473
+(define_insn_reservation "power8-larx" 3
9474
+ (and (eq_attr "type" "load_l")
9475
+ (eq_attr "cpu" "power8"))
9476
+ "DU_both_power8,LU_or_LSU_power8")
9478
+(define_insn_reservation "power8-stcx" 10
9479
+ (and (eq_attr "type" "store_c")
9480
+ (eq_attr "cpu" "power8"))
9481
+ "DU_both_power8,LSU_power8+LU_power8")
9483
+(define_insn_reservation "power8-sync" 1
9484
+ (and (eq_attr "type" "sync,isync")
9485
+ (eq_attr "cpu" "power8"))
9486
+ "DU_both_power8,LSU_power8")
9490
+(define_insn_reservation "power8-1cyc" 1
9491
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
9492
+ var_shift_rotate,exts,isel")
9493
+ (eq_attr "cpu" "power8"))
9494
+ "DU_any_power8,FXU_power8")
9496
+; Extra cycle to LU/LSU
9497
+(define_bypass 2 "power8-1cyc"
9498
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
9499
+ power8-vecstore,power8-larx,power8-stcx")
9500
+; "power8-load,power8-load-update,power8-load-ext,\
9501
+; power8-load-ext-update,power8-fpload,power8-fpload-update,\
9502
+; power8-store,power8-store-update,power8-store-update-indexed,\
9503
+; power8-fpstore,power8-fpstore-update,power8-vecstore,\
9504
+; power8-larx,power8-stcx")
9506
+(define_insn_reservation "power8-2cyc" 2
9507
+ (and (eq_attr "type" "cntlz,popcnt")
9508
+ (eq_attr "cpu" "power8"))
9509
+ "DU_any_power8,FXU_power8")
9511
+(define_insn_reservation "power8-two" 2
9512
+ (and (eq_attr "type" "two")
9513
+ (eq_attr "cpu" "power8"))
9514
+ "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8")
9516
+(define_insn_reservation "power8-three" 3
9517
+ (and (eq_attr "type" "three")
9518
+ (eq_attr "cpu" "power8"))
9519
+ "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8")
9521
+; cmp - Normal compare insns
9522
+(define_insn_reservation "power8-cmp" 2
9523
+ (and (eq_attr "type" "cmp")
9524
+ (eq_attr "cpu" "power8"))
9525
+ "DU_any_power8,FXU_power8")
9527
+; fast_compare : add./and./nor./etc
9528
+(define_insn_reservation "power8-fast-compare" 2
9529
+ (and (eq_attr "type" "fast_compare")
9530
+ (eq_attr "cpu" "power8"))
9531
+ "DU_any_power8,FXU_power8")
9533
+; compare : rldicl./exts./etc
9534
+; delayed_compare : rlwinm./slwi./etc
9535
+; var_delayed_compare : rlwnm./slw./etc
9536
+(define_insn_reservation "power8-compare" 2
9537
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
9538
+ (eq_attr "cpu" "power8"))
9539
+ "DU_cracked_power8,FXU_power8,FXU_power8")
9541
+; Extra cycle to LU/LSU
9542
+(define_bypass 3 "power8-fast-compare,power8-compare"
9543
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
9544
+ power8-vecstore,power8-larx,power8-stcx")
9546
+; 5 cycle CR latency
9547
+(define_bypass 5 "power8-fast-compare,power8-compare"
9548
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
9550
+(define_insn_reservation "power8-mul" 4
9551
+ (and (eq_attr "type" "imul,imul2,imul3,lmul")
9552
+ (eq_attr "cpu" "power8"))
9553
+ "DU_any_power8,FXU_power8")
9555
+(define_insn_reservation "power8-mul-compare" 4
9556
+ (and (eq_attr "type" "imul_compare,lmul_compare")
9557
+ (eq_attr "cpu" "power8"))
9558
+ "DU_cracked_power8,FXU_power8")
9560
+; Extra cycle to LU/LSU
9561
+(define_bypass 5 "power8-mul,power8-mul-compare"
9562
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
9563
+ power8-vecstore,power8-larx,power8-stcx")
9565
+; 7 cycle CR latency
9566
+(define_bypass 7 "power8-mul,power8-mul-compare"
9567
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
9569
+; FXU divides are not pipelined
9570
+(define_insn_reservation "power8-idiv" 37
9571
+ (and (eq_attr "type" "idiv")
9572
+ (eq_attr "cpu" "power8"))
9573
+ "DU_any_power8,fxu0_power8*37|fxu1_power8*37")
9575
+(define_insn_reservation "power8-ldiv" 68
9576
+ (and (eq_attr "type" "ldiv")
9577
+ (eq_attr "cpu" "power8"))
9578
+ "DU_any_power8,fxu0_power8*68|fxu1_power8*68")
9580
+(define_insn_reservation "power8-mtjmpr" 5
9581
+ (and (eq_attr "type" "mtjmpr")
9582
+ (eq_attr "cpu" "power8"))
9583
+ "DU_first_power8,FXU_power8")
9585
+; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode
9586
+(define_insn_reservation "power8-mtcr" 3
9587
+ (and (eq_attr "type" "mtcr")
9588
+ (eq_attr "cpu" "power8"))
9589
+ "DU_both_power8,FXU_power8")
9593
+(define_insn_reservation "power8-mfjmpr" 5
9594
+ (and (eq_attr "type" "mfjmpr")
9595
+ (eq_attr "cpu" "power8"))
9596
+ "DU_first_power8,cru_power8+FXU_power8")
9598
+(define_insn_reservation "power8-crlogical" 3
9599
+ (and (eq_attr "type" "cr_logical,delayed_cr")
9600
+ (eq_attr "cpu" "power8"))
9601
+ "DU_first_power8,cru_power8")
9603
+(define_insn_reservation "power8-mfcr" 5
9604
+ (and (eq_attr "type" "mfcr")
9605
+ (eq_attr "cpu" "power8"))
9606
+ "DU_both_power8,cru_power8")
9608
+(define_insn_reservation "power8-mfcrf" 3
9609
+ (and (eq_attr "type" "mfcrf")
9610
+ (eq_attr "cpu" "power8"))
9611
+ "DU_first_power8,cru_power8")
9615
+; Branches take dispatch slot 7, but reserve any remaining prior slots to
9616
+; prevent other insns from grabbing them once this is assigned.
9617
+(define_insn_reservation "power8-branch" 3
9618
+ (and (eq_attr "type" "jmpreg,branch")
9619
+ (eq_attr "cpu" "power8"))
9621
+ |du5_power8+du6_power8\
9622
+ |du4_power8+du5_power8+du6_power8\
9623
+ |du3_power8+du4_power8+du5_power8+du6_power8\
9624
+ |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
9625
+ |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
9626
+ |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\
9627
+ du6_power8),bpu_power8")
9629
+; Branch updating LR/CTR feeding mf[lr|ctr]
9630
+(define_bypass 4 "power8-branch" "power8-mfjmpr")
9633
+; VS Unit (includes FP/VSX/VMX/DFP/Crypto)
9634
+(define_insn_reservation "power8-fp" 6
9635
+ (and (eq_attr "type" "fp,dmul")
9636
+ (eq_attr "cpu" "power8"))
9637
+ "DU_any_power8,VSU_power8")
9639
+; Additional 3 cycles for any CR result
9640
+(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch")
9642
+(define_insn_reservation "power8-fpcompare" 8
9643
+ (and (eq_attr "type" "fpcompare")
9644
+ (eq_attr "cpu" "power8"))
9645
+ "DU_any_power8,VSU_power8")
9647
+(define_insn_reservation "power8-sdiv" 27
9648
+ (and (eq_attr "type" "sdiv")
9649
+ (eq_attr "cpu" "power8"))
9650
+ "DU_any_power8,VSU_power8")
9652
+(define_insn_reservation "power8-ddiv" 33
9653
+ (and (eq_attr "type" "ddiv")
9654
+ (eq_attr "cpu" "power8"))
9655
+ "DU_any_power8,VSU_power8")
9657
+(define_insn_reservation "power8-sqrt" 32
9658
+ (and (eq_attr "type" "ssqrt")
9659
+ (eq_attr "cpu" "power8"))
9660
+ "DU_any_power8,VSU_power8")
9662
+(define_insn_reservation "power8-dsqrt" 44
9663
+ (and (eq_attr "type" "dsqrt")
9664
+ (eq_attr "cpu" "power8"))
9665
+ "DU_any_power8,VSU_power8")
9667
+(define_insn_reservation "power8-vecsimple" 2
9668
+ (and (eq_attr "type" "vecperm,vecsimple,veccmp")
9669
+ (eq_attr "cpu" "power8"))
9670
+ "DU_any_power8,VSU_power8")
9672
+(define_insn_reservation "power8-vecnormal" 6
9673
+ (and (eq_attr "type" "vecfloat,vecdouble")
9674
+ (eq_attr "cpu" "power8"))
9675
+ "DU_any_power8,VSU_power8")
9677
+(define_bypass 7 "power8-vecnormal"
9678
+ "power8-vecsimple,power8-veccomplex,power8-fpstore*,\
9681
+(define_insn_reservation "power8-veccomplex" 7
9682
+ (and (eq_attr "type" "veccomplex")
9683
+ (eq_attr "cpu" "power8"))
9684
+ "DU_any_power8,VSU_power8")
9686
+(define_insn_reservation "power8-vecfdiv" 25
9687
+ (and (eq_attr "type" "vecfdiv")
9688
+ (eq_attr "cpu" "power8"))
9689
+ "DU_any_power8,VSU_power8")
9691
+(define_insn_reservation "power8-vecdiv" 31
9692
+ (and (eq_attr "type" "vecdiv")
9693
+ (eq_attr "cpu" "power8"))
9694
+ "DU_any_power8,VSU_power8")
9696
+(define_insn_reservation "power8-mffgpr" 5
9697
+ (and (eq_attr "type" "mffgpr")
9698
+ (eq_attr "cpu" "power8"))
9699
+ "DU_any_power8,VSU_power8")
9701
+(define_insn_reservation "power8-mftgpr" 6
9702
+ (and (eq_attr "type" "mftgpr")
9703
+ (eq_attr "cpu" "power8"))
9704
+ "DU_any_power8,VSU_power8")
9706
+(define_insn_reservation "power8-crypto" 7
9707
+ (and (eq_attr "type" "crypto")
9708
+ (eq_attr "cpu" "power8"))
9709
+ "DU_any_power8,VSU_power8")
9711
--- a/src/gcc/config/rs6000/vector.md
9712
+++ b/src/gcc/config/rs6000/vector.md
9717
-(define_mode_iterator VEC_I [V16QI V8HI V4SI])
9718
+(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
9720
;; Vector float modes
9721
(define_mode_iterator VEC_F [V4SF V2DF])
9723
;; Vector arithmetic modes
9724
-(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF])
9725
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF])
9727
;; Vector modes that need alginment via permutes
9728
(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
9730
(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
9732
;; Vector comparison modes
9733
-(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
9734
+(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF])
9736
;; Vector init/extract modes
9737
(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
9739
(define_mode_iterator VEC_64 [V2DI V2DF])
9741
;; Vector reload iterator
9742
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
9743
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF SF SD SI DF DD DI TI])
9745
;; Base type from vector mode
9746
(define_mode_attr VEC_base [(V16QI "QI")
9751
-;; Vector move instructions.
9752
+;; Vector move instructions. Little-endian VSX loads and stores require
9753
+;; special handling to circumvent "element endianness."
9754
(define_expand "mov<mode>"
9755
[(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
9756
(match_operand:VEC_M 1 "any_operand" ""))]
9757
@@ -104,6 +105,16 @@
9758
&& !vlogical_operand (operands[1], <MODE>mode))
9759
operands[1] = force_reg (<MODE>mode, operands[1]);
9761
+ if (!BYTES_BIG_ENDIAN
9762
+ && VECTOR_MEM_VSX_P (<MODE>mode)
9763
+ && <MODE>mode != TImode
9764
+ && !gpr_or_gpr_p (operands[0], operands[1])
9765
+ && (memory_operand (operands[0], <MODE>mode)
9766
+ ^ memory_operand (operands[1], <MODE>mode)))
9768
+ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
9773
;; Generic vector floating point load/store instructions. These will match
9775
(match_operand:VEC_L 1 "input_operand" ""))]
9776
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
9778
- && gpr_or_gpr_p (operands[0], operands[1])"
9779
+ && gpr_or_gpr_p (operands[0], operands[1])
9780
+ && !direct_move_p (operands[0], operands[1])
9781
+ && !quad_load_store_p (operands[0], operands[1])"
9784
rs6000_split_multireg_move (operands[0], operands[1]);
9786
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
9787
(mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
9788
(match_operand:VEC_F 2 "vfloat_operand" "")))]
9789
- "VECTOR_UNIT_VSX_P (<MODE>mode) || VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
9790
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9792
if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
9795
(match_operand:VEC_I 5 "vint_operand" "")])
9796
(match_operand:VEC_I 1 "vint_operand" "")
9797
(match_operand:VEC_I 2 "vint_operand" "")))]
9798
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
9799
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9802
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
9804
(match_operand:VEC_I 5 "vint_operand" "")])
9805
(match_operand:VEC_I 1 "vint_operand" "")
9806
(match_operand:VEC_I 2 "vint_operand" "")))]
9807
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
9808
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9811
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
9812
@@ -505,14 +518,14 @@
9813
[(set (match_operand:VEC_I 0 "vint_operand" "")
9814
(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
9815
(match_operand:VEC_I 2 "vint_operand" "")))]
9816
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
9817
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9820
(define_expand "vector_geu<mode>"
9821
[(set (match_operand:VEC_I 0 "vint_operand" "")
9822
(geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
9823
(match_operand:VEC_I 2 "vint_operand" "")))]
9824
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
9825
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9828
(define_insn_and_split "*vector_uneq<mode>"
9829
@@ -708,48 +721,19 @@
9833
-;; Vector logical instructions
9834
-(define_expand "xor<mode>3"
9835
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
9836
- (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
9837
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
9838
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9840
+;; Vector count leading zeros
9841
+(define_expand "clz<mode>2"
9842
+ [(set (match_operand:VEC_I 0 "register_operand" "")
9843
+ (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
9844
+ "TARGET_P8_VECTOR")
9846
-(define_expand "ior<mode>3"
9847
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
9848
- (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
9849
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
9850
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9852
+;; Vector population count
9853
+(define_expand "popcount<mode>2"
9854
+ [(set (match_operand:VEC_I 0 "register_operand" "")
9855
+ (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
9856
+ "TARGET_P8_VECTOR")
9858
-(define_expand "and<mode>3"
9859
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
9860
- (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
9861
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
9862
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9865
-(define_expand "one_cmpl<mode>2"
9866
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
9867
- (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))]
9868
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9871
-(define_expand "nor<mode>3"
9872
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
9873
- (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
9874
- (match_operand:VEC_L 2 "vlogical_operand" ""))))]
9875
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9878
-(define_expand "andc<mode>3"
9879
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
9880
- (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))
9881
- (match_operand:VEC_L 1 "vlogical_operand" "")))]
9882
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9886
;; Same size conversions
9887
(define_expand "float<VEC_int><mode>2"
9888
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
9891
rtx reg = gen_reg_rtx (V4SFmode);
9893
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
9894
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
9895
emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
9900
rtx reg = gen_reg_rtx (V4SFmode);
9902
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
9903
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
9904
emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
9909
rtx reg = gen_reg_rtx (V4SImode);
9911
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
9912
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
9913
emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
9918
rtx reg = gen_reg_rtx (V4SImode);
9920
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
9921
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
9922
emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
9927
rtx reg = gen_reg_rtx (V4SImode);
9929
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
9930
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
9931
emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
9936
rtx reg = gen_reg_rtx (V4SImode);
9938
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
9939
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
9940
emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
9943
@@ -963,8 +947,19 @@
9944
(match_operand:V16QI 3 "vlogical_operand" "")]
9945
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
9947
- emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2],
9949
+ if (BYTES_BIG_ENDIAN)
9950
+ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
9951
+ operands[2], operands[3]));
9954
+ /* We have changed lvsr to lvsl, so to complete the transformation
9955
+ of vperm for LE, we must swap the inputs. */
9956
+ rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
9957
+ gen_rtvec (3, operands[2],
9958
+ operands[1], operands[3]),
9960
+ emit_move_insn (operands[0], unspec);
9965
@@ -1064,7 +1059,7 @@
9966
[(set (match_operand:VEC_I 0 "vint_operand" "")
9967
(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
9968
(match_operand:VEC_I 2 "vint_operand" "")))]
9970
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9973
;; Expanders for arithmetic shift left on each vector element
9974
@@ -1072,7 +1067,7 @@
9975
[(set (match_operand:VEC_I 0 "vint_operand" "")
9976
(ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
9977
(match_operand:VEC_I 2 "vint_operand" "")))]
9979
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9982
;; Expanders for logical shift right on each vector element
9983
@@ -1080,7 +1075,7 @@
9984
[(set (match_operand:VEC_I 0 "vint_operand" "")
9985
(lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
9986
(match_operand:VEC_I 2 "vint_operand" "")))]
9988
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
9991
;; Expanders for arithmetic shift right on each vector element
9992
@@ -1088,7 +1083,7 @@
9993
[(set (match_operand:VEC_I 0 "vint_operand" "")
9994
(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
9995
(match_operand:VEC_I 2 "vint_operand" "")))]
9997
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
10000
;; Vector reduction expanders for VSX
10001
--- a/src/gcc/config/rs6000/constraints.md
10002
+++ b/src/gcc/config/rs6000/constraints.md
10003
@@ -52,22 +52,62 @@
10006
;; Use w as a prefix to add VSX modes
10007
-;; vector double (V2DF)
10008
+;; any VSX register
10009
+(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
10010
+ "Any VSX register if the -mvsx option was used or NO_REGS.")
10012
(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
10014
+ "VSX vector register to hold vector double data or NO_REGS.")
10016
-;; vector float (V4SF)
10017
(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
10019
+ "VSX vector register to hold vector float data or NO_REGS.")
10021
-;; scalar double (DF)
10022
+(define_register_constraint "wg" "rs6000_constraints[RS6000_CONSTRAINT_wg]"
10023
+ "If -mmfpgpr was used, a floating point register or NO_REGS.")
10025
+(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
10026
+ "Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
10028
+(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]"
10029
+ "VSX register if direct move instructions are enabled, or NO_REGS.")
10031
+;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use
10032
+;; direct move directly, and movsf can't to move between the register sets.
10033
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode
10034
+(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).")
10036
+(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
10037
+ "General purpose register if 64-bit instructions are enabled or NO_REGS.")
10039
(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
10041
+ "VSX vector register to hold scalar double values or NO_REGS.")
10043
-;; any VSX register
10044
-(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
10046
+(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
10047
+ "VSX vector register to hold 128 bit integer or NO_REGS.")
10049
+(define_register_constraint "wu" "rs6000_constraints[RS6000_CONSTRAINT_wu]"
10050
+ "Altivec register to use for float/32-bit int loads/stores or NO_REGS.")
10052
+(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
10053
+ "Altivec register to use for double loads/stores or NO_REGS.")
10055
+(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]"
10056
+ "FP or VSX register to perform float operations under -mvsx or NO_REGS.")
10058
+(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
10059
+ "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
10061
+(define_register_constraint "wy" "rs6000_constraints[RS6000_CONSTRAINT_wy]"
10062
+ "VSX vector register to hold scalar float values or NO_REGS.")
10064
+(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
10065
+ "Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
10067
+;; Lq/stq validates the address for load/store quad
10068
+(define_memory_constraint "wQ"
10069
+ "Memory operand suitable for the load/store quad instructions"
10070
+ (match_operand 0 "quad_memory_operand"))
10072
;; Altivec style load/store that ignores the bottom bits of the address
10073
(define_memory_constraint "wZ"
10074
"Indexed or indirect memory operand, ignoring the bottom 4 bits"
10075
--- a/src/gcc/config/rs6000/predicates.md
10076
+++ b/src/gcc/config/rs6000/predicates.md
10077
@@ -124,6 +124,11 @@
10078
(and (match_code "const_int")
10079
(match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15")))
10081
+;; Return 1 if op is a unsigned 3-bit constant integer.
10082
+(define_predicate "u3bit_cint_operand"
10083
+ (and (match_code "const_int")
10084
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7")))
10086
;; Return 1 if op is a unsigned 5-bit constant integer.
10087
(define_predicate "u5bit_cint_operand"
10088
(and (match_code "const_int")
10089
@@ -135,6 +140,11 @@
10090
(and (match_code "const_int")
10091
(match_test "INTVAL (op) >= -128 && INTVAL (op) <= 127")))
10093
+;; Return 1 if op is a unsigned 10-bit constant integer.
10094
+(define_predicate "u10bit_cint_operand"
10095
+ (and (match_code "const_int")
10096
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023")))
10098
;; Return 1 if op is a constant integer that can fit in a D field.
10099
(define_predicate "short_cint_operand"
10100
(and (match_code "const_int")
10101
@@ -166,6 +176,11 @@
10102
(and (match_code "const_int")
10103
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
10105
+;; Match op = 0..15
10106
+(define_predicate "const_0_to_15_operand"
10107
+ (and (match_code "const_int")
10108
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
10110
;; Return 1 if op is a register that is not special.
10111
(define_predicate "gpc_reg_operand"
10112
(match_operand 0 "register_operand")
10113
@@ -182,9 +197,95 @@
10114
if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
10117
+ if (TARGET_VSX && VSX_REGNO_P (REGNO (op)))
10120
return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op));
10123
+;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't
10124
+;; allow floating point or vector registers.
10125
+(define_predicate "int_reg_operand"
10126
+ (match_operand 0 "register_operand")
10128
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
10131
+ if (GET_CODE (op) == SUBREG)
10132
+ op = SUBREG_REG (op);
10137
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
10140
+ return INT_REGNO_P (REGNO (op));
10143
+;; Like int_reg_operand, but only return true for base registers
10144
+(define_predicate "base_reg_operand"
10145
+ (match_operand 0 "int_reg_operand")
10147
+ if (GET_CODE (op) == SUBREG)
10148
+ op = SUBREG_REG (op);
10153
+ return (REGNO (op) != FIRST_GPR_REGNO);
10156
+;; Return 1 if op is a HTM specific SPR register.
10157
+(define_predicate "htm_spr_reg_operand"
10158
+ (match_operand 0 "register_operand")
10163
+ if (GET_CODE (op) == SUBREG)
10164
+ op = SUBREG_REG (op);
10169
+ switch (REGNO (op))
10171
+ case TFHAR_REGNO:
10172
+ case TFIAR_REGNO:
10173
+ case TEXASR_REGNO:
10179
+ /* Unknown SPR. */
10183
+;; Return 1 if op is a general purpose register that is an even register
10184
+;; which suitable for a load/store quad operation
10185
+(define_predicate "quad_int_reg_operand"
10186
+ (match_operand 0 "register_operand")
10190
+ if (!TARGET_QUAD_MEMORY)
10193
+ if (GET_CODE (op) == SUBREG)
10194
+ op = SUBREG_REG (op);
10200
+ if (r >= FIRST_PSEUDO_REGISTER)
10203
+ return (INT_REGNO_P (r) && ((r & 1) == 0));
10206
;; Return 1 if op is a register that is a condition register field.
10207
(define_predicate "cc_reg_operand"
10208
(match_operand 0 "register_operand")
10209
@@ -315,6 +416,11 @@
10210
&& CONST_DOUBLE_HIGH (op) == 0")
10211
(match_operand 0 "gpc_reg_operand"))))
10213
+;; Like reg_or_logical_cint_operand, but allow vsx registers
10214
+(define_predicate "vsx_reg_or_cint_operand"
10215
+ (ior (match_operand 0 "vsx_register_operand")
10216
+ (match_operand 0 "reg_or_logical_cint_operand")))
10218
;; Return 1 if operand is a CONST_DOUBLE that can be set in a register
10219
;; with no more than one instruction per word.
10220
(define_predicate "easy_fp_constant"
10221
@@ -333,6 +439,11 @@
10225
+ /* The constant 0.0 is easy under VSX. */
10226
+ if ((mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode)
10227
+ && VECTOR_UNIT_VSX_P (DFmode) && op == CONST0_RTX (mode))
10230
if (DECIMAL_FLOAT_MODE_P (mode))
10233
@@ -521,6 +632,54 @@
10234
(and (match_operand 0 "memory_operand")
10235
(match_test "offsettable_nonstrict_memref_p (op)")))
10237
+;; Return 1 if the operand is suitable for load/store quad memory.
10238
+(define_predicate "quad_memory_operand"
10239
+ (match_code "mem")
10241
+ rtx addr, op0, op1;
10244
+ if (!TARGET_QUAD_MEMORY)
10247
+ else if (!memory_operand (op, mode))
10250
+ else if (GET_MODE_SIZE (GET_MODE (op)) != 16)
10253
+ else if (MEM_ALIGN (op) < 128)
10258
+ addr = XEXP (op, 0);
10259
+ if (int_reg_operand (addr, Pmode))
10262
+ else if (GET_CODE (addr) != PLUS)
10267
+ op0 = XEXP (addr, 0);
10268
+ op1 = XEXP (addr, 1);
10269
+ ret = (int_reg_operand (op0, Pmode)
10270
+ && GET_CODE (op1) == CONST_INT
10271
+ && IN_RANGE (INTVAL (op1), -32768, 32767)
10272
+ && (INTVAL (op1) & 15) == 0);
10276
+ if (TARGET_DEBUG_ADDR)
10278
+ fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false");
10285
;; Return 1 if the operand is an indexed or indirect memory operand.
10286
(define_predicate "indexed_or_indirect_operand"
10288
@@ -535,6 +694,19 @@
10289
return indexed_or_indirect_address (op, mode);
10292
+;; Like indexed_or_indirect_operand, but also allow a GPR register if direct
10293
+;; moves are supported.
10294
+(define_predicate "reg_or_indexed_operand"
10295
+ (match_code "mem,reg")
10298
+ return indexed_or_indirect_operand (op, mode);
10299
+ else if (TARGET_DIRECT_MOVE)
10300
+ return register_operand (op, mode);
10305
;; Return 1 if the operand is an indexed or indirect memory operand with an
10306
;; AND -16 in it, used to recognize when we need to switch to Altivec loads
10307
;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
10308
@@ -560,6 +732,28 @@
10309
&& REG_P (XEXP (op, 1)))")
10310
(match_operand 0 "address_operand")))
10312
+;; Return 1 if the operand is an index-form address.
10313
+(define_special_predicate "indexed_address"
10314
+ (match_test "(GET_CODE (op) == PLUS
10315
+ && REG_P (XEXP (op, 0))
10316
+ && REG_P (XEXP (op, 1)))"))
10318
+;; Return 1 if the operand is a MEM with an update-form address. This may
10319
+;; also include update-indexed form.
10320
+(define_special_predicate "update_address_mem"
10321
+ (match_test "(MEM_P (op)
10322
+ && (GET_CODE (XEXP (op, 0)) == PRE_INC
10323
+ || GET_CODE (XEXP (op, 0)) == PRE_DEC
10324
+ || GET_CODE (XEXP (op, 0)) == PRE_MODIFY))"))
10326
+;; Return 1 if the operand is a MEM with an update-indexed-form address. Note
10327
+;; that PRE_INC/PRE_DEC will always be non-indexed (i.e. non X-form) since the
10328
+;; increment is based on the mode size and will therefor always be a const.
10329
+(define_special_predicate "update_indexed_address_mem"
10330
+ (match_test "(MEM_P (op)
10331
+ && GET_CODE (XEXP (op, 0)) == PRE_MODIFY
10332
+ && indexed_address (XEXP (XEXP (op, 0), 1), mode))"))
10334
;; Used for the destination of the fix_truncdfsi2 expander.
10335
;; If stfiwx will be used, the result goes to memory; otherwise,
10336
;; we're going to emit a store and a load of a subreg, so the dest is a
10337
@@ -883,7 +1077,8 @@
10338
(and (match_code "symbol_ref")
10339
(match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
10340
&& ((SYMBOL_REF_LOCAL_P (op)
10341
- && (DEFAULT_ABI != ABI_AIX
10342
+ && ((DEFAULT_ABI != ABI_AIX
10343
+ && DEFAULT_ABI != ABI_ELFv2)
10344
|| !SYMBOL_REF_EXTERNAL_P (op)))
10345
|| (op == XEXP (DECL_RTL (current_function_decl),
10347
@@ -1364,6 +1559,26 @@
10351
+;; Return 1 if OP is valid for crsave insn, known to be a PARALLEL.
10352
+(define_predicate "crsave_operation"
10353
+ (match_code "parallel")
10355
+ int count = XVECLEN (op, 0);
10358
+ for (i = 1; i < count; i++)
10360
+ rtx exp = XVECEXP (op, 0, i);
10362
+ if (GET_CODE (exp) != USE
10363
+ || GET_CODE (XEXP (exp, 0)) != REG
10364
+ || GET_MODE (XEXP (exp, 0)) != CCmode
10365
+ || ! CR_REGNO_P (REGNO (XEXP (exp, 0))))
10371
;; Return 1 if OP is valid for lmw insn, known to be a PARALLEL.
10372
(define_predicate "lmw_operation"
10373
(match_code "parallel")
10374
@@ -1534,3 +1749,99 @@
10376
return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL;
10379
+;; Match the first insn (addis) in fusing the combination of addis and loads to
10380
+;; GPR registers on power8.
10381
+(define_predicate "fusion_gpr_addis"
10382
+ (match_code "const_int,high,plus")
10384
+ HOST_WIDE_INT value;
10387
+ if (GET_CODE (op) == HIGH)
10390
+ if (CONST_INT_P (op))
10393
+ else if (GET_CODE (op) == PLUS
10394
+ && base_reg_operand (XEXP (op, 0), Pmode)
10395
+ && CONST_INT_P (XEXP (op, 1)))
10396
+ int_const = XEXP (op, 1);
10401
+ /* Power8 currently will only do the fusion if the top 11 bits of the addis
10402
+ value are all 1's or 0's. */
10403
+ value = INTVAL (int_const);
10404
+ if ((value & (HOST_WIDE_INT)0xffff) != 0)
10407
+ if ((value & (HOST_WIDE_INT)0xffff0000) == 0)
10410
+ return (IN_RANGE (value >> 16, -32, 31));
10413
+;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis
10414
+;; and loads to GPR registers on power8.
10415
+(define_predicate "fusion_gpr_mem_load"
10416
+ (match_code "mem,sign_extend,zero_extend")
10420
+ /* Handle sign/zero extend. */
10421
+ if (GET_CODE (op) == ZERO_EXTEND
10422
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND))
10424
+ op = XEXP (op, 0);
10425
+ mode = GET_MODE (op);
10439
+ if (!TARGET_POWERPC64)
10447
+ addr = XEXP (op, 0);
10448
+ if (GET_CODE (addr) == PLUS)
10450
+ rtx base = XEXP (addr, 0);
10451
+ rtx offset = XEXP (addr, 1);
10453
+ return (base_reg_operand (base, GET_MODE (base))
10454
+ && satisfies_constraint_I (offset));
10457
+ else if (GET_CODE (addr) == LO_SUM)
10459
+ rtx base = XEXP (addr, 0);
10460
+ rtx offset = XEXP (addr, 1);
10462
+ if (!base_reg_operand (base, GET_MODE (base)))
10465
+ else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
10466
+ return small_toc_ref (offset, GET_MODE (offset));
10468
+ else if (TARGET_ELF && !TARGET_POWERPC64)
10469
+ return CONSTANT_P (offset);
10474
--- a/src/gcc/config/rs6000/ppc-asm.h
10475
+++ b/src/gcc/config/rs6000/ppc-asm.h
10476
@@ -256,7 +256,30 @@
10477
* the real function with one or two leading periods respectively.
10480
-#if defined (__powerpc64__)
10481
+#if defined(__powerpc64__) && _CALL_ELF == 2
10483
+/* Defining "toc" above breaks @toc in assembler code. */
10486
+#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name)
10487
+#define JUMP_TARGET(name) FUNC_NAME(name)
10488
+#define FUNC_START(name) \
10489
+ .type FUNC_NAME(name),@function; \
10490
+ .globl FUNC_NAME(name); \
10491
+FUNC_NAME(name): \
10492
+0: addis 2,12,(.TOC.-0b)@ha; \
10493
+ addi 2,2,(.TOC.-0b)@l; \
10494
+ .localentry FUNC_NAME(name),.-FUNC_NAME(name)
10496
+#define HIDDEN_FUNC(name) \
10497
+ FUNC_START(name) \
10498
+ .hidden FUNC_NAME(name);
10500
+#define FUNC_END(name) \
10501
+ .size FUNC_NAME(name),.-FUNC_NAME(name)
10503
+#elif defined (__powerpc64__)
10505
#define FUNC_NAME(name) GLUE(.,name)
10506
#define JUMP_TARGET(name) FUNC_NAME(name)
10507
#define FUNC_START(name) \
10508
--- a/src/gcc/config/rs6000/htm.md
10509
+++ b/src/gcc/config/rs6000/htm.md
10511
+;; Hardware Transactional Memory (HTM) patterns.
10512
+;; Copyright (C) 2013 Free Software Foundation, Inc.
10513
+;; Contributed by Peter Bergner <bergner@vnet.ibm.com>.
10515
+;; This file is part of GCC.
10517
+;; GCC is free software; you can redistribute it and/or modify it
10518
+;; under the terms of the GNU General Public License as published
10519
+;; by the Free Software Foundation; either version 3, or (at your
10520
+;; option) any later version.
10522
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
10523
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10524
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10525
+;; License for more details.
10527
+;; You should have received a copy of the GNU General Public License
10528
+;; along with GCC; see the file COPYING3. If not see
10529
+;; <http://www.gnu.org/licenses/>.
10535
+ (TEXASRU_SPR 131)
10536
+ (MAX_HTM_OPERANDS 4)
10540
+;; UNSPEC_VOLATILE usage
10543
+(define_c_enum "unspecv"
10544
+ [UNSPECV_HTM_TABORT
10545
+ UNSPECV_HTM_TABORTDC
10546
+ UNSPECV_HTM_TABORTDCI
10547
+ UNSPECV_HTM_TABORTWC
10548
+ UNSPECV_HTM_TABORTWCI
10549
+ UNSPECV_HTM_TBEGIN
10550
+ UNSPECV_HTM_TCHECK
10552
+ UNSPECV_HTM_TRECHKPT
10553
+ UNSPECV_HTM_TRECLAIM
10555
+ UNSPECV_HTM_MFSPR
10556
+ UNSPECV_HTM_MTSPR
10560
+(define_expand "tabort"
10561
+ [(set (match_dup 2)
10562
+ (unspec_volatile:CC [(match_operand:SI 1 "int_reg_operand" "")]
10563
+ UNSPECV_HTM_TABORT))
10564
+ (set (match_dup 3)
10565
+ (eq:SI (match_dup 2)
10567
+ (set (match_operand:SI 0 "int_reg_operand" "")
10568
+ (minus:SI (const_int 1) (match_dup 3)))]
10571
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
10572
+ operands[3] = gen_reg_rtx (SImode);
10575
+(define_insn "*tabort_internal"
10576
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
10577
+ (unspec_volatile:CC [(match_operand:SI 0 "int_reg_operand" "r")]
10578
+ UNSPECV_HTM_TABORT))]
10581
+ [(set_attr "type" "htm")
10582
+ (set_attr "length" "4")])
10584
+(define_expand "tabortdc"
10585
+ [(set (match_dup 4)
10586
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
10587
+ (match_operand:SI 2 "gpc_reg_operand" "r")
10588
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
10589
+ UNSPECV_HTM_TABORTDC))
10590
+ (set (match_dup 5)
10591
+ (eq:SI (match_dup 4)
10593
+ (set (match_operand:SI 0 "int_reg_operand" "")
10594
+ (minus:SI (const_int 1) (match_dup 5)))]
10597
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
10598
+ operands[5] = gen_reg_rtx (SImode);
10601
+(define_insn "*tabortdc_internal"
10602
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
10603
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
10604
+ (match_operand:SI 1 "gpc_reg_operand" "r")
10605
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
10606
+ UNSPECV_HTM_TABORTDC))]
10608
+ "tabortdc. %0,%1,%2"
10609
+ [(set_attr "type" "htm")
10610
+ (set_attr "length" "4")])
10612
+(define_expand "tabortdci"
10613
+ [(set (match_dup 4)
10614
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
10615
+ (match_operand:SI 2 "gpc_reg_operand" "r")
10616
+ (match_operand 3 "s5bit_cint_operand" "n")]
10617
+ UNSPECV_HTM_TABORTDCI))
10618
+ (set (match_dup 5)
10619
+ (eq:SI (match_dup 4)
10621
+ (set (match_operand:SI 0 "int_reg_operand" "")
10622
+ (minus:SI (const_int 1) (match_dup 5)))]
10625
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
10626
+ operands[5] = gen_reg_rtx (SImode);
10629
+(define_insn "*tabortdci_internal"
10630
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
10631
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
10632
+ (match_operand:SI 1 "gpc_reg_operand" "r")
10633
+ (match_operand 2 "s5bit_cint_operand" "n")]
10634
+ UNSPECV_HTM_TABORTDCI))]
10636
+ "tabortdci. %0,%1,%2"
10637
+ [(set_attr "type" "htm")
10638
+ (set_attr "length" "4")])
10640
+(define_expand "tabortwc"
10641
+ [(set (match_dup 4)
10642
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
10643
+ (match_operand:SI 2 "gpc_reg_operand" "r")
10644
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
10645
+ UNSPECV_HTM_TABORTWC))
10646
+ (set (match_dup 5)
10647
+ (eq:SI (match_dup 4)
10649
+ (set (match_operand:SI 0 "int_reg_operand" "")
10650
+ (minus:SI (const_int 1) (match_dup 5)))]
10653
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
10654
+ operands[5] = gen_reg_rtx (SImode);
10657
+(define_insn "*tabortwc_internal"
10658
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
10659
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
10660
+ (match_operand:SI 1 "gpc_reg_operand" "r")
10661
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
10662
+ UNSPECV_HTM_TABORTWC))]
10664
+ "tabortwc. %0,%1,%2"
10665
+ [(set_attr "type" "htm")
10666
+ (set_attr "length" "4")])
10668
+(define_expand "tabortwci"
10669
+ [(set (match_dup 4)
10670
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
10671
+ (match_operand:SI 2 "gpc_reg_operand" "r")
10672
+ (match_operand 3 "s5bit_cint_operand" "n")]
10673
+ UNSPECV_HTM_TABORTWCI))
10674
+ (set (match_dup 5)
10675
+ (eq:SI (match_dup 4)
10677
+ (set (match_operand:SI 0 "int_reg_operand" "")
10678
+ (minus:SI (const_int 1) (match_dup 5)))]
10681
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
10682
+ operands[5] = gen_reg_rtx (SImode);
10685
+(define_expand "ttest"
10686
+ [(set (match_dup 1)
10687
+ (unspec_volatile:CC [(const_int 0)
10690
+ UNSPECV_HTM_TABORTWCI))
10691
+ (set (subreg:CC (match_dup 2) 0) (match_dup 1))
10692
+ (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 24)))
10693
+ (parallel [(set (match_operand:SI 0 "int_reg_operand" "")
10694
+ (and:SI (match_dup 3) (const_int 15)))
10695
+ (clobber (scratch:CC))])]
10698
+ operands[1] = gen_rtx_REG (CCmode, CR0_REGNO);
10699
+ operands[2] = gen_reg_rtx (SImode);
10700
+ operands[3] = gen_reg_rtx (SImode);
10703
+(define_insn "*tabortwci_internal"
10704
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
10705
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
10706
+ (match_operand:SI 1 "gpc_reg_operand" "r")
10707
+ (match_operand 2 "s5bit_cint_operand" "n")]
10708
+ UNSPECV_HTM_TABORTWCI))]
10710
+ "tabortwci. %0,%1,%2"
10711
+ [(set_attr "type" "htm")
10712
+ (set_attr "length" "4")])
10714
+(define_expand "tbegin"
10715
+ [(set (match_dup 2)
10716
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
10717
+ UNSPECV_HTM_TBEGIN))
10718
+ (set (match_dup 3)
10719
+ (eq:SI (match_dup 2)
10721
+ (set (match_operand:SI 0 "int_reg_operand" "")
10722
+ (minus:SI (const_int 1) (match_dup 3)))]
10725
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
10726
+ operands[3] = gen_reg_rtx (SImode);
10729
+(define_insn "*tbegin_internal"
10730
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
10731
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
10732
+ UNSPECV_HTM_TBEGIN))]
10735
+ [(set_attr "type" "htm")
10736
+ (set_attr "length" "4")])
10738
+(define_expand "tcheck"
10739
+ [(set (match_dup 2)
10740
+ (unspec_volatile:CC [(match_operand 1 "u3bit_cint_operand" "n")]
10741
+ UNSPECV_HTM_TCHECK))
10742
+ (set (match_dup 3)
10743
+ (eq:SI (match_dup 2)
10745
+ (set (match_operand:SI 0 "int_reg_operand" "")
10746
+ (minus:SI (const_int 1) (match_dup 3)))]
10749
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
10750
+ operands[3] = gen_reg_rtx (SImode);
10753
+(define_insn "*tcheck_internal"
10754
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
10755
+ (unspec_volatile:CC [(match_operand 0 "u3bit_cint_operand" "n")]
10756
+ UNSPECV_HTM_TCHECK))]
10759
+ [(set_attr "type" "htm")
10760
+ (set_attr "length" "4")])
10762
+(define_expand "tend"
10763
+ [(set (match_dup 2)
10764
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
10765
+ UNSPECV_HTM_TEND))
10766
+ (set (match_dup 3)
10767
+ (eq:SI (match_dup 2)
10769
+ (set (match_operand:SI 0 "int_reg_operand" "")
10770
+ (minus:SI (const_int 1) (match_dup 3)))]
10773
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
10774
+ operands[3] = gen_reg_rtx (SImode);
10777
+(define_insn "*tend_internal"
10778
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
10779
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
10780
+ UNSPECV_HTM_TEND))]
10783
+ [(set_attr "type" "htm")
10784
+ (set_attr "length" "4")])
10786
+(define_expand "trechkpt"
10787
+ [(set (match_dup 1)
10788
+ (unspec_volatile:CC [(const_int 0)]
10789
+ UNSPECV_HTM_TRECHKPT))
10790
+ (set (match_dup 2)
10791
+ (eq:SI (match_dup 1)
10793
+ (set (match_operand:SI 0 "int_reg_operand" "")
10794
+ (minus:SI (const_int 1) (match_dup 2)))]
10797
+ operands[1] = gen_rtx_REG (CCmode, CR0_REGNO);
10798
+ operands[2] = gen_reg_rtx (SImode);
10801
+(define_insn "*trechkpt_internal"
10802
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
10803
+ (unspec_volatile:CC [(const_int 0)]
10804
+ UNSPECV_HTM_TRECHKPT))]
10807
+ [(set_attr "type" "htm")
10808
+ (set_attr "length" "4")])
10810
+(define_expand "treclaim"
10811
+ [(set (match_dup 2)
10812
+ (unspec_volatile:CC [(match_operand:SI 1 "gpc_reg_operand" "r")]
10813
+ UNSPECV_HTM_TRECLAIM))
10814
+ (set (match_dup 3)
10815
+ (eq:SI (match_dup 2)
10817
+ (set (match_operand:SI 0 "int_reg_operand" "")
10818
+ (minus:SI (const_int 1) (match_dup 3)))]
10821
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
10822
+ operands[3] = gen_reg_rtx (SImode);
10825
+(define_insn "*treclaim_internal"
10826
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
10827
+ (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")]
10828
+ UNSPECV_HTM_TRECLAIM))]
10831
+ [(set_attr "type" "htm")
10832
+ (set_attr "length" "4")])
10834
+(define_expand "tsr"
10835
+ [(set (match_dup 2)
10836
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
10837
+ UNSPECV_HTM_TSR))
10838
+ (set (match_dup 3)
10839
+ (eq:SI (match_dup 2)
10841
+ (set (match_operand:SI 0 "int_reg_operand" "")
10842
+ (minus:SI (const_int 1) (match_dup 3)))]
10845
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
10846
+ operands[3] = gen_reg_rtx (SImode);
10849
+(define_insn "*tsr_internal"
10850
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
10851
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
10852
+ UNSPECV_HTM_TSR))]
10855
+ [(set_attr "type" "htm")
10856
+ (set_attr "length" "4")])
10858
+(define_insn "htm_mfspr_<mode>"
10859
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
10860
+ (unspec_volatile:P [(match_operand 1 "u10bit_cint_operand" "n")
10861
+ (match_operand:P 2 "htm_spr_reg_operand" "")]
10862
+ UNSPECV_HTM_MFSPR))]
10865
+ [(set_attr "type" "htm")
10866
+ (set_attr "length" "4")])
10868
+(define_insn "htm_mtspr_<mode>"
10869
+ [(set (match_operand:P 2 "htm_spr_reg_operand" "")
10870
+ (unspec_volatile:P [(match_operand:P 0 "gpc_reg_operand" "r")
10871
+ (match_operand 1 "u10bit_cint_operand" "n")]
10872
+ UNSPECV_HTM_MTSPR))]
10875
+ [(set_attr "type" "htm")
10876
+ (set_attr "length" "4")])
10877
--- a/src/gcc/config/rs6000/rs6000-modes.def
10878
+++ b/src/gcc/config/rs6000/rs6000-modes.def
10880
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
10881
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
10882
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
10884
+/* Replacement for TImode that only is allowed in GPRs. We also use PTImode
10885
+ for quad memory atomic operations to force getting an even/odd register
10887
+PARTIAL_INT_MODE (TI);
10888
--- a/src/gcc/config/rs6000/rs6000-cpus.def
10889
+++ b/src/gcc/config/rs6000/rs6000-cpus.def
10891
ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel,
10892
fre, fsqrt, etc. were no longer documented as optional. Group masks by
10893
server and embedded. */
10894
-#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \
10895
+#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \
10896
| OPTION_MASK_CMPB \
10897
| OPTION_MASK_RECIP_PRECISION \
10898
| OPTION_MASK_PPC_GFXOPT \
10899
@@ -38,12 +38,23 @@
10901
/* For ISA 2.06, don't add ISEL, since in general it isn't a win, but
10902
altivec is a win so enable it. */
10903
+ /* OPTION_MASK_VSX_TIMODE should be set, but disable it for now until
10904
+ PR 58587 is fixed. */
10905
#define ISA_2_6_MASKS_EMBEDDED (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_POPCNTD)
10906
#define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \
10907
| OPTION_MASK_POPCNTD \
10908
| OPTION_MASK_ALTIVEC \
10911
+/* For now, don't provide an embedded version of ISA 2.07. */
10912
+#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \
10913
+ | OPTION_MASK_P8_FUSION \
10914
+ | OPTION_MASK_P8_VECTOR \
10915
+ | OPTION_MASK_CRYPTO \
10916
+ | OPTION_MASK_DIRECT_MOVE \
10917
+ | OPTION_MASK_HTM \
10918
+ | OPTION_MASK_QUAD_MEMORY)
10920
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
10922
/* Deal with ports that do not have -mstrict-align. */
10923
@@ -60,23 +71,30 @@
10924
/* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */
10925
#define POWERPC_MASKS (OPTION_MASK_ALTIVEC \
10926
| OPTION_MASK_CMPB \
10927
+ | OPTION_MASK_CRYPTO \
10928
| OPTION_MASK_DFP \
10929
+ | OPTION_MASK_DIRECT_MOVE \
10930
| OPTION_MASK_DLMZB \
10931
| OPTION_MASK_FPRND \
10932
+ | OPTION_MASK_HTM \
10933
| OPTION_MASK_ISEL \
10934
| OPTION_MASK_MFCRF \
10935
| OPTION_MASK_MFPGPR \
10936
| OPTION_MASK_MULHW \
10937
| OPTION_MASK_NO_UPDATE \
10938
+ | OPTION_MASK_P8_FUSION \
10939
+ | OPTION_MASK_P8_VECTOR \
10940
| OPTION_MASK_POPCNTB \
10941
| OPTION_MASK_POPCNTD \
10942
| OPTION_MASK_POWERPC64 \
10943
| OPTION_MASK_PPC_GFXOPT \
10944
| OPTION_MASK_PPC_GPOPT \
10945
+ | OPTION_MASK_QUAD_MEMORY \
10946
| OPTION_MASK_RECIP_PRECISION \
10947
| OPTION_MASK_SOFT_FLOAT \
10948
| OPTION_MASK_STRICT_ALIGN_OPTIONAL \
10949
- | OPTION_MASK_VSX)
10950
+ | OPTION_MASK_VSX \
10951
+ | OPTION_MASK_VSX_TIMODE)
10955
@@ -166,10 +184,7 @@
10956
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
10957
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
10958
| MASK_VSX | MASK_RECIP_PRECISION)
10959
-RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
10960
- POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
10961
- | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
10962
- | MASK_VSX | MASK_RECIP_PRECISION)
10963
+RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
10964
RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
10965
RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64)
10966
RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64)
10967
--- a/src/gcc/config/rs6000/htmintrin.h
10968
+++ b/src/gcc/config/rs6000/htmintrin.h
10970
+/* Hardware Transactional Memory (HTM) intrinsics.
10971
+ Copyright (C) 2013 Free Software Foundation, Inc.
10972
+ Contributed by Peter Bergner <bergner@vnet.ibm.com>.
10974
+ This file is free software; you can redistribute it and/or modify it under
10975
+ the terms of the GNU General Public License as published by the Free
10976
+ Software Foundation; either version 3 of the License, or (at your option)
10977
+ any later version.
10979
+ This file is distributed in the hope that it will be useful, but WITHOUT
10980
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10981
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
10982
+ for more details.
10984
+ Under Section 7 of GPL version 3, you are granted additional
10985
+ permissions described in the GCC Runtime Library Exception, version
10986
+ 3.1, as published by the Free Software Foundation.
10988
+ You should have received a copy of the GNU General Public License and
10989
+ a copy of the GCC Runtime Library Exception along with this program;
10990
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
10991
+ <http://www.gnu.org/licenses/>. */
10994
+# error "HTM instruction set not enabled"
10995
+#endif /* __HTM__ */
10997
+#ifndef _HTMINTRIN_H
10998
+#define _HTMINTRIN_H
11000
+#include <stdint.h>
11002
+typedef uint64_t texasr_t;
11003
+typedef uint32_t texasru_t;
11004
+typedef uint32_t texasrl_t;
11005
+typedef uintptr_t tfiar_t;
11006
+typedef uintptr_t tfhar_t;
11008
+#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)
11009
+#define _HTM_NONTRANSACTIONAL 0x0
11010
+#define _HTM_SUSPENDED 0x1
11011
+#define _HTM_TRANSACTIONAL 0x2
11013
+/* The following macros use the IBM bit numbering for BITNUM
11014
+ as used in the ISA documentation. */
11016
+#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
11017
+ (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))
11018
+#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
11019
+ (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))
11021
+#define _TEXASR_FAILURE_CODE(TEXASR) \
11022
+ _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)
11023
+#define _TEXASRU_FAILURE_CODE(TEXASRU) \
11024
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)
11026
+#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \
11027
+ _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)
11028
+#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \
11029
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)
11031
+#define _TEXASR_DISALLOWED(TEXASR) \
11032
+ _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)
11033
+#define _TEXASRU_DISALLOWED(TEXASRU) \
11034
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)
11036
+#define _TEXASR_NESTING_OVERFLOW(TEXASR) \
11037
+ _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)
11038
+#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \
11039
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)
11041
+#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \
11042
+ _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)
11043
+#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \
11044
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)
11046
+#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \
11047
+ _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)
11048
+#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \
11049
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)
11051
+#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \
11052
+ _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)
11053
+#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \
11054
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)
11056
+#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \
11057
+ _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)
11058
+#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \
11059
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)
11061
+#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \
11062
+ _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)
11063
+#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \
11064
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)
11066
+#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \
11067
+ _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)
11068
+#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \
11069
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)
11071
+#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \
11072
+ _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)
11073
+#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \
11074
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)
11076
+#define _TEXASR_ABORT(TEXASR) \
11077
+ _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)
11078
+#define _TEXASRU_ABORT(TEXASRU) \
11079
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)
11082
+#define _TEXASR_SUSPENDED(TEXASR) \
11083
+ _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)
11085
+#define _TEXASR_PRIVILEGE(TEXASR) \
11086
+ _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)
11088
+#define _TEXASR_FAILURE_SUMMARY(TEXASR) \
11089
+ _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)
11091
+#define _TEXASR_TFIAR_EXACT(TEXASR) \
11092
+ _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)
11094
+#define _TEXASR_ROT(TEXASR) \
11095
+ _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)
11097
+#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \
11098
+ _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)
11100
+#endif /* _HTMINTRIN_H */
11101
--- a/src/gcc/config/rs6000/rs6000-protos.h
11102
+++ b/src/gcc/config/rs6000/rs6000-protos.h
11103
@@ -50,11 +50,13 @@
11104
extern rtx find_addr_reg (rtx);
11105
extern rtx gen_easy_altivec_constant (rtx);
11106
extern const char *output_vec_const_move (rtx *);
11107
+extern const char *rs6000_output_move_128bit (rtx *);
11108
extern void rs6000_expand_vector_init (rtx, rtx);
11109
extern void paired_expand_vector_init (rtx, rtx);
11110
extern void rs6000_expand_vector_set (rtx, rtx, int);
11111
extern void rs6000_expand_vector_extract (rtx, rtx, int);
11112
extern bool altivec_expand_vec_perm_const (rtx op[4]);
11113
+extern void altivec_expand_vec_perm_le (rtx op[4]);
11114
extern bool rs6000_expand_vec_perm_const (rtx op[4]);
11115
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
11116
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
11118
extern int registers_ok_for_quad_peep (rtx, rtx);
11119
extern int mems_ok_for_quad_peep (rtx, rtx);
11120
extern bool gpr_or_gpr_p (rtx, rtx);
11121
+extern bool direct_move_p (rtx, rtx);
11122
+extern bool quad_load_store_p (rtx, rtx);
11123
+extern bool fusion_gpr_load_p (rtx *, bool);
11124
+extern void expand_fusion_gpr_load (rtx *);
11125
+extern const char *emit_fusion_gpr_load (rtx *);
11126
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
11128
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
11129
@@ -116,6 +123,7 @@
11130
extern void rs6000_fatal_bad_address (rtx);
11131
extern rtx create_TOC_reference (rtx, rtx);
11132
extern void rs6000_split_multireg_move (rtx, rtx);
11133
+extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode);
11134
extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
11135
extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
11136
extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
11137
@@ -135,6 +143,7 @@
11138
extern rtx rs6000_address_for_altivec (rtx);
11139
extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
11140
extern int rs6000_loop_align (rtx);
11141
+extern void rs6000_split_logical (rtx [], enum rtx_code, bool, bool, bool, rtx);
11142
#endif /* RTX_CODE */
11145
@@ -146,6 +155,7 @@
11146
extern rtx rs6000_libcall_value (enum machine_mode);
11147
extern rtx rs6000_va_arg (tree, tree);
11148
extern int function_ok_for_sibcall (tree);
11149
+extern int rs6000_reg_parm_stack_space (tree);
11150
extern void rs6000_elf_declare_function_name (FILE *, const char *, tree);
11151
extern bool rs6000_elf_in_small_data_p (const_tree);
11152
#ifdef ARGS_SIZE_RTX
11153
@@ -170,7 +180,8 @@
11154
extern void rs6000_emit_epilogue (int);
11155
extern void rs6000_emit_eh_reg_restore (rtx, rtx);
11156
extern const char * output_isel (rtx *);
11157
-extern void rs6000_call_indirect_aix (rtx, rtx, rtx);
11158
+extern void rs6000_call_aix (rtx, rtx, rtx, rtx);
11159
+extern void rs6000_sibcall_aix (rtx, rtx, rtx, rtx);
11160
extern void rs6000_aix_asm_output_dwarf_table_ref (char *);
11161
extern void get_ppc476_thunk_name (char name[32]);
11162
extern bool rs6000_overloaded_builtin_p (enum rs6000_builtins);
11163
--- a/src/gcc/config/rs6000/t-rs6000
11164
+++ b/src/gcc/config/rs6000/t-rs6000
11166
$(srcdir)/config/rs6000/power5.md \
11167
$(srcdir)/config/rs6000/power6.md \
11168
$(srcdir)/config/rs6000/power7.md \
11169
+ $(srcdir)/config/rs6000/power8.md \
11170
$(srcdir)/config/rs6000/cell.md \
11171
$(srcdir)/config/rs6000/xfpu.md \
11172
$(srcdir)/config/rs6000/a2.md \
11174
$(srcdir)/config/rs6000/vector.md \
11175
$(srcdir)/config/rs6000/vsx.md \
11176
$(srcdir)/config/rs6000/altivec.md \
11177
+ $(srcdir)/config/rs6000/crypto.md \
11178
+ $(srcdir)/config/rs6000/htm.md \
11179
$(srcdir)/config/rs6000/spe.md \
11180
$(srcdir)/config/rs6000/dfp.md \
11181
$(srcdir)/config/rs6000/paired.md
11182
--- a/src/gcc/config/rs6000/htmxlintrin.h
11183
+++ b/src/gcc/config/rs6000/htmxlintrin.h
11185
+/* XL compiler Hardware Transactional Memory (HTM) execution intrinsics.
11186
+ Copyright (C) 2013 Free Software Foundation, Inc.
11187
+ Contributed by Peter Bergner <bergner@vnet.ibm.com>.
11189
+ This file is free software; you can redistribute it and/or modify it under
11190
+ the terms of the GNU General Public License as published by the Free
11191
+ Software Foundation; either version 3 of the License, or (at your option)
11192
+ any later version.
11194
+ This file is distributed in the hope that it will be useful, but WITHOUT
11195
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11196
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11197
+ for more details.
11199
+ Under Section 7 of GPL version 3, you are granted additional
11200
+ permissions described in the GCC Runtime Library Exception, version
11201
+ 3.1, as published by the Free Software Foundation.
11203
+ You should have received a copy of the GNU General Public License and
11204
+ a copy of the GCC Runtime Library Exception along with this program;
11205
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
11206
+ <http://www.gnu.org/licenses/>. */
11209
+# error "HTM instruction set not enabled"
11210
+#endif /* __HTM__ */
11212
+#ifndef _HTMXLINTRIN_H
11213
+#define _HTMXLINTRIN_H
11215
+#include <stdint.h>
11216
+#include <htmintrin.h>
11218
+#ifdef __cplusplus
11222
+#define _TEXASR_PTR(TM_BUF) \
11223
+ ((texasr_t *)((TM_BUF)+0))
11224
+#define _TEXASRU_PTR(TM_BUF) \
11225
+ ((texasru_t *)((TM_BUF)+0))
11226
+#define _TEXASRL_PTR(TM_BUF) \
11227
+ ((texasrl_t *)((TM_BUF)+4))
11228
+#define _TFIAR_PTR(TM_BUF) \
11229
+ ((tfiar_t *)((TM_BUF)+8))
11231
+typedef char TM_buff_type[16];
11233
+extern __inline long
11234
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11235
+__TM_simple_begin (void)
11237
+ if (__builtin_expect (__builtin_tbegin (0), 1))
11242
+extern __inline long
11243
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11244
+__TM_begin (void* const TM_buff)
11246
+ *_TEXASRL_PTR (TM_buff) = 0;
11247
+ if (__builtin_expect (__builtin_tbegin (0), 1))
11249
+#ifdef __powerpc64__
11250
+ *_TEXASR_PTR (TM_buff) = __builtin_get_texasr ();
11252
+ *_TEXASRU_PTR (TM_buff) = __builtin_get_texasru ();
11253
+ *_TEXASRL_PTR (TM_buff) = __builtin_get_texasr ();
11255
+ *_TFIAR_PTR (TM_buff) = __builtin_get_tfiar ();
11259
+extern __inline long
11260
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11263
+ if (__builtin_expect (__builtin_tend (0), 1))
11268
+extern __inline void
11269
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11272
+ __builtin_tabort (0);
11275
+extern __inline void
11276
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11277
+__TM_named_abort (unsigned char const code)
11279
+ __builtin_tabort (code);
11282
+extern __inline void
11283
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11284
+__TM_resume (void)
11286
+ __builtin_tresume ();
11289
+extern __inline void
11290
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291
+__TM_suspend (void)
11293
+ __builtin_tsuspend ();
11296
+extern __inline long
11297
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11298
+__TM_is_user_abort (void* const TM_buff)
11300
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11301
+ return _TEXASRU_ABORT (texasru);
11304
+extern __inline long
11305
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11306
+__TM_is_named_user_abort (void* const TM_buff, unsigned char *code)
11308
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11310
+ *code = _TEXASRU_FAILURE_CODE (texasru);
11311
+ return _TEXASRU_ABORT (texasru);
11314
+extern __inline long
11315
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11316
+__TM_is_illegal (void* const TM_buff)
11318
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11319
+ return _TEXASRU_DISALLOWED (texasru);
11322
+extern __inline long
11323
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324
+__TM_is_footprint_exceeded (void* const TM_buff)
11326
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11327
+ return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);
11330
+extern __inline long
11331
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11332
+__TM_nesting_depth (void* const TM_buff)
11334
+ texasrl_t texasrl;
11336
+ if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)
11338
+ texasrl = *_TEXASRL_PTR (TM_buff);
11339
+ if (!_TEXASR_FAILURE_SUMMARY (texasrl))
11343
+ texasrl = (texasrl_t) __builtin_get_texasr ();
11345
+ return _TEXASR_TRANSACTION_LEVEL (texasrl);
11348
+extern __inline long
11349
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11350
+__TM_is_nested_too_deep(void* const TM_buff)
11352
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11353
+ return _TEXASRU_NESTING_OVERFLOW (texasru);
11356
+extern __inline long
11357
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11358
+__TM_is_conflict(void* const TM_buff)
11360
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11361
+ /* Return TEXASR bits 11 (Self-Induced Conflict) through
11362
+ 14 (Translation Invalidation Conflict). */
11363
+ return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;
11366
+extern __inline long
11367
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368
+__TM_is_failure_persistent(void* const TM_buff)
11370
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
11371
+ return _TEXASRU_FAILURE_PERSISTENT (texasru);
11374
+extern __inline long
11375
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11376
+__TM_failure_address(void* const TM_buff)
11378
+ return *_TFIAR_PTR (TM_buff);
11381
+extern __inline long long
11382
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11383
+__TM_failure_code(void* const TM_buff)
11385
+ return *_TEXASR_PTR (TM_buff);
11388
+#ifdef __cplusplus
11392
+#endif /* _HTMXLINTRIN_H */
11393
--- a/src/gcc/config/rs6000/rs6000-builtin.def
11394
+++ b/src/gcc/config/rs6000/rs6000-builtin.def
11396
RS6000_BUILTIN_A -- ABS builtins
11397
RS6000_BUILTIN_D -- DST builtins
11398
RS6000_BUILTIN_E -- SPE EVSEL builtins.
11399
- RS6000_BUILTIN_P -- Altivec and VSX predicate builtins
11400
+ RS6000_BUILTIN_H -- HTM builtins
11401
+ RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins
11402
RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins
11403
RS6000_BUILTIN_S -- SPE predicate builtins
11404
RS6000_BUILTIN_X -- special builtins
11406
#error "RS6000_BUILTIN_E is not defined."
11409
+#ifndef RS6000_BUILTIN_H
11410
+ #error "RS6000_BUILTIN_H is not defined."
11413
#ifndef RS6000_BUILTIN_P
11414
#error "RS6000_BUILTIN_P is not defined."
11416
@@ -301,6 +306,158 @@
11417
| RS6000_BTC_SPECIAL), \
11418
CODE_FOR_nothing) /* ICODE */
11420
+/* ISA 2.07 (power8) vector convenience macros. */
11421
+/* For the instructions that are encoded as altivec instructions use
11422
+ __builtin_altivec_ as the builtin name. */
11423
+#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \
11424
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
11425
+ "__builtin_altivec_" NAME, /* NAME */ \
11426
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
11427
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11428
+ | RS6000_BTC_UNARY), \
11429
+ CODE_FOR_ ## ICODE) /* ICODE */
11431
+#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \
11432
+ RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
11433
+ "__builtin_altivec_" NAME, /* NAME */ \
11434
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
11435
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11436
+ | RS6000_BTC_BINARY), \
11437
+ CODE_FOR_ ## ICODE) /* ICODE */
11439
+#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \
11440
+ RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
11441
+ "__builtin_altivec_" NAME, /* NAME */ \
11442
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
11443
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11444
+ | RS6000_BTC_PREDICATE), \
11445
+ CODE_FOR_ ## ICODE) /* ICODE */
11447
+/* For the instructions encoded as VSX instructions use __builtin_vsx as the
11449
+#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \
11450
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
11451
+ "__builtin_vsx_" NAME, /* NAME */ \
11452
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
11453
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11454
+ | RS6000_BTC_UNARY), \
11455
+ CODE_FOR_ ## ICODE) /* ICODE */
11457
+#define BU_P8V_OVERLOAD_1(ENUM, NAME) \
11458
+ RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
11459
+ "__builtin_vec_" NAME, /* NAME */ \
11460
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
11461
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
11462
+ | RS6000_BTC_UNARY), \
11463
+ CODE_FOR_nothing) /* ICODE */
11465
+#define BU_P8V_OVERLOAD_2(ENUM, NAME) \
11466
+ RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
11467
+ "__builtin_vec_" NAME, /* NAME */ \
11468
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
11469
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
11470
+ | RS6000_BTC_BINARY), \
11471
+ CODE_FOR_nothing) /* ICODE */
11473
+/* Crypto convenience macros. */
11474
+#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \
11475
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
11476
+ "__builtin_crypto_" NAME, /* NAME */ \
11477
+ RS6000_BTM_CRYPTO, /* MASK */ \
11478
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11479
+ | RS6000_BTC_UNARY), \
11480
+ CODE_FOR_ ## ICODE) /* ICODE */
11482
+#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \
11483
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
11484
+ "__builtin_crypto_" NAME, /* NAME */ \
11485
+ RS6000_BTM_CRYPTO, /* MASK */ \
11486
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11487
+ | RS6000_BTC_BINARY), \
11488
+ CODE_FOR_ ## ICODE) /* ICODE */
11490
+#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \
11491
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
11492
+ "__builtin_crypto_" NAME, /* NAME */ \
11493
+ RS6000_BTM_CRYPTO, /* MASK */ \
11494
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11495
+ | RS6000_BTC_TERNARY), \
11496
+ CODE_FOR_ ## ICODE) /* ICODE */
11498
+#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \
11499
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
11500
+ "__builtin_crypto_" NAME, /* NAME */ \
11501
+ RS6000_BTM_CRYPTO, /* MASK */ \
11502
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
11503
+ | RS6000_BTC_UNARY), \
11504
+ CODE_FOR_nothing) /* ICODE */
11506
+#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \
11507
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
11508
+ "__builtin_crypto_" NAME, /* NAME */ \
11509
+ RS6000_BTM_CRYPTO, /* MASK */ \
11510
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
11511
+ | RS6000_BTC_BINARY), \
11512
+ CODE_FOR_nothing) /* ICODE */
11514
+#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \
11515
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
11516
+ "__builtin_crypto_" NAME, /* NAME */ \
11517
+ RS6000_BTM_CRYPTO, /* MASK */ \
11518
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
11519
+ | RS6000_BTC_TERNARY), \
11520
+ CODE_FOR_nothing) /* ICODE */
11522
+/* HTM convenience macros. */
11523
+#define BU_HTM_0(ENUM, NAME, ATTR, ICODE) \
11524
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
11525
+ "__builtin_" NAME, /* NAME */ \
11526
+ RS6000_BTM_HTM, /* MASK */ \
11527
+ RS6000_BTC_ ## ATTR, /* ATTR */ \
11528
+ CODE_FOR_ ## ICODE) /* ICODE */
11530
+#define BU_HTM_1(ENUM, NAME, ATTR, ICODE) \
11531
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
11532
+ "__builtin_" NAME, /* NAME */ \
11533
+ RS6000_BTM_HTM, /* MASK */ \
11534
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11535
+ | RS6000_BTC_UNARY), \
11536
+ CODE_FOR_ ## ICODE) /* ICODE */
11538
+#define BU_HTM_2(ENUM, NAME, ATTR, ICODE) \
11539
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
11540
+ "__builtin_" NAME, /* NAME */ \
11541
+ RS6000_BTM_HTM, /* MASK */ \
11542
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11543
+ | RS6000_BTC_BINARY), \
11544
+ CODE_FOR_ ## ICODE) /* ICODE */
11546
+#define BU_HTM_3(ENUM, NAME, ATTR, ICODE) \
11547
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
11548
+ "__builtin_" NAME, /* NAME */ \
11549
+ RS6000_BTM_HTM, /* MASK */ \
11550
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11551
+ | RS6000_BTC_TERNARY), \
11552
+ CODE_FOR_ ## ICODE) /* ICODE */
11554
+#define BU_HTM_SPR0(ENUM, NAME, ATTR, ICODE) \
11555
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
11556
+ "__builtin_" NAME, /* NAME */ \
11557
+ RS6000_BTM_HTM, /* MASK */ \
11558
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11559
+ | RS6000_BTC_SPR), \
11560
+ CODE_FOR_ ## ICODE) /* ICODE */
11562
+#define BU_HTM_SPR1(ENUM, NAME, ATTR, ICODE) \
11563
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
11564
+ "__builtin_" NAME, /* NAME */ \
11565
+ RS6000_BTM_HTM, /* MASK */ \
11566
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
11567
+ | RS6000_BTC_UNARY \
11568
+ | RS6000_BTC_SPR \
11569
+ | RS6000_BTC_VOID), \
11570
+ CODE_FOR_ ## ICODE) /* ICODE */
11572
/* SPE convenience macros. */
11573
#define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \
11574
RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \
11575
@@ -1012,7 +1169,7 @@
11576
BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2)
11578
BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp)
11579
-BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp)
11580
+BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp)
11581
BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp)
11582
BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp)
11583
BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe)
11584
@@ -1052,9 +1209,9 @@
11586
BU_VSX_1 (XSRDPI, "xsrdpi", CONST, vsx_xsrdpi)
11587
BU_VSX_1 (XSRDPIC, "xsrdpic", CONST, vsx_xsrdpic)
11588
-BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, vsx_floordf2)
11589
-BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, vsx_ceildf2)
11590
-BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, vsx_btruncdf2)
11591
+BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, floordf2)
11592
+BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, ceildf2)
11593
+BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, btruncdf2)
11595
/* VSX predicate functions. */
11596
BU_VSX_P (XVCMPEQSP_P, "xvcmpeqsp_p", CONST, vector_eq_v4sf_p)
11597
@@ -1132,6 +1289,166 @@
11598
BU_VSX_OVERLOAD_X (LD, "ld")
11599
BU_VSX_OVERLOAD_X (ST, "st")
11601
+/* 1 argument VSX instructions added in ISA 2.07. */
11602
+BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
11603
+BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
11605
+/* 1 argument altivec instructions added in ISA 2.07. */
11606
+BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
11607
+BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw)
11608
+BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw)
11609
+BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2)
11610
+BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2)
11611
+BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2)
11612
+BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2)
11613
+BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2)
11614
+BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2)
11615
+BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2)
11616
+BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2)
11617
+BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
11619
+/* 2 argument altivec instructions added in ISA 2.07. */
11620
+BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
11621
+BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
11622
+BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
11623
+BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
11624
+BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
11625
+BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
11626
+BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
11627
+BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
11628
+BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
11629
+BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
11630
+BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpkswus)
11631
+BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
11632
+BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
11633
+BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
11634
+BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
11635
+BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
11637
+BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
11638
+BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
11639
+BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
11640
+BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
11641
+BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
11642
+BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
11644
+BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3)
11645
+BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
11646
+BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
11647
+BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
11648
+BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
11649
+BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
11651
+BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3)
11652
+BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
11653
+BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
11654
+BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
11655
+BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
11656
+BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
11658
+/* Vector comparison instructions added in ISA 2.07. */
11659
+BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
11660
+BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
11661
+BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di)
11663
+/* Vector comparison predicate instructions added in ISA 2.07. */
11664
+BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p)
11665
+BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p)
11666
+BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p)
11668
+/* ISA 2.07 vector overloaded 1 argument functions. */
11669
+BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw")
11670
+BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw")
11671
+BU_P8V_OVERLOAD_1 (VCLZ, "vclz")
11672
+BU_P8V_OVERLOAD_1 (VCLZB, "vclzb")
11673
+BU_P8V_OVERLOAD_1 (VCLZH, "vclzh")
11674
+BU_P8V_OVERLOAD_1 (VCLZW, "vclzw")
11675
+BU_P8V_OVERLOAD_1 (VCLZD, "vclzd")
11676
+BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt")
11677
+BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb")
11678
+BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth")
11679
+BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw")
11680
+BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd")
11681
+BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
11683
+/* ISA 2.07 vector overloaded 2 argument functions. */
11684
+BU_P8V_OVERLOAD_2 (EQV, "eqv")
11685
+BU_P8V_OVERLOAD_2 (NAND, "nand")
11686
+BU_P8V_OVERLOAD_2 (ORC, "orc")
11687
+BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
11688
+BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
11689
+BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
11690
+BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
11691
+BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
11692
+BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
11693
+BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
11694
+BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
11695
+BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
11696
+BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
11697
+BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
11698
+BU_P8V_OVERLOAD_2 (VRLD, "vrld")
11699
+BU_P8V_OVERLOAD_2 (VSLD, "vsld")
11700
+BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
11701
+BU_P8V_OVERLOAD_2 (VSRD, "vsrd")
11702
+BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm")
11705
+/* 1 argument crypto functions. */
11706
+BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox)
11708
+/* 2 argument crypto functions. */
11709
+BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher)
11710
+BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast)
11711
+BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher)
11712
+BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast)
11713
+BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
11714
+BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
11715
+BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
11716
+BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
11718
+/* 3 argument crypto functions. */
11719
+BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di)
11720
+BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si)
11721
+BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi)
11722
+BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi)
11723
+BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw)
11724
+BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad)
11726
+/* 2 argument crypto overloaded functions. */
11727
+BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum")
11729
+/* 3 argument crypto overloaded functions. */
11730
+BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor")
11731
+BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma")
11734
+/* HTM functions. */
11735
+BU_HTM_1 (TABORT, "tabort", MISC, tabort)
11736
+BU_HTM_3 (TABORTDC, "tabortdc", MISC, tabortdc)
11737
+BU_HTM_3 (TABORTDCI, "tabortdci", MISC, tabortdci)
11738
+BU_HTM_3 (TABORTWC, "tabortwc", MISC, tabortwc)
11739
+BU_HTM_3 (TABORTWCI, "tabortwci", MISC, tabortwci)
11740
+BU_HTM_1 (TBEGIN, "tbegin", MISC, tbegin)
11741
+BU_HTM_1 (TCHECK, "tcheck", MISC, tcheck)
11742
+BU_HTM_1 (TEND, "tend", MISC, tend)
11743
+BU_HTM_0 (TENDALL, "tendall", MISC, tend)
11744
+BU_HTM_0 (TRECHKPT, "trechkpt", MISC, trechkpt)
11745
+BU_HTM_1 (TRECLAIM, "treclaim", MISC, treclaim)
11746
+BU_HTM_0 (TRESUME, "tresume", MISC, tsr)
11747
+BU_HTM_0 (TSUSPEND, "tsuspend", MISC, tsr)
11748
+BU_HTM_1 (TSR, "tsr", MISC, tsr)
11749
+BU_HTM_0 (TTEST, "ttest", MISC, ttest)
11751
+BU_HTM_SPR0 (GET_TFHAR, "get_tfhar", MISC, nothing)
11752
+BU_HTM_SPR1 (SET_TFHAR, "set_tfhar", MISC, nothing)
11753
+BU_HTM_SPR0 (GET_TFIAR, "get_tfiar", MISC, nothing)
11754
+BU_HTM_SPR1 (SET_TFIAR, "set_tfiar", MISC, nothing)
11755
+BU_HTM_SPR0 (GET_TEXASR, "get_texasr", MISC, nothing)
11756
+BU_HTM_SPR1 (SET_TEXASR, "set_texasr", MISC, nothing)
11757
+BU_HTM_SPR0 (GET_TEXASRU, "get_texasru", MISC, nothing)
11758
+BU_HTM_SPR1 (SET_TEXASRU, "set_texasru", MISC, nothing)
11761
/* 3 argument paired floating point builtins. */
11762
BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4)
11763
BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4)
11764
@@ -1430,10 +1747,10 @@
11767
BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase",
11768
- RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
11769
+ RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
11771
BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
11772
- RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
11773
+ RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
11775
/* Darwin CfString builtin. */
11776
BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS,
11777
--- a/src/gcc/config/rs6000/rs6000-c.c
11778
+++ b/src/gcc/config/rs6000/rs6000-c.c
11779
@@ -315,6 +315,8 @@
11780
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X");
11781
if ((flags & OPTION_MASK_POPCNTD) != 0)
11782
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
11783
+ if ((flags & OPTION_MASK_DIRECT_MOVE) != 0)
11784
+ rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8");
11785
if ((flags & OPTION_MASK_SOFT_FLOAT) != 0)
11786
rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT");
11787
if ((flags & OPTION_MASK_RECIP_PRECISION) != 0)
11788
@@ -331,6 +333,12 @@
11790
if ((flags & OPTION_MASK_VSX) != 0)
11791
rs6000_define_or_undefine_macro (define_p, "__VSX__");
11792
+ if ((flags & OPTION_MASK_HTM) != 0)
11793
+ rs6000_define_or_undefine_macro (define_p, "__HTM__");
11794
+ if ((flags & OPTION_MASK_P8_VECTOR) != 0)
11795
+ rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
11796
+ if ((flags & OPTION_MASK_CRYPTO) != 0)
11797
+ rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
11799
/* options from the builtin masks. */
11800
if ((bu_mask & RS6000_BTM_SPE) != 0)
11801
@@ -453,7 +461,11 @@
11803
builtin_define ("_CALL_AIXDESC");
11804
builtin_define ("_CALL_AIX");
11805
+ builtin_define ("_CALL_ELF=1");
11808
+ builtin_define ("_CALL_ELF=2");
11811
builtin_define ("_CALL_DARWIN");
11813
@@ -465,6 +477,13 @@
11814
if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
11815
builtin_define ("__NO_FPRS__");
11817
+ /* Whether aggregates passed by value are aligned to a 16 byte boundary
11818
+ if their alignment is 16 bytes or larger. */
11819
+ if ((TARGET_MACHO && rs6000_darwin64_abi)
11820
+ || DEFAULT_ABI == ABI_ELFv2
11821
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11822
+ builtin_define ("__STRUCT_PARM_ALIGN__=16");
11824
/* Generate defines for Xilinx FPU. */
11825
if (rs6000_xilinx_fpu)
11827
@@ -505,6 +524,8 @@
11828
RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
11829
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI,
11830
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
11831
+ { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI,
11832
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
11833
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF,
11834
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
11835
{ ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP,
11836
@@ -577,12 +598,24 @@
11837
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
11838
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH,
11839
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
11840
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
11841
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
11842
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
11843
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
11844
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
11845
RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
11846
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
11847
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
11848
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
11849
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
11850
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
11851
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
11852
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
11853
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
11854
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
11855
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
11856
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
11857
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
11858
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
11859
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
11860
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
11861
@@ -601,6 +634,10 @@
11862
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
11863
{ ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH,
11864
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
11865
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
11866
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
11867
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
11868
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
11869
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
11870
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
11871
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
11872
@@ -651,6 +688,18 @@
11873
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
11874
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
11875
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11876
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
11877
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
11878
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
11879
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
11880
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
11881
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11882
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
11883
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11884
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
11885
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
11886
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
11887
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11888
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP,
11889
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
11890
{ ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
11891
@@ -937,6 +986,10 @@
11892
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11893
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW,
11894
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11895
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
11896
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11897
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
11898
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11899
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
11900
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
11901
{ ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
11902
@@ -975,6 +1028,10 @@
11903
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11904
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW,
11905
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11906
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD,
11907
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11908
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD,
11909
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11910
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
11911
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
11912
{ ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
11913
@@ -1021,6 +1078,10 @@
11914
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11915
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW,
11916
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11917
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD,
11918
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11919
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD,
11920
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11921
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
11922
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
11923
{ ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
11924
@@ -1418,6 +1479,18 @@
11925
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
11926
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
11927
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11928
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
11929
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11930
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
11931
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
11932
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
11933
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11934
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
11935
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
11936
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
11937
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
11938
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
11939
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11940
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP,
11941
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
11942
{ ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP,
11943
@@ -1604,6 +1677,18 @@
11944
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
11945
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
11946
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11947
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
11948
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11949
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
11950
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
11951
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
11952
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11953
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
11954
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
11955
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
11956
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
11957
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
11958
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11959
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP,
11960
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
11961
{ ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP,
11962
@@ -1786,6 +1871,12 @@
11963
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11964
{ ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
11965
RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
11966
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
11967
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11968
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
11969
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11970
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
11971
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
11972
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
11973
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11974
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
11975
@@ -1812,6 +1903,10 @@
11976
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11977
{ ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS,
11978
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11979
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS,
11980
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
11981
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS,
11982
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11983
{ ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS,
11984
RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
11985
{ ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS,
11986
@@ -1824,6 +1919,8 @@
11987
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11988
{ ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS,
11989
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11990
+ { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS,
11991
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
11992
{ ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS,
11993
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
11994
{ ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
11995
@@ -1844,6 +1941,10 @@
11996
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11997
{ ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW,
11998
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
11999
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
12000
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12001
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
12002
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12003
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
12004
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12005
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
12006
@@ -1868,6 +1969,10 @@
12007
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12008
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
12009
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12010
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
12011
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12012
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
12013
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12014
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
12015
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
12016
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
12017
@@ -2032,6 +2137,10 @@
12018
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12019
{ ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW,
12020
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12021
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
12022
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12023
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
12024
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12025
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
12026
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12027
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
12028
@@ -2056,6 +2165,10 @@
12029
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12030
{ ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW,
12031
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12032
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD,
12033
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12034
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRD,
12035
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12036
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
12037
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12038
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
12039
@@ -2196,6 +2309,18 @@
12040
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
12041
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
12042
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
12043
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
12044
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12045
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
12046
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12047
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
12048
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12049
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
12050
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12051
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
12052
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
12053
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
12054
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12055
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP,
12056
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
12057
{ ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
12058
@@ -3327,6 +3452,20 @@
12059
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
12060
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
12061
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
12062
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12063
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
12064
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12065
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
12066
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12067
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
12068
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12069
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
12070
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12071
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
12072
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12073
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
12074
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
12075
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI },
12076
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
12077
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
12078
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
12079
@@ -3372,11 +3511,455 @@
12080
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
12081
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
12082
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
12083
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
12084
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
12085
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
12086
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
12087
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
12088
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
12089
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
12090
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
12091
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
12092
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
12093
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
12094
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
12095
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
12096
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
12097
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
12098
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
12100
+ /* Power8 vector overloaded functions. */
12101
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
12102
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
12103
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
12104
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
12105
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
12106
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
12107
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
12108
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
12109
+ RS6000_BTI_unsigned_V16QI, 0 },
12110
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
12111
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12112
+ RS6000_BTI_bool_V16QI, 0 },
12113
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
12114
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12115
+ RS6000_BTI_unsigned_V16QI, 0 },
12116
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
12117
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
12118
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
12119
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
12120
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
12121
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
12122
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
12123
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
12124
+ RS6000_BTI_unsigned_V8HI, 0 },
12125
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
12126
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12127
+ RS6000_BTI_bool_V8HI, 0 },
12128
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
12129
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12130
+ RS6000_BTI_unsigned_V8HI, 0 },
12131
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
12132
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
12133
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
12134
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
12135
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
12136
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
12137
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
12138
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
12139
+ RS6000_BTI_unsigned_V4SI, 0 },
12140
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
12141
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12142
+ RS6000_BTI_bool_V4SI, 0 },
12143
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
12144
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12145
+ RS6000_BTI_unsigned_V4SI, 0 },
12146
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
12147
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12148
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
12149
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12150
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
12151
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12152
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
12153
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
12154
+ RS6000_BTI_unsigned_V2DI, 0 },
12155
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
12156
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12157
+ RS6000_BTI_bool_V2DI, 0 },
12158
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
12159
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12160
+ RS6000_BTI_unsigned_V2DI, 0 },
12161
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
12162
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
12163
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
12164
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
12166
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
12167
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
12168
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
12169
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
12170
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
12171
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
12172
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
12173
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
12174
+ RS6000_BTI_unsigned_V16QI, 0 },
12175
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
12176
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12177
+ RS6000_BTI_bool_V16QI, 0 },
12178
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
12179
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12180
+ RS6000_BTI_unsigned_V16QI, 0 },
12181
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
12182
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
12183
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
12184
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
12185
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
12186
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
12187
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
12188
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
12189
+ RS6000_BTI_unsigned_V8HI, 0 },
12190
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
12191
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12192
+ RS6000_BTI_bool_V8HI, 0 },
12193
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
12194
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12195
+ RS6000_BTI_unsigned_V8HI, 0 },
12196
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
12197
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
12198
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
12199
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
12200
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
12201
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
12202
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
12203
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
12204
+ RS6000_BTI_unsigned_V4SI, 0 },
12205
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
12206
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12207
+ RS6000_BTI_bool_V4SI, 0 },
12208
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
12209
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12210
+ RS6000_BTI_unsigned_V4SI, 0 },
12211
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
12212
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12213
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
12214
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12215
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
12216
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12217
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
12218
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
12219
+ RS6000_BTI_unsigned_V2DI, 0 },
12220
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
12221
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12222
+ RS6000_BTI_bool_V2DI, 0 },
12223
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
12224
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12225
+ RS6000_BTI_unsigned_V2DI, 0 },
12226
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
12227
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
12228
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
12229
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
12231
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
12232
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
12233
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
12234
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
12235
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
12236
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
12237
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
12238
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
12239
+ RS6000_BTI_unsigned_V16QI, 0 },
12240
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
12241
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12242
+ RS6000_BTI_bool_V16QI, 0 },
12243
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
12244
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12245
+ RS6000_BTI_unsigned_V16QI, 0 },
12246
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
12247
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
12248
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
12249
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
12250
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
12251
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
12252
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
12253
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
12254
+ RS6000_BTI_unsigned_V8HI, 0 },
12255
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
12256
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12257
+ RS6000_BTI_bool_V8HI, 0 },
12258
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
12259
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12260
+ RS6000_BTI_unsigned_V8HI, 0 },
12261
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
12262
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
12263
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
12264
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
12265
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
12266
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
12267
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
12268
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
12269
+ RS6000_BTI_unsigned_V4SI, 0 },
12270
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
12271
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12272
+ RS6000_BTI_bool_V4SI, 0 },
12273
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
12274
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12275
+ RS6000_BTI_unsigned_V4SI, 0 },
12276
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
12277
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12278
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
12279
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12280
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
12281
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12282
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
12283
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
12284
+ RS6000_BTI_unsigned_V2DI, 0 },
12285
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
12286
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12287
+ RS6000_BTI_bool_V2DI, 0 },
12288
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
12289
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12290
+ RS6000_BTI_unsigned_V2DI, 0 },
12291
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
12292
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
12293
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
12294
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
12296
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
12297
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12298
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
12299
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12300
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
12301
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12302
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
12303
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12304
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
12305
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
12306
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
12307
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12309
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
12310
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
12311
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
12312
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
12313
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
12314
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
12315
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
12316
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
12317
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
12318
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
12319
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
12320
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
12321
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
12322
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
12323
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
12324
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
12326
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
12327
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
12328
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
12329
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
12331
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
12332
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
12333
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
12334
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
12336
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
12337
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
12338
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
12339
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
12341
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
12342
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
12343
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
12344
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
12346
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
12347
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
12348
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
12349
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
12351
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
12352
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12353
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
12354
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12355
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
12356
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12358
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
12359
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12360
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
12361
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12362
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
12363
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12365
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
12366
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
12367
+ RS6000_BTI_unsigned_V2DI, 0 },
12368
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
12369
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12370
+ RS6000_BTI_bool_V2DI, 0 },
12371
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
12372
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12373
+ RS6000_BTI_unsigned_V2DI, 0 },
12375
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
12376
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
12377
+ RS6000_BTI_unsigned_V2DI, 0 },
12378
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
12379
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12380
+ RS6000_BTI_bool_V2DI, 0 },
12381
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
12382
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12383
+ RS6000_BTI_unsigned_V2DI, 0 },
12385
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
12386
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
12387
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
12388
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12389
+ RS6000_BTI_unsigned_V4SI, 0 },
12391
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
12392
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
12393
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
12394
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12395
+ RS6000_BTI_unsigned_V4SI, 0 },
12397
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
12398
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
12399
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
12400
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
12401
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
12402
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
12403
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
12404
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
12405
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
12406
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
12407
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
12408
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
12409
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
12410
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
12411
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
12412
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
12414
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
12415
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
12416
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
12417
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
12419
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
12420
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
12421
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
12422
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
12424
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
12425
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
12426
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
12427
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
12429
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
12430
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
12431
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
12432
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
12434
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
12435
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12436
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
12437
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12438
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
12439
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
12441
+ { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
12442
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12444
+ { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
12445
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12447
+ { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
12448
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12450
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
12451
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12452
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
12453
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12455
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
12456
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12457
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
12458
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12460
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
12461
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12462
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
12463
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12465
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
12466
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12467
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD,
12468
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12470
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
12471
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
12472
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
12473
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
12474
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
12475
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
12476
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
12477
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12478
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
12479
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
12480
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
12481
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
12483
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
12484
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
12485
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
12486
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
12488
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
12489
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
12490
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
12491
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
12493
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
12494
+ RS6000_BTI_V16QI, 0, 0, 0 },
12495
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
12496
+ RS6000_BTI_unsigned_V16QI, 0, 0, 0 },
12498
+ /* Crypto builtins. */
12499
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
12500
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12501
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
12502
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI,
12503
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12504
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
12505
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI,
12506
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12507
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
12508
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI,
12509
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12510
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
12512
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB,
12513
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
12514
+ RS6000_BTI_unsigned_V16QI, 0 },
12515
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH,
12516
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
12517
+ RS6000_BTI_unsigned_V8HI, 0 },
12518
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW,
12519
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12520
+ RS6000_BTI_unsigned_V4SI, 0 },
12521
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD,
12522
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12523
+ RS6000_BTI_unsigned_V2DI, 0 },
12525
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW,
12526
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
12527
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
12528
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD,
12529
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
12530
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
12532
{ (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 }
12535
@@ -3824,7 +4407,8 @@
12536
&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
12537
|| rs6000_builtin_type_compatible (types[1], desc->op2))
12538
&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
12539
- || rs6000_builtin_type_compatible (types[2], desc->op3)))
12540
+ || rs6000_builtin_type_compatible (types[2], desc->op3))
12541
+ && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
12542
return altivec_build_resolved_builtin (args, n, desc);
12545
--- a/src/gcc/config/rs6000/rs6000.opt
12546
+++ b/src/gcc/config/rs6000/rs6000.opt
12547
@@ -181,13 +181,16 @@
12548
Target Report Mask(VSX) Var(rs6000_isa_flags)
12549
Use vector/scalar (VSX) instructions
12552
+Target Undocumented Report Var(TARGET_VSX_SCALAR_FLOAT) Init(1)
12553
+; If -mpower8-vector, use VSX arithmetic instructions for SFmode (on by default)
12556
-Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(-1)
12557
-; If -mvsx, use VSX arithmetic instructions for scalar double (on by default)
12558
+Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1)
12559
+; If -mvsx, use VSX arithmetic instructions for DFmode (on by default)
12562
-Target Undocumented Report Var(TARGET_VSX_SCALAR_MEMORY)
12563
-; If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
12564
+Target Undocumented Report Alias(mupper-regs-df)
12567
Target Undocumented Report Var(TARGET_VSX_ALIGN_128)
12568
@@ -363,6 +366,14 @@
12569
Target RejectNegative Var(rs6000_spe_abi, 0)
12570
Do not use the SPE ABI extensions
12573
+Target RejectNegative Var(rs6000_elf_abi, 1) Save
12577
+Target RejectNegative Var(rs6000_elf_abi, 2)
12580
; These are here for testing during development only, do not document
12581
; in the manual please.
12583
@@ -514,3 +525,47 @@
12585
Target Report Var(TARGET_SAVE_TOC_INDIRECT) Save
12586
Control whether we save the TOC in the prologue for indirect calls or generate the save inline
12589
+Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags)
12590
+Allow 128-bit integers in VSX registers
12593
+Target Report Mask(P8_FUSION) Var(rs6000_isa_flags)
12594
+Fuse certain integer operations together for better performance on power8
12596
+mpower8-fusion-sign
12597
+Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
12598
+Allow sign extension in fusion operations
12601
+Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
12602
+Use/do not use vector and scalar instructions added in ISA 2.07.
12605
+Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
12606
+Use ISA 2.07 crypto instructions
12609
+Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags)
12610
+Use ISA 2.07 direct move between GPR & VSX register instructions
12613
+Target Report Mask(HTM) Var(rs6000_isa_flags)
12614
+Use ISA 2.07 transactional memory (HTM) instructions
12617
+Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
12618
+Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
12620
+mcompat-align-parm
12621
+Target Report Var(rs6000_compat_align_parm) Init(1) Save
12622
+Generate aggregate parameter passing code with at most 64-bit alignment.
12625
+Target Undocumented Mask(UPPER_REGS_DF) Var(rs6000_isa_flags)
12626
+Allow double variables in upper registers with -mcpu=power7 or -mvsx
12629
+Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
12630
+Allow float variables in upper registers with -mcpu=power8 or -mp8-vector
12631
--- a/src/gcc/config/rs6000/linux64.h
12632
+++ b/src/gcc/config/rs6000/linux64.h
12635
#ifndef RS6000_BI_ARCH
12637
-#undef DEFAULT_ABI
12638
-#define DEFAULT_ABI ABI_AIX
12640
#undef TARGET_64BIT
12641
#define TARGET_64BIT 1
12644
#undef PROCESSOR_DEFAULT
12645
#define PROCESSOR_DEFAULT PROCESSOR_POWER7
12646
#undef PROCESSOR_DEFAULT64
12647
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
12648
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8
12650
#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
12653
/* We don't need to generate entries in .fixup, except when
12654
-mrelocatable or -mrelocatable-lib is given. */
12656
#define INVALID_64BIT "-m%s not supported in this configuration"
12657
#define INVALID_32BIT INVALID_64BIT
12659
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
12660
+#define ELFv2_ABI_CHECK (rs6000_elf_abi != 1)
12662
+#define ELFv2_ABI_CHECK (rs6000_elf_abi == 2)
12665
#undef SUBSUBTARGET_OVERRIDE_OPTIONS
12666
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
12668
@@ -102,6 +109,12 @@
12669
error (INVALID_64BIT, "call"); \
12671
dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \
12672
+ if (ELFv2_ABI_CHECK) \
12674
+ rs6000_current_abi = ABI_ELFv2; \
12675
+ if (dot_symbols) \
12676
+ error ("-mcall-aixdesc incompatible with -mabi=elfv2"); \
12678
if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \
12680
rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
12681
@@ -351,7 +364,11 @@
12682
#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)"
12684
#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
12685
-#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld64.so.1"
12686
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
12687
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}"
12689
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}"
12691
#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
12692
#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
12693
#if DEFAULT_LIBC == LIBC_UCLIBC
12694
--- a/src/gcc/config/rs6000/darwin.h
12695
+++ b/src/gcc/config/rs6000/darwin.h
12696
@@ -205,7 +205,8 @@
12697
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \
12698
"vrsave", "vscr", \
12699
"spe_acc", "spefscr", \
12702
+ "tfhar", "tfiar", "texasr" \
12705
/* This outputs NAME to FILE. */
12706
--- a/src/gcc/config/rs6000/rs6000.c
12707
+++ b/src/gcc/config/rs6000/rs6000.c
12709
int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
12710
int varargs_save_offset; /* offset to save the varargs registers */
12711
int ehrd_offset; /* offset to EH return data */
12712
+ int ehcr_offset; /* offset to EH CR field data */
12713
int reg_size; /* register size (4 or 8) */
12714
HOST_WIDE_INT vars_size; /* variable save area size */
12715
int parm_size; /* outgoing parameter size */
12716
@@ -139,6 +140,8 @@
12717
64-bits wide and is allocated early enough so that the offset
12718
does not overflow the 16-bit load/store offset field. */
12719
rtx sdmode_stack_slot;
12720
+ /* Flag if r2 setup is needed with ELFv2 ABI. */
12721
+ bool r2_setup_needed;
12722
} machine_function;
12724
/* Support targetm.vectorize.builtin_mask_for_load. */
12725
@@ -189,9 +192,6 @@
12726
/* Map register number to register class. */
12727
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
12729
-/* Reload functions based on the type and the vector unit. */
12730
-static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2];
12732
static int dbg_cost_ctrl;
12734
/* Built in types. */
12735
@@ -289,6 +289,105 @@
12736
don't link in rs6000-c.c, so we can't call it directly. */
12737
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
12739
+/* Simplfy register classes into simpler classifications. We assume
12740
+ GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
12741
+ check for standard register classes (gpr/floating/altivec/vsx) and
12742
+ floating/vector classes (float/altivec/vsx). */
12744
+enum rs6000_reg_type {
12749
+ ALTIVEC_REG_TYPE,
12757
+/* Map register class to register type. */
12758
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
12760
+/* First/last register type for the 'normal' register types (i.e. general
12761
+ purpose, floating point, altivec, and VSX registers). */
12762
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
12764
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
12767
+/* Register classes we care about in secondary reload or go if legitimate
12768
+ address. We only need to worry about GPR, FPR, and Altivec registers here,
12769
+ along an ANY field that is the OR of the 3 register classes. */
12771
+enum rs6000_reload_reg_type {
12772
+ RELOAD_REG_GPR, /* General purpose registers. */
12773
+ RELOAD_REG_FPR, /* Traditional floating point regs. */
12774
+ RELOAD_REG_VMX, /* Altivec (VMX) registers. */
12775
+ RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
12779
+/* For setting up register classes, loop through the 3 register classes mapping
12780
+ into real registers, and skip the ANY class, which is just an OR of the
12782
+#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
12783
+#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
12785
+/* Map reload register type to a register in the register class. */
12786
+struct reload_reg_map_type {
12787
+ const char *name; /* Register class name. */
12788
+ int reg; /* Register in the register class. */
12791
+static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
12792
+ { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
12793
+ { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
12794
+ { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
12795
+ { "Any", -1 }, /* RELOAD_REG_ANY. */
12798
+/* Mask bits for each register class, indexed per mode. Historically the
12799
+ compiler has been more restrictive which types can do PRE_MODIFY instead of
12800
+ PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */
12801
+typedef unsigned char addr_mask_type;
12803
+#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
12804
+#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
12805
+#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
12806
+#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
12807
+#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
12808
+#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
12810
+/* Register type masks based on the type, of valid addressing modes. */
12811
+struct rs6000_reg_addr {
12812
+ enum insn_code reload_load; /* INSN to reload for loading. */
12813
+ enum insn_code reload_store; /* INSN to reload for storing. */
12814
+ enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
12815
+ enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
12816
+ enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
12817
+ addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
12820
+static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
12822
+/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
12823
+static inline bool
12824
+mode_supports_pre_incdec_p (enum machine_mode mode)
12826
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
12830
+/* Helper function to say whether a mode supports PRE_MODIFY. */
12831
+static inline bool
12832
+mode_supports_pre_modify_p (enum machine_mode mode)
12834
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
12839
/* Target cpu costs. */
12841
@@ -828,6 +927,25 @@
12842
12, /* prefetch streams */
12845
+/* Instruction costs on POWER8 processors. */
12847
+struct processor_costs power8_cost = {
12848
+ COSTS_N_INSNS (3), /* mulsi */
12849
+ COSTS_N_INSNS (3), /* mulsi_const */
12850
+ COSTS_N_INSNS (3), /* mulsi_const9 */
12851
+ COSTS_N_INSNS (3), /* muldi */
12852
+ COSTS_N_INSNS (19), /* divsi */
12853
+ COSTS_N_INSNS (35), /* divdi */
12854
+ COSTS_N_INSNS (3), /* fp */
12855
+ COSTS_N_INSNS (3), /* dmul */
12856
+ COSTS_N_INSNS (14), /* sdiv */
12857
+ COSTS_N_INSNS (17), /* ddiv */
12858
+ 128, /* cache line size */
12859
+ 32, /* l1 cache */
12860
+ 256, /* l2 cache */
12861
+ 12, /* prefetch streams */
12864
/* Instruction costs on POWER A2 processors. */
12866
struct processor_costs ppca2_cost = {
12867
@@ -855,6 +973,7 @@
12868
#undef RS6000_BUILTIN_A
12869
#undef RS6000_BUILTIN_D
12870
#undef RS6000_BUILTIN_E
12871
+#undef RS6000_BUILTIN_H
12872
#undef RS6000_BUILTIN_P
12873
#undef RS6000_BUILTIN_Q
12874
#undef RS6000_BUILTIN_S
12875
@@ -878,6 +997,9 @@
12876
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
12877
{ NAME, ICODE, MASK, ATTR },
12879
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12880
+ { NAME, ICODE, MASK, ATTR },
12882
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12883
{ NAME, ICODE, MASK, ATTR },
12885
@@ -908,6 +1030,7 @@
12886
#undef RS6000_BUILTIN_A
12887
#undef RS6000_BUILTIN_D
12888
#undef RS6000_BUILTIN_E
12889
+#undef RS6000_BUILTIN_H
12890
#undef RS6000_BUILTIN_P
12891
#undef RS6000_BUILTIN_Q
12892
#undef RS6000_BUILTIN_S
12893
@@ -948,6 +1071,7 @@
12894
static void paired_init_builtins (void);
12895
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
12896
static void spe_init_builtins (void);
12897
+static void htm_init_builtins (void);
12898
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
12899
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
12900
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
12901
@@ -1020,6 +1144,13 @@
12902
static void rs6000_print_builtin_options (FILE *, int, const char *,
12905
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
12906
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
12907
+ enum rs6000_reg_type,
12908
+ enum machine_mode,
12909
+ secondary_reload_info *,
12912
/* Hash table stuff for keeping track of TOC entries. */
12914
struct GTY(()) toc_hash_struct
12915
@@ -1068,7 +1199,9 @@
12916
/* SPE registers. */
12917
"spe_acc", "spefscr",
12918
/* Soft frame pointer. */
12921
+ /* HTM SPR registers. */
12922
+ "tfhar", "tfiar", "texasr"
12925
#ifdef TARGET_REGNAMES
12926
@@ -1094,7 +1227,9 @@
12927
/* SPE registers. */
12928
"spe_acc", "spefscr",
12929
/* Soft frame pointer. */
12932
+ /* HTM SPR registers. */
12933
+ "tfhar", "tfiar", "texasr"
12937
@@ -1316,6 +1451,9 @@
12938
#undef TARGET_RETURN_IN_MEMORY
12939
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
12941
+#undef TARGET_RETURN_IN_MSB
12942
+#define TARGET_RETURN_IN_MSB rs6000_return_in_msb
12944
#undef TARGET_SETUP_INCOMING_VARARGS
12945
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
12947
@@ -1513,8 +1651,9 @@
12949
unsigned HOST_WIDE_INT reg_size;
12951
+ /* TF/TD modes are special in that they always take 2 registers. */
12952
if (FP_REGNO_P (regno))
12953
- reg_size = (VECTOR_MEM_VSX_P (mode)
12954
+ reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
12955
? UNITS_PER_VSX_WORD
12956
: UNITS_PER_FP_WORD);
12958
@@ -1546,16 +1685,38 @@
12960
int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
12962
+ /* PTImode can only go in GPRs. Quad word memory operations require even/odd
12963
+ register combinations, and use PTImode where we need to deal with quad
12964
+ word memory operations. Don't allow quad words in the argument or frame
12965
+ pointer registers, just registers 0..31. */
12966
+ if (mode == PTImode)
12967
+ return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
12968
+ && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
12969
+ && ((regno & 1) == 0));
12971
/* VSX registers that overlap the FPR registers are larger than for non-VSX
12972
implementations. Don't allow an item to be split between a FP register
12973
- and an Altivec register. */
12974
- if (VECTOR_MEM_VSX_P (mode))
12975
+ and an Altivec register. Allow TImode in all VSX registers if the user
12977
+ if (TARGET_VSX && VSX_REGNO_P (regno)
12978
+ && (VECTOR_MEM_VSX_P (mode)
12979
+ || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
12980
+ || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
12981
+ || (TARGET_VSX_TIMODE && mode == TImode)))
12983
if (FP_REGNO_P (regno))
12984
return FP_REGNO_P (last_regno);
12986
if (ALTIVEC_REGNO_P (regno))
12987
- return ALTIVEC_REGNO_P (last_regno);
12989
+ if (mode == SFmode && !TARGET_UPPER_REGS_SF)
12992
+ if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
12995
+ return ALTIVEC_REGNO_P (last_regno);
12999
/* The GPRs can hold any mode, but values bigger than one register
13000
@@ -1564,8 +1725,7 @@
13001
return INT_REGNO_P (last_regno);
13003
/* The float registers (except for VSX vector modes) can only hold floating
13004
- modes and DImode. This excludes the 32-bit decimal float mode for
13006
+ modes and DImode. */
13007
if (FP_REGNO_P (regno))
13009
if (SCALAR_FLOAT_MODE_P (mode)
13010
@@ -1599,9 +1759,8 @@
13011
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
13014
- /* We cannot put TImode anywhere except general register and it must be able
13015
- to fit within the register set. In the future, allow TImode in the
13016
- Altivec or VSX registers. */
13017
+ /* We cannot put non-VSX TImode or PTImode anywhere except general register
13018
+ and it must be able to fit within the register set. */
13020
return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
13022
@@ -1674,10 +1833,77 @@
13026
+ len += fprintf (stderr, "%sreg-class = %s", comma,
13027
+ reg_class_names[(int)rs6000_regno_regclass[r]]);
13032
+ fprintf (stderr, ",\n\t");
13036
fprintf (stderr, "%sregno = %d\n", comma, r);
13040
+static const char *
13041
+rs6000_debug_vector_unit (enum rs6000_vector v)
13047
+ case VECTOR_NONE: ret = "none"; break;
13048
+ case VECTOR_ALTIVEC: ret = "altivec"; break;
13049
+ case VECTOR_VSX: ret = "vsx"; break;
13050
+ case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
13051
+ case VECTOR_PAIRED: ret = "paired"; break;
13052
+ case VECTOR_SPE: ret = "spe"; break;
13053
+ case VECTOR_OTHER: ret = "other"; break;
13054
+ default: ret = "unknown"; break;
13060
+/* Print the address masks in a human readble fashion. */
13061
+DEBUG_FUNCTION void
13062
+rs6000_debug_print_mode (ssize_t m)
13066
+ fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
13067
+ for (rc = 0; rc < N_RELOAD_REG; rc++)
13069
+ addr_mask_type mask = reg_addr[m].addr_mask[rc];
13071
+ " %s: %c%c%c%c%c%c",
13072
+ reload_reg_map[rc].name,
13073
+ (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
13074
+ (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
13075
+ (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
13076
+ (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
13077
+ (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
13078
+ (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
13081
+ if (rs6000_vector_unit[m] != VECTOR_NONE
13082
+ || rs6000_vector_mem[m] != VECTOR_NONE
13083
+ || (reg_addr[m].reload_store != CODE_FOR_nothing)
13084
+ || (reg_addr[m].reload_load != CODE_FOR_nothing))
13087
+ " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c",
13088
+ rs6000_debug_vector_unit (rs6000_vector_unit[m]),
13089
+ rs6000_debug_vector_unit (rs6000_vector_mem[m]),
13090
+ (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
13091
+ (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
13094
+ fputs ("\n", stderr);
13097
#define DEBUG_FMT_ID "%-32s= "
13098
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
13099
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
13100
@@ -1690,6 +1916,7 @@
13101
static const char *const tf[2] = { "false", "true" };
13102
const char *nl = (const char *)0;
13104
+ size_t m1, m2, v;
13105
char costly_num[20];
13107
char flags_buffer[40];
13108
@@ -1700,20 +1927,67 @@
13109
const char *cmodel_str;
13110
struct cl_target_option cl_opts;
13112
- /* Map enum rs6000_vector to string. */
13113
- static const char *rs6000_debug_vector_unit[] = {
13120
+ /* Modes we want tieable information on. */
13121
+ static const enum machine_mode print_tieable_modes[] = {
13155
- fprintf (stderr, "Register information: (last virtual reg = %d)\n",
13156
- LAST_VIRTUAL_REGISTER);
13157
- rs6000_debug_reg_print (0, 31, "gr");
13158
- rs6000_debug_reg_print (32, 63, "fp");
13159
+ /* Virtual regs we are interested in. */
13160
+ const static struct {
13161
+ int regno; /* register number. */
13162
+ const char *name; /* register name. */
13163
+ } virtual_regs[] = {
13164
+ { STACK_POINTER_REGNUM, "stack pointer:" },
13165
+ { TOC_REGNUM, "toc: " },
13166
+ { STATIC_CHAIN_REGNUM, "static chain: " },
13167
+ { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
13168
+ { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
13169
+ { ARG_POINTER_REGNUM, "arg pointer: " },
13170
+ { FRAME_POINTER_REGNUM, "frame pointer:" },
13171
+ { FIRST_PSEUDO_REGISTER, "first pseudo: " },
13172
+ { FIRST_VIRTUAL_REGISTER, "first virtual:" },
13173
+ { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
13174
+ { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
13175
+ { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
13176
+ { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
13177
+ { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
13178
+ { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
13179
+ { LAST_VIRTUAL_REGISTER, "last virtual: " },
13182
+ fputs ("\nHard register information:\n", stderr);
13183
+ rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
13184
+ rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
13185
rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
13186
LAST_ALTIVEC_REGNO,
13188
@@ -1726,6 +2000,10 @@
13189
rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
13190
rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
13192
+ fputs ("\nVirtual/stack/frame registers:\n", stderr);
13193
+ for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
13194
+ fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
13198
"d reg_class = %s\n"
13199
@@ -1734,25 +2012,70 @@
13200
"wa reg_class = %s\n"
13201
"wd reg_class = %s\n"
13202
"wf reg_class = %s\n"
13203
- "ws reg_class = %s\n\n",
13204
+ "wg reg_class = %s\n"
13205
+ "wl reg_class = %s\n"
13206
+ "wm reg_class = %s\n"
13207
+ "wr reg_class = %s\n"
13208
+ "ws reg_class = %s\n"
13209
+ "wt reg_class = %s\n"
13210
+ "wu reg_class = %s\n"
13211
+ "wv reg_class = %s\n"
13212
+ "ww reg_class = %s\n"
13213
+ "wx reg_class = %s\n"
13214
+ "wy reg_class = %s\n"
13215
+ "wz reg_class = %s\n"
13217
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
13218
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
13219
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
13220
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
13221
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
13222
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
13223
- reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
13224
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
13225
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
13226
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
13227
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
13228
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
13229
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
13230
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
13231
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
13232
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
13233
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
13234
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
13235
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
13238
for (m = 0; m < NUM_MACHINE_MODES; ++m)
13239
- if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
13242
- fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n",
13243
- GET_MODE_NAME (m),
13244
- rs6000_debug_vector_unit[ rs6000_vector_unit[m] ],
13245
- rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]);
13247
+ rs6000_debug_print_mode (m);
13249
+ fputs ("\n", stderr);
13251
+ for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
13253
+ enum machine_mode mode1 = print_tieable_modes[m1];
13254
+ bool first_time = true;
13256
+ nl = (const char *)0;
13257
+ for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
13259
+ enum machine_mode mode2 = print_tieable_modes[m2];
13260
+ if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
13264
+ fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
13266
+ first_time = false;
13269
+ fprintf (stderr, " %s", GET_MODE_NAME (mode2));
13274
+ fputs ("\n", stderr);
13278
fputs (nl, stderr);
13280
@@ -1913,6 +2236,7 @@
13282
case ABI_NONE: abi_str = "none"; break;
13283
case ABI_AIX: abi_str = "aix"; break;
13284
+ case ABI_ELFv2: abi_str = "ELFv2"; break;
13285
case ABI_V4: abi_str = "V4"; break;
13286
case ABI_DARWIN: abi_str = "darwin"; break;
13287
default: abi_str = "unknown"; break;
13288
@@ -1935,6 +2259,13 @@
13289
if (TARGET_LINK_STACK)
13290
fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
13292
+ if (targetm.lra_p ())
13293
+ fprintf (stderr, DEBUG_FMT_S, "lra", "true");
13295
+ if (TARGET_P8_FUSION)
13296
+ fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
13297
+ (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
13299
fprintf (stderr, DEBUG_FMT_S, "plt-format",
13300
TARGET_SECURE_PLT ? "secure" : "bss");
13301
fprintf (stderr, DEBUG_FMT_S, "struct-return",
13302
@@ -1954,11 +2285,106 @@
13303
(int)RS6000_BUILTIN_COUNT);
13307
+/* Update the addr mask bits in reg_addr to help secondary reload and go if
13308
+ legitimate address support to figure out the appropriate addressing to
13312
+rs6000_setup_reg_addr_masks (void)
13314
+ ssize_t rc, reg, m, nregs;
13315
+ addr_mask_type any_addr_mask, addr_mask;
13317
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
13319
+ /* SDmode is special in that we want to access it only via REG+REG
13320
+ addressing on power7 and above, since we want to use the LFIWZX and
13321
+ STFIWZX instructions to load it. */
13322
+ bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
13324
+ any_addr_mask = 0;
13325
+ for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
13328
+ reg = reload_reg_map[rc].reg;
13330
+ /* Can mode values go in the GPR/FPR/Altivec registers? */
13331
+ if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
13333
+ nregs = rs6000_hard_regno_nregs[m][reg];
13334
+ addr_mask |= RELOAD_REG_VALID;
13336
+ /* Indicate if the mode takes more than 1 physical register. If
13337
+ it takes a single register, indicate it can do REG+REG
13339
+ if (nregs > 1 || m == BLKmode)
13340
+ addr_mask |= RELOAD_REG_MULTIPLE;
13342
+ addr_mask |= RELOAD_REG_INDEXED;
13344
+ /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
13345
+ addressing. Restrict addressing on SPE for 64-bit types
13346
+ because of the SUBREG hackery used to address 64-bit floats in
13347
+ '32-bit' GPRs. To simplify secondary reload, don't allow
13348
+ update forms on scalar floating point types that can go in the
13349
+ upper registers. */
13351
+ if (TARGET_UPDATE
13352
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
13353
+ && GET_MODE_SIZE (m) <= 8
13354
+ && !VECTOR_MODE_P (m)
13355
+ && !COMPLEX_MODE_P (m)
13356
+ && !indexed_only_p
13357
+ && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8)
13358
+ && !(m == DFmode && TARGET_UPPER_REGS_DF)
13359
+ && !(m == SFmode && TARGET_UPPER_REGS_SF))
13361
+ addr_mask |= RELOAD_REG_PRE_INCDEC;
13363
+ /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
13364
+ we don't allow PRE_MODIFY for some multi-register
13369
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
13373
+ if (TARGET_POWERPC64)
13374
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
13379
+ if (TARGET_DF_INSN)
13380
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
13386
+ /* GPR and FPR registers can do REG+OFFSET addressing, except
13387
+ possibly for SDmode. */
13388
+ if ((addr_mask != 0) && !indexed_only_p
13389
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
13390
+ addr_mask |= RELOAD_REG_OFFSET;
13392
+ reg_addr[m].addr_mask[rc] = addr_mask;
13393
+ any_addr_mask |= addr_mask;
13396
+ reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
13401
/* Initialize the various global tables that are based on register size. */
13403
rs6000_init_hard_regno_mode_ok (bool global_init_p)
13410
@@ -1987,21 +2413,55 @@
13411
rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
13412
rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
13413
rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
13414
+ rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
13415
+ rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
13416
+ rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
13417
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
13418
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
13420
- /* Precalculate vector information, this must be set up before the
13421
- rs6000_hard_regno_nregs_internal below. */
13422
- for (m = 0; m < NUM_MACHINE_MODES; ++m)
13423
+ /* Precalculate register class to simpler reload register class. We don't
13424
+ need all of the register classes that are combinations of different
13425
+ classes, just the simple ones that have constraint letters. */
13426
+ for (c = 0; c < N_REG_CLASSES; c++)
13427
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
13429
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
13430
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
13431
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
13432
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
13433
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
13434
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
13435
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
13436
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
13437
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
13438
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
13439
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
13440
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
13444
- rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE;
13445
- rs6000_vector_reload[m][0] = CODE_FOR_nothing;
13446
- rs6000_vector_reload[m][1] = CODE_FOR_nothing;
13447
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
13448
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
13452
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
13453
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
13456
- for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++)
13457
- rs6000_constraints[c] = NO_REGS;
13458
+ /* Precalculate the valid memory formats as well as the vector information,
13459
+ this must be set up before the rs6000_hard_regno_nregs_internal calls
13461
+ gcc_assert ((int)VECTOR_NONE == 0);
13462
+ memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
13463
+ memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
13465
+ gcc_assert ((int)CODE_FOR_nothing == 0);
13466
+ memset ((void *) ®_addr[0], '\0', sizeof (reg_addr));
13468
+ gcc_assert ((int)NO_REGS == 0);
13469
+ memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
13471
/* The VSX hardware allows native alignment for vectors, but control whether the compiler
13472
believes it can use native alignment or still uses 128-bit alignment. */
13473
if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
13474
@@ -2062,12 +2522,13 @@
13478
- /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
13479
- Altivec doesn't have 64-bit support. */
13480
+ /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
13481
+ do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
13484
rs6000_vector_mem[V2DImode] = VECTOR_VSX;
13485
- rs6000_vector_unit[V2DImode] = VECTOR_NONE;
13486
+ rs6000_vector_unit[V2DImode]
13487
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
13488
rs6000_vector_align[V2DImode] = align64;
13491
@@ -2076,14 +2537,48 @@
13493
rs6000_vector_unit[DFmode] = VECTOR_VSX;
13494
rs6000_vector_mem[DFmode]
13495
- = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE);
13496
+ = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
13497
rs6000_vector_align[DFmode] = align64;
13500
+ /* Allow TImode in VSX register and set the VSX memory macros. */
13501
+ if (TARGET_VSX && TARGET_VSX_TIMODE)
13503
+ rs6000_vector_mem[TImode] = VECTOR_VSX;
13504
+ rs6000_vector_align[TImode] = align64;
13507
/* TODO add SPE and paired floating point vector support. */
13509
/* Register class constraints for the constraints that depend on compile
13511
+ switches. When the VSX code was added, different constraints were added
13512
+ based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
13513
+ of the VSX registers are used. The register classes for scalar floating
13514
+ point types is set, based on whether we allow that type into the upper
13515
+ (Altivec) registers. GCC has register classes to target the Altivec
13516
+ registers for load/store operations, to select using a VSX memory
13517
+ operation instead of the traditional floating point operation. The
13520
+ d - Register class to use with traditional DFmode instructions.
13521
+ f - Register class to use with traditional SFmode instructions.
13522
+ v - Altivec register.
13523
+ wa - Any VSX register.
13524
+ wd - Preferred register class for V2DFmode.
13525
+ wf - Preferred register class for V4SFmode.
13526
+ wg - Float register for power6x move insns.
13527
+ wl - Float register if we can do 32-bit signed int loads.
13528
+ wm - VSX register for ISA 2.07 direct move operations.
13529
+ wr - GPR if 64-bit mode is permitted.
13530
+ ws - Register class to do ISA 2.06 DF operations.
13531
+ wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
13532
+ wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
13533
+ wt - VSX register for TImode in VSX registers.
13534
+ ww - Register class to do SF conversions in with VSX operations.
13535
+ wx - Float register if we can do 32-bit int stores.
13536
+ wy - Register class to do ISA 2.07 SF operations.
13537
+ wz - Float register if we can do 32-bit unsigned int loads. */
13539
if (TARGET_HARD_FLOAT && TARGET_FPRS)
13540
rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
13542
@@ -2092,64 +2587,158 @@
13546
- /* At present, we just use VSX_REGS, but we have different constraints
13547
- based on the use, in case we want to fine tune the default register
13548
- class used. wa = any VSX register, wf = register class to use for
13549
- V4SF, wd = register class to use for V2DF, and ws = register classs to
13550
- use for DF scalars. */
13551
rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
13552
+ rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
13553
rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
13554
- rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
13555
- rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY
13559
+ if (TARGET_VSX_TIMODE)
13560
+ rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;
13562
+ if (TARGET_UPPER_REGS_DF)
13564
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
13565
+ rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
13568
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
13571
+ /* Add conditional constraints based on various options, to allow us to
13572
+ collapse multiple insn patterns. */
13573
if (TARGET_ALTIVEC)
13574
rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
13576
- /* Set up the reload helper functions. */
13577
+ if (TARGET_MFPGPR)
13578
+ rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
13580
+ if (TARGET_LFIWAX)
13581
+ rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
13583
+ if (TARGET_DIRECT_MOVE)
13584
+ rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
13586
+ if (TARGET_POWERPC64)
13587
+ rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
13589
+ if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)
13591
+ rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
13592
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
13593
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
13595
+ else if (TARGET_P8_VECTOR)
13597
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
13598
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
13600
+ else if (TARGET_VSX)
13601
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
13603
+ if (TARGET_STFIWX)
13604
+ rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
13606
+ if (TARGET_LFIWZX)
13607
+ rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
13609
+ /* Set up the reload helper and direct move functions. */
13610
if (TARGET_VSX || TARGET_ALTIVEC)
13614
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store;
13615
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load;
13616
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store;
13617
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load;
13618
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store;
13619
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load;
13620
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store;
13621
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load;
13622
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store;
13623
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load;
13624
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store;
13625
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load;
13626
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
13627
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
13628
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
13629
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
13630
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
13631
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
13632
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
13633
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
13634
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
13635
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
13636
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
13637
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
13638
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
13639
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
13641
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store;
13642
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load;
13643
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
13644
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
13645
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
13646
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
13648
+ if (TARGET_P8_VECTOR)
13650
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
13651
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
13652
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
13653
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
13655
+ if (TARGET_VSX_TIMODE)
13657
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
13658
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
13660
+ if (TARGET_DIRECT_MOVE)
13662
+ if (TARGET_POWERPC64)
13664
+ reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
13665
+ reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
13666
+ reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
13667
+ reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
13668
+ reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
13669
+ reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
13670
+ reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
13671
+ reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
13673
+ reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
13674
+ reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
13675
+ reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
13676
+ reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
13677
+ reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
13678
+ reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
13679
+ reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
13680
+ reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
13684
+ reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
13685
+ reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
13686
+ reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
13692
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store;
13693
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load;
13694
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store;
13695
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load;
13696
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store;
13697
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load;
13698
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store;
13699
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load;
13700
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store;
13701
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load;
13702
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store;
13703
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load;
13704
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
13705
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
13706
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
13707
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
13708
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
13709
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
13710
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
13711
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
13712
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
13713
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
13714
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
13715
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
13716
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
13717
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
13719
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store;
13720
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load;
13721
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
13722
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
13723
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
13724
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
13726
+ if (TARGET_P8_VECTOR)
13728
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
13729
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
13730
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
13731
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
13733
+ if (TARGET_VSX_TIMODE)
13735
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
13736
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
13741
@@ -2267,6 +2856,11 @@
13745
+ /* Update the addr mask bits in reg_addr to help secondary reload and go if
13746
+ legitimate address support to figure out the appropriate addressing to
13748
+ rs6000_setup_reg_addr_masks ();
13750
if (global_init_p || TARGET_DEBUG_TARGET)
13752
if (TARGET_DEBUG_REG)
13753
@@ -2369,16 +2963,19 @@
13755
rs6000_builtin_mask_calculate (void)
13757
- return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
13758
- | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
13759
- | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
13760
- | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
13761
- | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
13762
- | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
13763
- | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
13764
- | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
13765
- | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
13766
- | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0));
13767
+ return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
13768
+ | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
13769
+ | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
13770
+ | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
13771
+ | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
13772
+ | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
13773
+ | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
13774
+ | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
13775
+ | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
13776
+ | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
13777
+ | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
13778
+ | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
13779
+ | ((TARGET_HTM) ? RS6000_BTM_HTM : 0));
13782
/* Override command line options. Mostly we process the processor type and
13783
@@ -2609,6 +3206,12 @@
13787
+ /* If little-endian, default to -mstrict-align on older processors.
13788
+ Testing for htm matches power8 and later. */
13789
+ if (!BYTES_BIG_ENDIAN
13790
+ && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
13791
+ rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
13793
/* Add some warnings for VSX. */
13796
@@ -2619,15 +3222,13 @@
13797
if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
13798
msg = N_("-mvsx requires hardware floating point");
13800
- rs6000_isa_flags &= ~ OPTION_MASK_VSX;
13802
+ rs6000_isa_flags &= ~ OPTION_MASK_VSX;
13803
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
13806
else if (TARGET_PAIRED_FLOAT)
13807
msg = N_("-mvsx and -mpaired are incompatible");
13808
- /* The hardware will allow VSX and little endian, but until we make sure
13809
- things like vector select, etc. work don't allow VSX on little endian
13810
- systems at this point. */
13811
- else if (!BYTES_BIG_ENDIAN)
13812
- msg = N_("-mvsx used with little endian code");
13813
else if (TARGET_AVOID_XFORM > 0)
13814
msg = N_("-mvsx needs indexed addressing");
13815
else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
13816
@@ -2647,9 +3248,24 @@
13820
+ /* If hard-float/altivec/vsx were explicitly turned off then don't allow
13821
+ the -mcpu setting to enable options that conflict. */
13822
+ if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
13823
+ && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
13824
+ | OPTION_MASK_ALTIVEC
13825
+ | OPTION_MASK_VSX)) != 0)
13826
+ rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
13827
+ | OPTION_MASK_DIRECT_MOVE)
13828
+ & ~rs6000_isa_flags_explicit);
13830
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
13831
+ rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
13833
/* For the newer switches (vsx, dfp, etc.) set some of the older options,
13834
unless the user explicitly used the -mno-<option> to disable the code. */
13836
+ if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
13837
+ rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
13838
+ else if (TARGET_VSX)
13839
rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
13840
else if (TARGET_POPCNTD)
13841
rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
13842
@@ -2664,6 +3280,69 @@
13843
else if (TARGET_ALTIVEC)
13844
rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
13846
+ if (TARGET_CRYPTO && !TARGET_ALTIVEC)
13848
+ if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
13849
+ error ("-mcrypto requires -maltivec");
13850
+ rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
13853
+ if (TARGET_DIRECT_MOVE && !TARGET_VSX)
13855
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
13856
+ error ("-mdirect-move requires -mvsx");
13857
+ rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
13860
+ if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
13862
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
13863
+ error ("-mpower8-vector requires -maltivec");
13864
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
13867
+ if (TARGET_P8_VECTOR && !TARGET_VSX)
13869
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
13870
+ error ("-mpower8-vector requires -mvsx");
13871
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
13874
+ if (TARGET_VSX_TIMODE && !TARGET_VSX)
13876
+ if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
13877
+ error ("-mvsx-timode requires -mvsx");
13878
+ rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
13881
+ /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
13882
+ silently turn off quad memory mode. */
13883
+ if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
13885
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
13886
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
13888
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
13891
+ /* Enable power8 fusion if we are tuning for power8, even if we aren't
13892
+ generating power8 instructions. */
13893
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
13894
+ rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
13895
+ & OPTION_MASK_P8_FUSION);
13897
+ /* Power8 does not fuse sign extended loads with the addis. If we are
13898
+ optimizing at high levels for speed, convert a sign extended load into a
13899
+ zero extending load, and an explicit sign extension. */
13900
+ if (TARGET_P8_FUSION
13901
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
13902
+ && optimize_function_for_speed_p (cfun)
13903
+ && optimize >= 3)
13904
+ rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
13906
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
13907
+ rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
13909
/* E500mc does "better" if we inline more aggressively. Respect the
13910
user's opinion, though. */
13911
if (rs6000_block_move_inline_limit == 0
13912
@@ -2790,6 +3469,9 @@
13913
if (flag_section_anchors)
13914
TARGET_NO_FP_IN_TOC = 1;
13916
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
13917
+ rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
13919
#ifdef SUBTARGET_OVERRIDE_OPTIONS
13920
SUBTARGET_OVERRIDE_OPTIONS;
13922
@@ -2800,6 +3482,9 @@
13923
SUB3TARGET_OVERRIDE_OPTIONS;
13926
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
13927
+ rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
13929
/* For the E500 family of cores, reset the single/double FP flags to let us
13930
check that they remain constant across attributes or pragmas. Also,
13931
clear a possible request for string instructions, not supported and which
13932
@@ -2849,16 +3534,19 @@
13933
&& rs6000_cpu != PROCESSOR_POWER5
13934
&& rs6000_cpu != PROCESSOR_POWER6
13935
&& rs6000_cpu != PROCESSOR_POWER7
13936
+ && rs6000_cpu != PROCESSOR_POWER8
13937
&& rs6000_cpu != PROCESSOR_PPCA2
13938
&& rs6000_cpu != PROCESSOR_CELL
13939
&& rs6000_cpu != PROCESSOR_PPC476);
13940
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
13941
|| rs6000_cpu == PROCESSOR_POWER5
13942
- || rs6000_cpu == PROCESSOR_POWER7);
13943
+ || rs6000_cpu == PROCESSOR_POWER7
13944
+ || rs6000_cpu == PROCESSOR_POWER8);
13945
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
13946
|| rs6000_cpu == PROCESSOR_POWER5
13947
|| rs6000_cpu == PROCESSOR_POWER6
13948
|| rs6000_cpu == PROCESSOR_POWER7
13949
+ || rs6000_cpu == PROCESSOR_POWER8
13950
|| rs6000_cpu == PROCESSOR_PPCE500MC
13951
|| rs6000_cpu == PROCESSOR_PPCE500MC64
13952
|| rs6000_cpu == PROCESSOR_PPCE5500
13953
@@ -2988,7 +3676,7 @@
13955
/* We should always be splitting complex arguments, but we can't break
13956
Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
13957
- if (DEFAULT_ABI != ABI_AIX)
13958
+ if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
13959
targetm.calls.split_complex_arg = NULL;
13962
@@ -3102,6 +3790,10 @@
13963
rs6000_cost = &power7_cost;
13966
+ case PROCESSOR_POWER8:
13967
+ rs6000_cost = &power8_cost;
13970
case PROCESSOR_PPCA2:
13971
rs6000_cost = &ppca2_cost;
13973
@@ -3274,7 +3966,8 @@
13974
&& (rs6000_cpu == PROCESSOR_POWER4
13975
|| rs6000_cpu == PROCESSOR_POWER5
13976
|| rs6000_cpu == PROCESSOR_POWER6
13977
- || rs6000_cpu == PROCESSOR_POWER7))
13978
+ || rs6000_cpu == PROCESSOR_POWER7
13979
+ || rs6000_cpu == PROCESSOR_POWER8))
13982
return align_loops_log;
13983
@@ -3813,6 +4506,22 @@
13984
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
13987
+ case BUILT_IN_CLZIMAX:
13988
+ case BUILT_IN_CLZLL:
13989
+ case BUILT_IN_CLZL:
13990
+ case BUILT_IN_CLZ:
13991
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
13993
+ if (out_mode == QImode && out_n == 16)
13994
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
13995
+ else if (out_mode == HImode && out_n == 8)
13996
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
13997
+ else if (out_mode == SImode && out_n == 4)
13998
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
13999
+ else if (out_mode == DImode && out_n == 2)
14000
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
14003
case BUILT_IN_COPYSIGN:
14004
if (VECTOR_UNIT_VSX_P (V2DFmode)
14005
&& out_mode == DFmode && out_n == 2
14006
@@ -3828,6 +4537,22 @@
14007
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
14008
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
14010
+ case BUILT_IN_POPCOUNTIMAX:
14011
+ case BUILT_IN_POPCOUNTLL:
14012
+ case BUILT_IN_POPCOUNTL:
14013
+ case BUILT_IN_POPCOUNT:
14014
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
14016
+ if (out_mode == QImode && out_n == 16)
14017
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
14018
+ else if (out_mode == HImode && out_n == 8)
14019
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
14020
+ else if (out_mode == SImode && out_n == 4)
14021
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
14022
+ else if (out_mode == DImode && out_n == 2)
14023
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
14026
case BUILT_IN_SQRT:
14027
if (VECTOR_UNIT_VSX_P (V2DFmode)
14028
&& out_mode == DFmode && out_n == 2
14029
@@ -4043,7 +4768,11 @@
14033
- if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2))
14034
+ if (DEFAULT_ABI == ABI_ELFv2)
14035
+ fprintf (file, "\t.abiversion 2\n");
14037
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
14038
+ || (TARGET_ELF && flag_pic == 2))
14040
switch_to_section (toc_section);
14041
switch_to_section (text_section);
14042
@@ -4274,15 +5003,16 @@
14044
/* Check if VAL is present in every STEP-th element, and the
14045
other elements are filled with its most significant bit. */
14046
- for (i = 0; i < nunits - 1; ++i)
14047
+ for (i = 1; i < nunits; ++i)
14049
HOST_WIDE_INT desired_val;
14050
- if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0)
14051
+ unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
14052
+ if ((i & (step - 1)) == 0)
14055
desired_val = msb_val;
14057
- if (desired_val != const_vector_elt_as_int (op, i))
14058
+ if (desired_val != const_vector_elt_as_int (op, elt))
14062
@@ -4698,8 +5428,11 @@
14064
rtx freg = gen_reg_rtx (V4SFmode);
14065
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
14066
+ rtx cvt = ((TARGET_XSCVDPSPN)
14067
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
14068
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
14070
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
14072
emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
14075
@@ -4726,6 +5459,7 @@
14076
of 64-bit items is not supported on Altivec. */
14077
if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
14080
mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
14081
emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
14082
XVECEXP (vals, 0, 0));
14083
@@ -4736,9 +5470,11 @@
14084
gen_rtx_SET (VOIDmode,
14087
+ field = (BYTES_BIG_ENDIAN ? const0_rtx
14088
+ : GEN_INT (GET_MODE_NUNITS (mode) - 1));
14089
x = gen_rtx_VEC_SELECT (inner_mode, target,
14090
gen_rtx_PARALLEL (VOIDmode,
14091
- gen_rtvec (1, const0_rtx)));
14092
+ gen_rtvec (1, field)));
14093
emit_insn (gen_rtx_SET (VOIDmode, target,
14094
gen_rtx_VEC_DUPLICATE (mode, x)));
14096
@@ -4811,10 +5547,27 @@
14097
XVECEXP (mask, 0, elt*width + i)
14098
= GEN_INT (i + 0x10);
14099
x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
14100
- x = gen_rtx_UNSPEC (mode,
14101
- gen_rtvec (3, target, reg,
14102
- force_reg (V16QImode, x)),
14105
+ if (BYTES_BIG_ENDIAN)
14106
+ x = gen_rtx_UNSPEC (mode,
14107
+ gen_rtvec (3, target, reg,
14108
+ force_reg (V16QImode, x)),
14112
+ /* Invert selector. */
14113
+ rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
14114
+ gen_rtx_CONST_INT (QImode, -1));
14115
+ rtx tmp = gen_reg_rtx (V16QImode);
14116
+ emit_move_insn (tmp, splat);
14117
+ x = gen_rtx_MINUS (V16QImode, tmp, force_reg (V16QImode, x));
14118
+ emit_move_insn (tmp, x);
14120
+ /* Permute with operands reversed and adjusted selector. */
14121
+ x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
14125
emit_insn (gen_rtx_SET (VOIDmode, target, x));
14128
@@ -4938,7 +5691,7 @@
14130
if (GET_CODE (op) == SUBREG
14131
&& (mode == SImode || mode == DImode || mode == TImode
14132
- || mode == DDmode || mode == TDmode)
14133
+ || mode == DDmode || mode == TDmode || mode == PTImode)
14134
&& REG_P (SUBREG_REG (op))
14135
&& (GET_MODE (SUBREG_REG (op)) == DFmode
14136
|| GET_MODE (SUBREG_REG (op)) == TFmode))
14137
@@ -4951,6 +5704,7 @@
14138
&& REG_P (SUBREG_REG (op))
14139
&& (GET_MODE (SUBREG_REG (op)) == DImode
14140
|| GET_MODE (SUBREG_REG (op)) == TImode
14141
+ || GET_MODE (SUBREG_REG (op)) == PTImode
14142
|| GET_MODE (SUBREG_REG (op)) == DDmode
14143
|| GET_MODE (SUBREG_REG (op)) == TDmode))
14145
@@ -5087,6 +5841,72 @@
14146
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
14149
+/* Return true if this is a move direct operation between GPR registers and
14150
+ floating point/VSX registers. */
14153
+direct_move_p (rtx op0, rtx op1)
14155
+ int regno0, regno1;
14157
+ if (!REG_P (op0) || !REG_P (op1))
14160
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
14163
+ regno0 = REGNO (op0);
14164
+ regno1 = REGNO (op1);
14165
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
14168
+ if (INT_REGNO_P (regno0))
14169
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
14171
+ else if (INT_REGNO_P (regno1))
14173
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
14176
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
14183
+/* Return true if this is a load or store quad operation. */
14186
+quad_load_store_p (rtx op0, rtx op1)
14190
+ if (!TARGET_QUAD_MEMORY)
14193
+ else if (REG_P (op0) && MEM_P (op1))
14194
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
14195
+ && quad_memory_operand (op1, GET_MODE (op1))
14196
+ && !reg_overlap_mentioned_p (op0, op1));
14198
+ else if (MEM_P (op0) && REG_P (op1))
14199
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
14200
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
14205
+ if (TARGET_DEBUG_ADDR)
14207
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
14208
+ ret ? "true" : "false");
14209
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
14215
/* Given an address, return a constant offset term if one exists. */
14218
@@ -5170,7 +5990,11 @@
14222
- /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. */
14224
+ /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
14225
+ TImode is not a vector mode, if we want to use the VSX registers to
14226
+ move it around, we need to restrict ourselves to reg+reg
14228
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
14231
@@ -5184,6 +6008,13 @@
14236
+ /* If we can do direct load/stores of SDmode, restrict it to reg+reg
14237
+ addressing for the LFIWZX and STFIWX instructions. */
14238
+ if (TARGET_NO_SDMODE_STACK)
14245
@@ -5416,7 +6247,7 @@
14247
/* If we are using VSX scalar loads, restrict ourselves to reg+reg
14249
- if (mode == DFmode && VECTOR_MEM_VSX_P (DFmode))
14250
+ if (VECTOR_MEM_VSX_P (mode))
14254
@@ -5428,6 +6259,7 @@
14259
if (TARGET_E500_DOUBLE)
14260
return (SPE_CONST_OFFSET_OK (offset)
14261
&& SPE_CONST_OFFSET_OK (offset + 8));
14262
@@ -5527,7 +6359,7 @@
14264
if (TARGET_ELF || TARGET_MACHO)
14266
- if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
14267
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
14271
@@ -5583,8 +6415,11 @@
14272
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
14273
return force_reg (Pmode, XEXP (x, 0));
14275
+ /* For TImode with load/store quad, restrict addresses to just a single
14276
+ pointer, so it works with both GPRs and VSX registers. */
14277
/* Make sure both operands are registers. */
14278
- else if (GET_CODE (x) == PLUS)
14279
+ else if (GET_CODE (x) == PLUS
14280
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
14281
return gen_rtx_PLUS (Pmode,
14282
force_reg (Pmode, XEXP (x, 0)),
14283
force_reg (Pmode, XEXP (x, 1)));
14284
@@ -5604,11 +6439,12 @@
14289
/* As in legitimate_offset_address_p we do not assume
14290
worst-case. The mode here is just a hint as to the registers
14291
used. A TImode is usually in gprs, but may actually be in
14292
fprs. Leave worst-case scenario for reload to handle via
14293
- insn constraints. */
14294
+ insn constraints. PTImode is only GPRs. */
14298
@@ -6100,10 +6936,13 @@
14299
1, const0_rtx, Pmode);
14301
r3 = gen_rtx_REG (Pmode, 3);
14302
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
14303
- insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
14304
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
14305
- insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
14306
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14308
+ if (TARGET_64BIT)
14309
+ insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
14311
+ insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
14313
else if (DEFAULT_ABI == ABI_V4)
14314
insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
14316
@@ -6122,10 +6961,13 @@
14317
1, const0_rtx, Pmode);
14319
r3 = gen_rtx_REG (Pmode, 3);
14320
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
14321
- insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
14322
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
14323
- insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
14324
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14326
+ if (TARGET_64BIT)
14327
+ insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
14329
+ insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
14331
else if (DEFAULT_ABI == ABI_V4)
14332
insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
14334
@@ -6339,7 +7181,7 @@
14335
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
14336
|| mode == DDmode || mode == TDmode
14337
|| mode == DImode))
14338
- && VECTOR_MEM_NONE_P (mode))
14339
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
14341
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
14342
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
14343
@@ -6370,7 +7212,7 @@
14345
if (GET_CODE (x) == SYMBOL_REF
14347
- && VECTOR_MEM_NONE_P (mode)
14348
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
14349
&& !SPE_VECTOR_MODE (mode)
14351
&& DEFAULT_ABI == ABI_DARWIN
14352
@@ -6396,6 +7238,8 @@
14353
mem is sufficiently aligned. */
14356
+ && (mode != TImode || !TARGET_VSX_TIMODE)
14357
+ && mode != PTImode
14358
&& (mode != DImode || TARGET_POWERPC64)
14359
&& ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
14360
|| (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
14361
@@ -6516,15 +7360,9 @@
14363
if (legitimate_indirect_address_p (x, reg_ok_strict))
14365
- if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
14366
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
14367
- && !SPE_VECTOR_MODE (mode)
14368
- && mode != TFmode
14369
- && mode != TDmode
14370
- /* Restrict addressing for DI because of our SUBREG hackery. */
14371
- && !(TARGET_E500_DOUBLE
14372
- && (mode == DFmode || mode == DDmode || mode == DImode))
14374
+ if (TARGET_UPDATE
14375
+ && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
14376
+ && mode_supports_pre_incdec_p (mode)
14377
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
14379
if (virtual_stack_registers_memory_p (x))
14380
@@ -6534,6 +7372,13 @@
14382
&& legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
14384
+ /* For TImode, if we have load/store quad and TImode in VSX registers, only
14385
+ allow register indirect addresses. This will allow the values to go in
14386
+ either GPRs or VSX registers without reloading. The vector types would
14387
+ tend to go into VSX registers, so we allow REG+REG, while TImode seems
14388
+ somewhat split, in that some uses are GPR based, and some VSX based. */
14389
+ if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
14391
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
14392
if (! reg_ok_strict
14394
@@ -6545,31 +7390,20 @@
14396
if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
14398
- if (mode != TImode
14399
- && mode != TFmode
14400
+ if (mode != TFmode
14402
&& ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
14403
|| TARGET_POWERPC64
14404
|| (mode != DFmode && mode != DDmode)
14405
|| (TARGET_E500_DOUBLE && mode != DDmode))
14406
&& (TARGET_POWERPC64 || mode != DImode)
14407
+ && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
14408
+ && mode != PTImode
14409
&& !avoiding_indexed_address_p (mode)
14410
&& legitimate_indexed_address_p (x, reg_ok_strict))
14412
- if (GET_CODE (x) == PRE_MODIFY
14413
- && mode != TImode
14414
- && mode != TFmode
14415
- && mode != TDmode
14416
- && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
14417
- || TARGET_POWERPC64
14418
- || ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE))
14419
- && (TARGET_POWERPC64 || mode != DImode)
14420
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
14421
- && !SPE_VECTOR_MODE (mode)
14422
- /* Restrict addressing for DI because of our SUBREG hackery. */
14423
- && !(TARGET_E500_DOUBLE
14424
- && (mode == DFmode || mode == DDmode || mode == DImode))
14426
+ if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
14427
+ && mode_supports_pre_modify_p (mode)
14428
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
14429
&& (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
14430
reg_ok_strict, false)
14431
@@ -6590,10 +7424,13 @@
14432
bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
14434
"\nrs6000_legitimate_address_p: return = %s, mode = %s, "
14435
- "strict = %d, code = %s\n",
14436
+ "strict = %d, reload = %s, code = %s\n",
14437
ret ? "true" : "false",
14438
GET_MODE_NAME (mode),
14440
+ (reload_completed
14442
+ : (reload_in_progress ? "progress" : "before")),
14443
GET_RTX_NAME (GET_CODE (x)));
14446
@@ -6759,7 +7596,7 @@
14448
/* The TOC register is not killed across calls in a way that is
14449
visible to the compiler. */
14450
- if (DEFAULT_ABI == ABI_AIX)
14451
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14452
call_really_used_regs[2] = 0;
14454
if (DEFAULT_ABI == ABI_V4
14455
@@ -7007,7 +7844,7 @@
14458
/* Helper for the following. Get rid of [r+r] memory refs
14459
- in cases where it won't work (TImode, TFmode, TDmode). */
14460
+ in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
14463
rs6000_eliminate_indexed_memrefs (rtx operands[2])
14464
@@ -7032,6 +7869,107 @@
14465
copy_addr_to_reg (XEXP (operands[1], 0)));
14468
+/* Generate a vector of constants to permute MODE for a little-endian
14469
+ storage operation by swapping the two halves of a vector. */
14471
+rs6000_const_vec (enum machine_mode mode)
14493
+ gcc_unreachable();
14496
+ v = rtvec_alloc (subparts);
14498
+ for (i = 0; i < subparts / 2; ++i)
14499
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
14500
+ for (i = subparts / 2; i < subparts; ++i)
14501
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
14506
+/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
14507
+ for a VSX load or store operation. */
14509
+rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
14511
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
14512
+ return gen_rtx_VEC_SELECT (mode, source, par);
14515
+/* Emit a little-endian load from vector memory location SOURCE to VSX
14516
+ register DEST in mode MODE. The load is done with two permuting
14517
+ insn's that represent an lxvd2x and xxpermdi. */
14519
+rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
14521
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
14522
+ rtx permute_mem = rs6000_gen_le_vsx_permute (source, mode);
14523
+ rtx permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
14524
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
14525
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
14528
+/* Emit a little-endian store to vector memory location DEST from VSX
14529
+ register SOURCE in mode MODE. The store is done with two permuting
14530
+ insn's that represent an xxpermdi and an stxvd2x. */
14532
+rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
14534
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
14535
+ rtx permute_src = rs6000_gen_le_vsx_permute (source, mode);
14536
+ rtx permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
14537
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
14538
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
14541
+/* Emit a sequence representing a little-endian VSX load or store,
14542
+ moving data from SOURCE to DEST in mode MODE. This is done
14543
+ separately from rs6000_emit_move to ensure it is called only
14544
+ during expand. LE VSX loads and stores introduced later are
14545
+ handled with a split. The expand-time RTL generation allows
14546
+ us to optimize away redundant pairs of register-permutes. */
14548
+rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
14550
+ gcc_assert (!BYTES_BIG_ENDIAN
14551
+ && VECTOR_MEM_VSX_P (mode)
14552
+ && mode != TImode
14553
+ && !gpr_or_gpr_p (dest, source)
14554
+ && (MEM_P (source) ^ MEM_P (dest)));
14556
+ if (MEM_P (source))
14558
+ gcc_assert (REG_P (dest));
14559
+ rs6000_emit_le_vsx_load (dest, source, mode);
14563
+ if (!REG_P (source))
14564
+ source = force_reg (mode, source);
14565
+ rs6000_emit_le_vsx_store (dest, source, mode);
14569
/* Emit a move from SOURCE to DEST in mode MODE. */
14571
rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
14572
@@ -7152,6 +8090,7 @@
14574
if (reload_in_progress
14576
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
14577
&& MEM_P (operands[0])
14578
&& rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
14579
&& REG_P (operands[1]))
14580
@@ -7164,7 +8103,9 @@
14582
else if (INT_REGNO_P (REGNO (operands[1])))
14584
- rtx mem = adjust_address_nv (operands[0], mode, 4);
14585
+ rtx mem = operands[0];
14586
+ if (BYTES_BIG_ENDIAN)
14587
+ mem = adjust_address_nv (mem, mode, 4);
14588
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
14589
emit_insn (gen_movsd_hardfloat (mem, operands[1]));
14591
@@ -7176,6 +8117,7 @@
14593
&& REG_P (operands[0])
14594
&& MEM_P (operands[1])
14595
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
14596
&& rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
14598
if (FP_REGNO_P (REGNO (operands[0])))
14599
@@ -7186,7 +8128,9 @@
14601
else if (INT_REGNO_P (REGNO (operands[0])))
14603
- rtx mem = adjust_address_nv (operands[1], mode, 4);
14604
+ rtx mem = operands[1];
14605
+ if (BYTES_BIG_ENDIAN)
14606
+ mem = adjust_address_nv (mem, mode, 4);
14607
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
14608
emit_insn (gen_movsd_hardfloat (operands[0], mem));
14610
@@ -7389,6 +8333,11 @@
14614
+ if (!VECTOR_MEM_VSX_P (TImode))
14615
+ rs6000_eliminate_indexed_memrefs (operands);
14619
rs6000_eliminate_indexed_memrefs (operands);
14622
@@ -7427,18 +8376,231 @@
14625
/* Nonzero if we can use a floating-point register to pass this arg. */
14626
-#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
14627
+#define USE_FP_FOR_ARG_P(CUM,MODE) \
14628
(SCALAR_FLOAT_MODE_P (MODE) \
14629
&& (CUM)->fregno <= FP_ARG_MAX_REG \
14630
&& TARGET_HARD_FLOAT && TARGET_FPRS)
14632
/* Nonzero if we can use an AltiVec register to pass this arg. */
14633
-#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
14634
+#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
14635
(ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
14636
&& (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
14637
&& TARGET_ALTIVEC_ABI \
14640
+/* Walk down the type tree of TYPE counting consecutive base elements.
14641
+ If *MODEP is VOIDmode, then set it to the first valid floating point
14642
+ or vector type. If a non-floating point or vector type is found, or
14643
+ if a floating point or vector type that doesn't match a non-VOIDmode
14644
+ *MODEP is found, then return -1, otherwise return the count in the
14648
+rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
14650
+ enum machine_mode mode;
14651
+ HOST_WIDE_INT size;
14653
+ switch (TREE_CODE (type))
14656
+ mode = TYPE_MODE (type);
14657
+ if (!SCALAR_FLOAT_MODE_P (mode))
14660
+ if (*modep == VOIDmode)
14663
+ if (*modep == mode)
14668
+ case COMPLEX_TYPE:
14669
+ mode = TYPE_MODE (TREE_TYPE (type));
14670
+ if (!SCALAR_FLOAT_MODE_P (mode))
14673
+ if (*modep == VOIDmode)
14676
+ if (*modep == mode)
14681
+ case VECTOR_TYPE:
14682
+ if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
14685
+ /* Use V4SImode as representative of all 128-bit vector types. */
14686
+ size = int_size_in_bytes (type);
14696
+ if (*modep == VOIDmode)
14699
+ /* Vector modes are considered to be opaque: two vectors are
14700
+ equivalent for the purposes of being homogeneous aggregates
14701
+ if they are the same size. */
14702
+ if (*modep == mode)
14710
+ tree index = TYPE_DOMAIN (type);
14712
+ /* Can't handle incomplete types. */
14713
+ if (!COMPLETE_TYPE_P (type))
14716
+ count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
14719
+ || !TYPE_MAX_VALUE (index)
14720
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
14721
+ || !TYPE_MIN_VALUE (index)
14722
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
14726
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
14727
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
14729
+ /* There must be no padding. */
14730
+ if (!host_integerp (TYPE_SIZE (type), 1)
14731
+ || (tree_low_cst (TYPE_SIZE (type), 1)
14732
+ != count * GET_MODE_BITSIZE (*modep)))
14738
+ case RECORD_TYPE:
14744
+ /* Can't handle incomplete types. */
14745
+ if (!COMPLETE_TYPE_P (type))
14748
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
14750
+ if (TREE_CODE (field) != FIELD_DECL)
14753
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
14754
+ if (sub_count < 0)
14756
+ count += sub_count;
14759
+ /* There must be no padding. */
14760
+ if (!host_integerp (TYPE_SIZE (type), 1)
14761
+ || (tree_low_cst (TYPE_SIZE (type), 1)
14762
+ != count * GET_MODE_BITSIZE (*modep)))
14769
+ case QUAL_UNION_TYPE:
14771
+ /* These aren't very interesting except in a degenerate case. */
14776
+ /* Can't handle incomplete types. */
14777
+ if (!COMPLETE_TYPE_P (type))
14780
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
14782
+ if (TREE_CODE (field) != FIELD_DECL)
14785
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
14786
+ if (sub_count < 0)
14788
+ count = count > sub_count ? count : sub_count;
14791
+ /* There must be no padding. */
14792
+ if (!host_integerp (TYPE_SIZE (type), 1)
14793
+ || (tree_low_cst (TYPE_SIZE (type), 1)
14794
+ != count * GET_MODE_BITSIZE (*modep)))
14807
+/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
14808
+ float or vector aggregate that shall be passed in FP/vector registers
14809
+ according to the ELFv2 ABI, return the homogeneous element mode in
14810
+ *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
14812
+ Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
14815
+rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
14816
+ enum machine_mode *elt_mode,
14819
+ /* Note that we do not accept complex types at the top level as
14820
+ homogeneous aggregates; these types are handled via the
14821
+ targetm.calls.split_complex_arg mechanism. Complex types
14822
+ can be elements of homogeneous aggregates, however. */
14823
+ if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
14825
+ enum machine_mode field_mode = VOIDmode;
14826
+ int field_count = rs6000_aggregate_candidate (type, &field_mode);
14828
+ if (field_count > 0)
14830
+ int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
14831
+ (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
14833
+ /* The ELFv2 ABI allows homogeneous aggregates to occupy
14834
+ up to AGGR_ARG_NUM_REG registers. */
14835
+ if (field_count * n_regs <= AGGR_ARG_NUM_REG)
14838
+ *elt_mode = field_mode;
14840
+ *n_elts = field_count;
14847
+ *elt_mode = mode;
14853
/* Return a nonzero value to say to return the function value in
14854
memory, just as large structures are always returned. TYPE will be
14855
the data type of the value, and FNTYPE will be the type of the
14856
@@ -7491,6 +8653,16 @@
14857
/* Otherwise fall through to more conventional ABI rules. */
14860
+ /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
14861
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
14865
+ /* The ELFv2 ABI returns aggregates up to 16B in registers */
14866
+ if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
14867
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
14870
if (AGGREGATE_TYPE_P (type)
14871
&& (aix_struct_return
14872
|| (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
14873
@@ -7522,6 +8694,19 @@
14877
+/* Specify whether values returned in registers should be at the most
14878
+ significant end of a register. We want aggregates returned by
14879
+ value to match the way aggregates are passed to functions. */
14882
+rs6000_return_in_msb (const_tree valtype)
14884
+ return (DEFAULT_ABI == ABI_ELFv2
14885
+ && BYTES_BIG_ENDIAN
14886
+ && AGGREGATE_TYPE_P (valtype)
14887
+ && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
14890
#ifdef HAVE_AS_GNU_ATTRIBUTE
14891
/* Return TRUE if a call to function FNDECL may be one that
14892
potentially affects the function calling ABI of the object file. */
14893
@@ -7658,7 +8843,7 @@
14895
rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
14897
- if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT)
14898
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
14899
return must_pass_in_stack_var_size (mode, type);
14901
return must_pass_in_stack_var_size_or_pad (mode, type);
14902
@@ -7739,6 +8924,11 @@
14903
static unsigned int
14904
rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
14906
+ enum machine_mode elt_mode;
14909
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
14911
if (DEFAULT_ABI == ABI_V4
14912
&& (GET_MODE_SIZE (mode) == 8
14913
|| (TARGET_HARD_FLOAT
14914
@@ -7750,12 +8940,13 @@
14915
&& int_size_in_bytes (type) >= 8
14916
&& int_size_in_bytes (type) < 16))
14918
- else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
14919
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
14920
|| (type && TREE_CODE (type) == VECTOR_TYPE
14921
&& int_size_in_bytes (type) >= 16))
14923
- else if (TARGET_MACHO
14924
- && rs6000_darwin64_abi
14925
+ else if (((TARGET_MACHO && rs6000_darwin64_abi)
14926
+ || DEFAULT_ABI == ABI_ELFv2
14927
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
14929
&& type && TYPE_ALIGN (type) > 64)
14931
@@ -7763,6 +8954,16 @@
14932
return PARM_BOUNDARY;
14935
+/* The offset in words to the start of the parameter save area. */
14937
+static unsigned int
14938
+rs6000_parm_offset (void)
14940
+ return (DEFAULT_ABI == ABI_V4 ? 2
14941
+ : DEFAULT_ABI == ABI_ELFv2 ? 4
14945
/* For a function parm of MODE and TYPE, return the starting word in
14946
the parameter area. NWORDS of the parameter area are already used. */
14948
@@ -7771,11 +8972,9 @@
14949
unsigned int nwords)
14951
unsigned int align;
14952
- unsigned int parm_offset;
14954
align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
14955
- parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6;
14956
- return nwords + (-(parm_offset + nwords) & align);
14957
+ return nwords + (-(rs6000_parm_offset () + nwords) & align);
14960
/* Compute the size (in words) of a function argument. */
14961
@@ -7882,7 +9081,7 @@
14963
if (TREE_CODE (ftype) == RECORD_TYPE)
14964
rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
14965
- else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
14966
+ else if (USE_FP_FOR_ARG_P (cum, mode))
14968
unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
14969
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
14970
@@ -7923,7 +9122,7 @@
14972
cum->words += n_fpregs;
14974
- else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
14975
+ else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
14977
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
14979
@@ -7960,6 +9159,11 @@
14980
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
14981
const_tree type, bool named, int depth)
14983
+ enum machine_mode elt_mode;
14986
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
14988
/* Only tick off an argument if we're not recursing. */
14990
cum->nargs_prototype--;
14991
@@ -7980,15 +9184,16 @@
14994
if (TARGET_ALTIVEC_ABI
14995
- && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
14996
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
14997
|| (type && TREE_CODE (type) == VECTOR_TYPE
14998
&& int_size_in_bytes (type) == 16)))
15000
bool stack = false;
15002
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
15003
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
15006
+ cum->vregno += n_elts;
15008
if (!TARGET_ALTIVEC)
15009
error ("cannot pass argument in vector register because"
15010
" altivec instructions are disabled, use -maltivec"
15011
@@ -7997,7 +9202,8 @@
15012
/* PowerPC64 Linux and AIX allocate GPRs for a vector argument
15013
even if it is going to be passed in a vector register.
15014
Darwin does the same for variable-argument functions. */
15015
- if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
15016
+ if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
15018
|| (cum->stdarg && DEFAULT_ABI != ABI_V4))
15021
@@ -8008,15 +9214,13 @@
15025
- /* Vector parameters must be 16-byte aligned. This places
15026
- them at 2 mod 4 in terms of words in 32-bit mode, since
15027
- the parameter save area starts at offset 24 from the
15028
- stack. In 64-bit mode, they just have to start on an
15029
- even word, since the parameter save area is 16-byte
15030
- aligned. Space for GPRs is reserved even if the argument
15031
- will be passed in memory. */
15032
+ /* Vector parameters must be 16-byte aligned. In 32-bit
15033
+ mode this means we need to take into account the offset
15034
+ to the parameter save area. In 64-bit mode, they just
15035
+ have to start on an even word, since the parameter save
15036
+ area is 16-byte aligned. */
15038
- align = (2 - cum->words) & 3;
15039
+ align = -(rs6000_parm_offset () + cum->words) & 3;
15041
align = cum->words & 1;
15042
cum->words += align + rs6000_arg_size (mode, type);
15043
@@ -8141,15 +9345,15 @@
15045
cum->words = align_words + n_words;
15047
- if (SCALAR_FLOAT_MODE_P (mode)
15048
+ if (SCALAR_FLOAT_MODE_P (elt_mode)
15049
&& TARGET_HARD_FLOAT && TARGET_FPRS)
15051
/* _Decimal128 must be passed in an even/odd float register pair.
15052
This assumes that the register number is odd when fregno is
15054
- if (mode == TDmode && (cum->fregno % 2) == 1)
15055
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
15057
- cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
15058
+ cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
15061
if (TARGET_DEBUG_ARG)
15062
@@ -8359,7 +9563,7 @@
15064
if (TREE_CODE (ftype) == RECORD_TYPE)
15065
rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
15066
- else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
15067
+ else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
15069
unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
15071
@@ -8387,7 +9591,7 @@
15072
if (mode == TFmode || mode == TDmode)
15075
- else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
15076
+ else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
15078
rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
15080
@@ -8504,6 +9708,84 @@
15081
return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
15084
+/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
15085
+ but must also be copied into the parameter save area starting at
15086
+ offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
15087
+ to the GPRs and/or memory. Return the number of elements used. */
15090
+rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
15091
+ int align_words, rtx *rvec)
15095
+ if (align_words < GP_ARG_NUM_REG)
15097
+ int n_words = rs6000_arg_size (mode, type);
15099
+ if (align_words + n_words > GP_ARG_NUM_REG
15100
+ || mode == BLKmode
15101
+ || (TARGET_32BIT && TARGET_POWERPC64))
15103
+ /* If this is partially on the stack, then we only
15104
+ include the portion actually in registers here. */
15105
+ enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
15108
+ if (align_words + n_words > GP_ARG_NUM_REG)
15110
+ /* Not all of the arg fits in gprs. Say that it goes in memory
15111
+ too, using a magic NULL_RTX component. Also see comment in
15112
+ rs6000_mixed_function_arg for why the normal
15113
+ function_arg_partial_nregs scheme doesn't work in this case. */
15114
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
15119
+ rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
15120
+ rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
15121
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
15123
+ while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
15127
+ /* The whole arg fits in gprs. */
15128
+ rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
15129
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
15134
+ /* It's entirely in memory. */
15135
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
15141
+/* RVEC is a vector of K components of an argument of mode MODE.
15142
+ Construct the final function_arg return value from it. */
15145
+rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
15147
+ gcc_assert (k >= 1);
15149
+ /* Avoid returning a PARALLEL in the trivial cases. */
15152
+ if (XEXP (rvec[0], 0) == NULL_RTX)
15155
+ if (GET_MODE (XEXP (rvec[0], 0)) == mode)
15156
+ return XEXP (rvec[0], 0);
15159
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
15162
/* Determine where to put an argument to a function.
15163
Value is zero to push the argument on the stack,
15164
or a hard register in which to store the argument.
15165
@@ -8538,6 +9820,8 @@
15167
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
15168
enum rs6000_abi abi = DEFAULT_ABI;
15169
+ enum machine_mode elt_mode;
15172
/* Return a marker to indicate whether CR1 needs to set or clear the
15173
bit that V.4 uses to say fp args were passed in registers.
15174
@@ -8564,6 +9848,8 @@
15175
return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
15178
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
15180
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
15182
rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
15183
@@ -8572,33 +9858,30 @@
15184
/* Else fall through to usual handling. */
15187
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
15188
- if (TARGET_64BIT && ! cum->prototype)
15190
- /* Vector parameters get passed in vector register
15191
- and also in GPRs or memory, in absence of prototype. */
15194
- align_words = (cum->words + 1) & ~1;
15195
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
15197
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
15201
- if (align_words >= GP_ARG_NUM_REG)
15207
- slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
15209
- return gen_rtx_PARALLEL (mode,
15211
- gen_rtx_EXPR_LIST (VOIDmode,
15212
- slot, const0_rtx),
15213
- gen_rtx_EXPR_LIST (VOIDmode,
15214
- gen_rtx_REG (mode, cum->vregno),
15218
- return gen_rtx_REG (mode, cum->vregno);
15219
+ /* Do we also need to pass this argument in the parameter
15221
+ if (TARGET_64BIT && ! cum->prototype)
15223
+ int align_words = (cum->words + 1) & ~1;
15224
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
15227
+ /* Describe where this argument goes in the vector registers. */
15228
+ for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
15230
+ r = gen_rtx_REG (elt_mode, cum->vregno + i);
15231
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
15232
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
15235
+ return rs6000_finish_function_arg (mode, rvec, k);
15237
else if (TARGET_ALTIVEC_ABI
15238
&& (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
15239
|| (type && TREE_CODE (type) == VECTOR_TYPE
15240
@@ -8613,13 +9896,13 @@
15241
int align, align_words, n_words;
15242
enum machine_mode part_mode;
15244
- /* Vector parameters must be 16-byte aligned. This places them at
15245
- 2 mod 4 in terms of words in 32-bit mode, since the parameter
15246
- save area starts at offset 24 from the stack. In 64-bit mode,
15247
- they just have to start on an even word, since the parameter
15248
- save area is 16-byte aligned. */
15249
+ /* Vector parameters must be 16-byte aligned. In 32-bit
15250
+ mode this means we need to take into account the offset
15251
+ to the parameter save area. In 64-bit mode, they just
15252
+ have to start on an even word, since the parameter save
15253
+ area is 16-byte aligned. */
15255
- align = (2 - cum->words) & 3;
15256
+ align = -(rs6000_parm_offset () + cum->words) & 3;
15258
align = cum->words & 1;
15259
align_words = cum->words + align;
15260
@@ -8697,101 +9980,50 @@
15262
/* _Decimal128 must be passed in an even/odd float register pair.
15263
This assumes that the register number is odd when fregno is odd. */
15264
- if (mode == TDmode && (cum->fregno % 2) == 1)
15265
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
15268
- if (USE_FP_FOR_ARG_P (cum, mode, type))
15269
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
15271
- rtx rvec[GP_ARG_NUM_REG + 1];
15274
- bool needs_psave;
15275
- enum machine_mode fmode = mode;
15276
- unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
15277
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
15280
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
15282
- if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
15284
- /* Currently, we only ever need one reg here because complex
15285
- doubles are split. */
15286
- gcc_assert (cum->fregno == FP_ARG_MAX_REG
15287
- && (fmode == TFmode || fmode == TDmode));
15288
+ /* Do we also need to pass this argument in the parameter
15290
+ if (type && (cum->nargs_prototype <= 0
15291
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
15292
+ && TARGET_XL_COMPAT
15293
+ && align_words >= GP_ARG_NUM_REG)))
15294
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
15296
- /* Long double or _Decimal128 split over regs and memory. */
15297
- fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
15300
- /* Do we also need to pass this arg in the parameter save
15302
- needs_psave = (type
15303
- && (cum->nargs_prototype <= 0
15304
- || (DEFAULT_ABI == ABI_AIX
15305
- && TARGET_XL_COMPAT
15306
- && align_words >= GP_ARG_NUM_REG)));
15308
- if (!needs_psave && mode == fmode)
15309
- return gen_rtx_REG (fmode, cum->fregno);
15313
+ /* Describe where this argument goes in the fprs. */
15314
+ for (i = 0; i < n_elts
15315
+ && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
15317
- /* Describe the part that goes in gprs or the stack.
15318
- This piece must come first, before the fprs. */
15319
- if (align_words < GP_ARG_NUM_REG)
15320
+ /* Check if the argument is split over registers and memory.
15321
+ This can only ever happen for long double or _Decimal128;
15322
+ complex types are handled via split_complex_arg. */
15323
+ enum machine_mode fmode = elt_mode;
15324
+ if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
15326
- unsigned long n_words = rs6000_arg_size (mode, type);
15327
+ gcc_assert (fmode == TFmode || fmode == TDmode);
15328
+ fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
15331
- if (align_words + n_words > GP_ARG_NUM_REG
15332
- || (TARGET_32BIT && TARGET_POWERPC64))
15334
- /* If this is partially on the stack, then we only
15335
- include the portion actually in registers here. */
15336
- enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
15339
- if (align_words + n_words > GP_ARG_NUM_REG)
15340
- /* Not all of the arg fits in gprs. Say that it
15341
- goes in memory too, using a magic NULL_RTX
15342
- component. Also see comment in
15343
- rs6000_mixed_function_arg for why the normal
15344
- function_arg_partial_nregs scheme doesn't work
15346
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX,
15350
- r = gen_rtx_REG (rmode,
15351
- GP_ARG_MIN_REG + align_words);
15352
- off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
15353
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
15355
- while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
15359
- /* The whole arg fits in gprs. */
15360
- r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
15361
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
15365
- /* It's entirely in memory. */
15366
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
15367
+ r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
15368
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
15369
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
15372
- /* Describe where this piece goes in the fprs. */
15373
- r = gen_rtx_REG (fmode, cum->fregno);
15374
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
15376
- return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
15377
+ return rs6000_finish_function_arg (mode, rvec, k);
15379
else if (align_words < GP_ARG_NUM_REG)
15381
if (TARGET_32BIT && TARGET_POWERPC64)
15382
return rs6000_mixed_function_arg (mode, type, align_words);
15384
- if (mode == BLKmode)
15387
return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
15390
@@ -8810,42 +10042,62 @@
15391
tree type, bool named)
15393
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
15394
+ bool passed_in_gprs = true;
15397
+ enum machine_mode elt_mode;
15400
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
15402
if (DEFAULT_ABI == ABI_V4)
15405
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)
15406
- && cum->nargs_prototype >= 0)
15408
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
15410
+ /* If we are passing this arg in the fixed parameter save area
15411
+ (gprs or memory) as well as VRs, we do not use the partial
15412
+ bytes mechanism; instead, rs6000_function_arg will return a
15413
+ PARALLEL including a memory element as necessary. */
15414
+ if (TARGET_64BIT && ! cum->prototype)
15417
+ /* Otherwise, we pass in VRs only. Check for partial copies. */
15418
+ passed_in_gprs = false;
15419
+ if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
15420
+ ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
15423
/* In this complicated case we just disable the partial_nregs code. */
15424
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
15427
align_words = rs6000_parm_start (mode, type, cum->words);
15429
- if (USE_FP_FOR_ARG_P (cum, mode, type))
15430
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
15432
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
15434
/* If we are passing this arg in the fixed parameter save area
15435
- (gprs or memory) as well as fprs, then this function should
15436
- return the number of partial bytes passed in the parameter
15437
- save area rather than partial bytes passed in fprs. */
15438
+ (gprs or memory) as well as FPRs, we do not use the partial
15439
+ bytes mechanism; instead, rs6000_function_arg will return a
15440
+ PARALLEL including a memory element as necessary. */
15442
&& (cum->nargs_prototype <= 0
15443
- || (DEFAULT_ABI == ABI_AIX
15444
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
15445
&& TARGET_XL_COMPAT
15446
&& align_words >= GP_ARG_NUM_REG)))
15448
- else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3)
15449
- > FP_ARG_MAX_REG + 1)
15450
- ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
15451
- else if (cum->nargs_prototype >= 0)
15454
+ /* Otherwise, we pass in FPRs only. Check for partial copies. */
15455
+ passed_in_gprs = false;
15456
+ if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
15457
+ ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
15458
+ * MIN (8, GET_MODE_SIZE (elt_mode)));
15461
- if (align_words < GP_ARG_NUM_REG
15462
+ if (passed_in_gprs
15463
+ && align_words < GP_ARG_NUM_REG
15464
&& GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
15465
ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
15467
@@ -8926,6 +10178,139 @@
15471
+/* Process parameter of type TYPE after ARGS_SO_FAR parameters were
15472
+ already processes. Return true if the parameter must be passed
15473
+ (fully or partially) on the stack. */
15476
+rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
15478
+ enum machine_mode mode;
15482
+ /* Catch errors. */
15483
+ if (type == NULL || type == error_mark_node)
15486
+ /* Handle types with no storage requirement. */
15487
+ if (TYPE_MODE (type) == VOIDmode)
15490
+ /* Handle complex types. */
15491
+ if (TREE_CODE (type) == COMPLEX_TYPE)
15492
+ return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
15493
+ || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
15495
+ /* Handle transparent aggregates. */
15496
+ if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
15497
+ && TYPE_TRANSPARENT_AGGR (type))
15498
+ type = TREE_TYPE (first_field (type));
15500
+ /* See if this arg was passed by invisible reference. */
15501
+ if (pass_by_reference (get_cumulative_args (args_so_far),
15502
+ TYPE_MODE (type), type, true))
15503
+ type = build_pointer_type (type);
15505
+ /* Find mode as it is passed by the ABI. */
15506
+ unsignedp = TYPE_UNSIGNED (type);
15507
+ mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
15509
+ /* If we must pass in stack, we need a stack. */
15510
+ if (rs6000_must_pass_in_stack (mode, type))
15513
+ /* If there is no incoming register, we need a stack. */
15514
+ entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
15515
+ if (entry_parm == NULL)
15518
+ /* Likewise if we need to pass both in registers and on the stack. */
15519
+ if (GET_CODE (entry_parm) == PARALLEL
15520
+ && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
15523
+ /* Also true if we're partially in registers and partially not. */
15524
+ if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
15527
+ /* Update info on where next arg arrives in registers. */
15528
+ rs6000_function_arg_advance (args_so_far, mode, type, true);
15532
+/* Return true if FUN has no prototype, has a variable argument
15533
+ list, or passes any parameter in memory. */
15536
+rs6000_function_parms_need_stack (tree fun)
15538
+ function_args_iterator args_iter;
15540
+ CUMULATIVE_ARGS args_so_far_v;
15541
+ cumulative_args_t args_so_far;
15544
+ /* Must be a libcall, all of which only use reg parms. */
15546
+ if (!TYPE_P (fun))
15547
+ fun = TREE_TYPE (fun);
15549
+ /* Varargs functions need the parameter save area. */
15550
+ if (!prototype_p (fun) || stdarg_p (fun))
15553
+ INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX);
15554
+ args_so_far = pack_cumulative_args (&args_so_far_v);
15556
+ if (aggregate_value_p (TREE_TYPE (fun), fun))
15558
+ tree type = build_pointer_type (TREE_TYPE (fun));
15559
+ rs6000_parm_needs_stack (args_so_far, type);
15562
+ FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter)
15563
+ if (rs6000_parm_needs_stack (args_so_far, arg_type))
15569
+/* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
15570
+ usually a constant depending on the ABI. However, in the ELFv2 ABI
15571
+ the register parameter area is optional when calling a function that
15572
+ has a prototype is scope, has no variable argument list, and passes
15573
+ all parameters in registers. */
15576
+rs6000_reg_parm_stack_space (tree fun)
15578
+ int reg_parm_stack_space;
15580
+ switch (DEFAULT_ABI)
15583
+ reg_parm_stack_space = 0;
15588
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
15592
+ /* ??? Recomputing this every time is a bit expensive. Is there
15593
+ a place to cache this information? */
15594
+ if (rs6000_function_parms_need_stack (fun))
15595
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
15597
+ reg_parm_stack_space = 0;
15601
+ return reg_parm_stack_space;
15605
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
15607
@@ -9307,8 +10692,10 @@
15608
We don't need to check for pass-by-reference because of the test above.
15609
We can return a simplifed answer, since we know there's no offset to add. */
15612
- && rs6000_darwin64_abi
15613
+ if (((TARGET_MACHO
15614
+ && rs6000_darwin64_abi)
15615
+ || DEFAULT_ABI == ABI_ELFv2
15616
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
15617
&& integer_zerop (TYPE_SIZE (type)))
15619
unsigned HOST_WIDE_INT align, boundary;
15620
@@ -9603,6 +10990,7 @@
15621
#undef RS6000_BUILTIN_A
15622
#undef RS6000_BUILTIN_D
15623
#undef RS6000_BUILTIN_E
15624
+#undef RS6000_BUILTIN_H
15625
#undef RS6000_BUILTIN_P
15626
#undef RS6000_BUILTIN_Q
15627
#undef RS6000_BUILTIN_S
15628
@@ -9616,6 +11004,7 @@
15629
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15630
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15631
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15632
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15633
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15634
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15635
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15636
@@ -9634,6 +11023,7 @@
15637
#undef RS6000_BUILTIN_A
15638
#undef RS6000_BUILTIN_D
15639
#undef RS6000_BUILTIN_E
15640
+#undef RS6000_BUILTIN_H
15641
#undef RS6000_BUILTIN_P
15642
#undef RS6000_BUILTIN_Q
15643
#undef RS6000_BUILTIN_S
15644
@@ -9647,6 +11037,7 @@
15645
{ MASK, ICODE, NAME, ENUM },
15647
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15648
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15649
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15650
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15651
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15652
@@ -9665,6 +11056,7 @@
15653
#undef RS6000_BUILTIN_A
15654
#undef RS6000_BUILTIN_D
15655
#undef RS6000_BUILTIN_E
15656
+#undef RS6000_BUILTIN_H
15657
#undef RS6000_BUILTIN_P
15658
#undef RS6000_BUILTIN_Q
15659
#undef RS6000_BUILTIN_S
15660
@@ -9678,6 +11070,7 @@
15661
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15662
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15663
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15664
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15665
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15666
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15667
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15668
@@ -9694,6 +11087,7 @@
15669
#undef RS6000_BUILTIN_A
15670
#undef RS6000_BUILTIN_D
15671
#undef RS6000_BUILTIN_E
15672
+#undef RS6000_BUILTIN_H
15673
#undef RS6000_BUILTIN_P
15674
#undef RS6000_BUILTIN_Q
15675
#undef RS6000_BUILTIN_S
15676
@@ -9705,6 +11099,7 @@
15677
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15678
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15679
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15680
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15681
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
15682
{ MASK, ICODE, NAME, ENUM },
15684
@@ -9726,6 +11121,7 @@
15685
#undef RS6000_BUILTIN_A
15686
#undef RS6000_BUILTIN_D
15687
#undef RS6000_BUILTIN_E
15688
+#undef RS6000_BUILTIN_H
15689
#undef RS6000_BUILTIN_P
15690
#undef RS6000_BUILTIN_Q
15691
#undef RS6000_BUILTIN_S
15692
@@ -9737,6 +11133,7 @@
15693
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15694
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15695
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15696
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15697
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15698
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15699
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
15700
@@ -9756,6 +11153,7 @@
15701
#undef RS6000_BUILTIN_A
15702
#undef RS6000_BUILTIN_D
15703
#undef RS6000_BUILTIN_E
15704
+#undef RS6000_BUILTIN_H
15705
#undef RS6000_BUILTIN_P
15706
#undef RS6000_BUILTIN_Q
15707
#undef RS6000_BUILTIN_S
15708
@@ -9769,6 +11167,7 @@
15709
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
15710
{ MASK, ICODE, NAME, ENUM },
15712
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15713
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15714
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15715
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15716
@@ -9786,6 +11185,7 @@
15717
#undef RS6000_BUILTIN_A
15718
#undef RS6000_BUILTIN_D
15719
#undef RS6000_BUILTIN_E
15720
+#undef RS6000_BUILTIN_H
15721
#undef RS6000_BUILTIN_P
15722
#undef RS6000_BUILTIN_Q
15723
#undef RS6000_BUILTIN_S
15724
@@ -9797,6 +11197,7 @@
15725
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15726
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15727
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15728
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15729
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15730
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
15731
{ MASK, ICODE, NAME, ENUM },
15732
@@ -9817,6 +11218,7 @@
15733
#undef RS6000_BUILTIN_A
15734
#undef RS6000_BUILTIN_D
15735
#undef RS6000_BUILTIN_E
15736
+#undef RS6000_BUILTIN_H
15737
#undef RS6000_BUILTIN_P
15738
#undef RS6000_BUILTIN_Q
15739
#undef RS6000_BUILTIN_S
15740
@@ -9830,6 +11232,7 @@
15742
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15743
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15744
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15745
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15746
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15747
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15748
@@ -9847,8 +11250,9 @@
15749
#undef RS6000_BUILTIN_2
15750
#undef RS6000_BUILTIN_3
15751
#undef RS6000_BUILTIN_A
15752
+#undef RS6000_BUILTIN_D
15753
#undef RS6000_BUILTIN_E
15754
-#undef RS6000_BUILTIN_D
15755
+#undef RS6000_BUILTIN_H
15756
#undef RS6000_BUILTIN_P
15757
#undef RS6000_BUILTIN_Q
15758
#undef RS6000_BUILTIN_S
15759
@@ -9862,6 +11266,7 @@
15760
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15761
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15762
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15763
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
15764
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15765
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15766
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15767
@@ -9872,17 +11277,49 @@
15768
#include "rs6000-builtin.def"
15771
+/* HTM builtins. */
15772
#undef RS6000_BUILTIN_1
15773
#undef RS6000_BUILTIN_2
15774
#undef RS6000_BUILTIN_3
15775
#undef RS6000_BUILTIN_A
15776
#undef RS6000_BUILTIN_D
15777
#undef RS6000_BUILTIN_E
15778
+#undef RS6000_BUILTIN_H
15779
#undef RS6000_BUILTIN_P
15780
#undef RS6000_BUILTIN_Q
15781
#undef RS6000_BUILTIN_S
15782
#undef RS6000_BUILTIN_X
15784
+#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
15785
+#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
15786
+#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
15787
+#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
15788
+#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
15789
+#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
15790
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
15791
+ { MASK, ICODE, NAME, ENUM },
15793
+#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
15794
+#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
15795
+#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
15796
+#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
15798
+static const struct builtin_description bdesc_htm[] =
15800
+#include "rs6000-builtin.def"
15803
+#undef RS6000_BUILTIN_1
15804
+#undef RS6000_BUILTIN_2
15805
+#undef RS6000_BUILTIN_3
15806
+#undef RS6000_BUILTIN_A
15807
+#undef RS6000_BUILTIN_D
15808
+#undef RS6000_BUILTIN_E
15809
+#undef RS6000_BUILTIN_H
15810
+#undef RS6000_BUILTIN_P
15811
+#undef RS6000_BUILTIN_Q
15812
+#undef RS6000_BUILTIN_S
15814
/* Return true if a builtin function is overloaded. */
15816
rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
15817
@@ -10352,7 +11789,198 @@
15821
+/* Return the appropriate SPR number associated with the given builtin. */
15822
+static inline HOST_WIDE_INT
15823
+htm_spr_num (enum rs6000_builtins code)
15825
+ if (code == HTM_BUILTIN_GET_TFHAR
15826
+ || code == HTM_BUILTIN_SET_TFHAR)
15827
+ return TFHAR_SPR;
15828
+ else if (code == HTM_BUILTIN_GET_TFIAR
15829
+ || code == HTM_BUILTIN_SET_TFIAR)
15830
+ return TFIAR_SPR;
15831
+ else if (code == HTM_BUILTIN_GET_TEXASR
15832
+ || code == HTM_BUILTIN_SET_TEXASR)
15833
+ return TEXASR_SPR;
15834
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15835
+ || code == HTM_BUILTIN_SET_TEXASRU);
15836
+ return TEXASRU_SPR;
15839
+/* Return the appropriate SPR regno associated with the given builtin. */
15840
+static inline HOST_WIDE_INT
15841
+htm_spr_regno (enum rs6000_builtins code)
15843
+ if (code == HTM_BUILTIN_GET_TFHAR
15844
+ || code == HTM_BUILTIN_SET_TFHAR)
15845
+ return TFHAR_REGNO;
15846
+ else if (code == HTM_BUILTIN_GET_TFIAR
15847
+ || code == HTM_BUILTIN_SET_TFIAR)
15848
+ return TFIAR_REGNO;
15849
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15850
+ || code == HTM_BUILTIN_SET_TEXASR
15851
+ || code == HTM_BUILTIN_GET_TEXASRU
15852
+ || code == HTM_BUILTIN_SET_TEXASRU);
15853
+ return TEXASR_REGNO;
15856
+/* Return the correct ICODE value depending on whether we are
15857
+ setting or reading the HTM SPRs. */
15858
+static inline enum insn_code
15859
+rs6000_htm_spr_icode (bool nonvoid)
15862
+ return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15864
+ return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15867
+/* Expand the HTM builtin in EXP and store the result in TARGET.
15868
+ Store true in *EXPANDEDP if we found a builtin to expand. */
15870
+htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15872
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15873
+ bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15874
+ enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15875
+ const struct builtin_description *d;
15878
+ *expandedp = false;
15880
+ /* Expand the HTM builtins. */
15882
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15883
+ if (d->code == fcode)
15885
+ rtx op[MAX_HTM_OPERANDS], pat;
15888
+ call_expr_arg_iterator iter;
15889
+ unsigned attr = rs6000_builtin_info[fcode].attr;
15890
+ enum insn_code icode = d->icode;
15892
+ if (attr & RS6000_BTC_SPR)
15893
+ icode = rs6000_htm_spr_icode (nonvoid);
15897
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
15899
+ || GET_MODE (target) != tmode
15900
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
15901
+ target = gen_reg_rtx (tmode);
15902
+ op[nopnds++] = target;
15905
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15907
+ const struct insn_operand_data *insn_op;
15909
+ if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15912
+ insn_op = &insn_data[icode].operand[nopnds];
15914
+ op[nopnds] = expand_normal (arg);
15916
+ if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15918
+ if (!strcmp (insn_op->constraint, "n"))
15920
+ int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15921
+ if (!CONST_INT_P (op[nopnds]))
15922
+ error ("argument %d must be an unsigned literal", arg_num);
15924
+ error ("argument %d is an unsigned literal that is "
15925
+ "out of range", arg_num);
15926
+ return const0_rtx;
15928
+ op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15934
+ /* Handle the builtins for extended mnemonics. These accept
15935
+ no arguments, but map to builtins that take arguments. */
15938
+ case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15939
+ case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15940
+ op[nopnds++] = GEN_INT (1);
15941
+#ifdef ENABLE_CHECKING
15942
+ attr |= RS6000_BTC_UNARY;
15945
+ case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15946
+ op[nopnds++] = GEN_INT (0);
15947
+#ifdef ENABLE_CHECKING
15948
+ attr |= RS6000_BTC_UNARY;
15955
+ /* If this builtin accesses SPRs, then pass in the appropriate
15956
+ SPR number and SPR regno as the last two operands. */
15957
+ if (attr & RS6000_BTC_SPR)
15959
+ op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
15960
+ op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
15963
+#ifdef ENABLE_CHECKING
15964
+ int expected_nopnds = 0;
15965
+ if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15966
+ expected_nopnds = 1;
15967
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15968
+ expected_nopnds = 2;
15969
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15970
+ expected_nopnds = 3;
15971
+ if (!(attr & RS6000_BTC_VOID))
15972
+ expected_nopnds += 1;
15973
+ if (attr & RS6000_BTC_SPR)
15974
+ expected_nopnds += 2;
15976
+ gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
15982
+ pat = GEN_FCN (icode) (NULL_RTX);
15985
+ pat = GEN_FCN (icode) (op[0]);
15988
+ pat = GEN_FCN (icode) (op[0], op[1]);
15991
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15994
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15997
+ gcc_unreachable ();
16003
+ *expandedp = true;
16006
+ return const0_rtx;
16013
rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
16016
@@ -10428,7 +12056,28 @@
16020
+ else if (icode == CODE_FOR_crypto_vshasigmaw
16021
+ || icode == CODE_FOR_crypto_vshasigmad)
16023
+ /* Check whether the 2nd and 3rd arguments are integer constants and in
16024
+ range and prepare arguments. */
16025
+ STRIP_NOPS (arg1);
16026
+ if (TREE_CODE (arg1) != INTEGER_CST
16027
+ || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
16029
+ error ("argument 2 must be 0 or 1");
16030
+ return const0_rtx;
16033
+ STRIP_NOPS (arg2);
16034
+ if (TREE_CODE (arg2) != INTEGER_CST
16035
+ || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
16037
+ error ("argument 3 must be in the range 0..15");
16038
+ return const0_rtx;
16043
|| GET_MODE (target) != tmode
16044
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16045
@@ -11412,6 +13061,8 @@
16046
error ("Builtin function %s is only valid for the cell processor", name);
16047
else if ((fnmask & RS6000_BTM_VSX) != 0)
16048
error ("Builtin function %s requires the -mvsx option", name);
16049
+ else if ((fnmask & RS6000_BTM_HTM) != 0)
16050
+ error ("Builtin function %s requires the -mhtm option", name);
16051
else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16052
error ("Builtin function %s requires the -maltivec option", name);
16053
else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16054
@@ -11516,7 +13167,8 @@
16055
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16056
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16058
- int icode = (int) CODE_FOR_altivec_lvsr;
16059
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
16060
+ : (int) CODE_FOR_altivec_lvsl);
16061
enum machine_mode tmode = insn_data[icode].operand[0].mode;
16062
enum machine_mode mode = insn_data[icode].operand[1].mode;
16064
@@ -11591,7 +13243,14 @@
16070
+ ret = htm_expand_builtin (exp, target, &success);
16076
gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
16078
/* Handle simple unary operations. */
16079
@@ -11773,6 +13432,9 @@
16080
spe_init_builtins ();
16081
if (TARGET_EXTRA_BUILTINS)
16082
altivec_init_builtins ();
16084
+ htm_init_builtins ();
16086
if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16087
rs6000_common_init_builtins ();
16089
@@ -12118,6 +13780,10 @@
16090
= build_function_type_list (integer_type_node,
16091
integer_type_node, V4SI_type_node,
16092
V4SI_type_node, NULL_TREE);
16093
+ tree int_ftype_int_v2di_v2di
16094
+ = build_function_type_list (integer_type_node,
16095
+ integer_type_node, V2DI_type_node,
16096
+ V2DI_type_node, NULL_TREE);
16097
tree void_ftype_v4si
16098
= build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16099
tree v8hi_ftype_void
16100
@@ -12200,6 +13866,8 @@
16101
= build_function_type_list (integer_type_node,
16102
integer_type_node, V2DF_type_node,
16103
V2DF_type_node, NULL_TREE);
16104
+ tree v2di_ftype_v2di
16105
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16106
tree v4si_ftype_v4si
16107
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16108
tree v8hi_ftype_v8hi
16109
@@ -12335,6 +14003,9 @@
16111
type = int_ftype_int_opaque_opaque;
16114
+ type = int_ftype_int_v2di_v2di;
16117
type = int_ftype_int_v4si_v4si;
16119
@@ -12368,6 +14039,9 @@
16124
+ type = v2di_ftype_v2di;
16127
type = v4si_ftype_v4si;
16129
@@ -12500,6 +14174,79 @@
16130
def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
16134
+htm_init_builtins (void)
16136
+ HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16137
+ const struct builtin_description *d;
16141
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
16143
+ tree op[MAX_HTM_OPERANDS], type;
16144
+ HOST_WIDE_INT mask = d->mask;
16145
+ unsigned attr = rs6000_builtin_info[d->code].attr;
16146
+ bool void_func = (attr & RS6000_BTC_VOID);
16147
+ int attr_args = (attr & RS6000_BTC_TYPE_MASK);
16149
+ tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
16150
+ : unsigned_type_node;
16152
+ if ((mask & builtin_mask) != mask)
16154
+ if (TARGET_DEBUG_BUILTIN)
16155
+ fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
16159
+ if (d->name == 0)
16161
+ if (TARGET_DEBUG_BUILTIN)
16162
+ fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
16163
+ (long unsigned) i);
16167
+ op[nopnds++] = (void_func) ? void_type_node : argtype;
16169
+ if (attr_args == RS6000_BTC_UNARY)
16170
+ op[nopnds++] = argtype;
16171
+ else if (attr_args == RS6000_BTC_BINARY)
16173
+ op[nopnds++] = argtype;
16174
+ op[nopnds++] = argtype;
16176
+ else if (attr_args == RS6000_BTC_TERNARY)
16178
+ op[nopnds++] = argtype;
16179
+ op[nopnds++] = argtype;
16180
+ op[nopnds++] = argtype;
16186
+ type = build_function_type_list (op[0], NULL_TREE);
16189
+ type = build_function_type_list (op[0], op[1], NULL_TREE);
16192
+ type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
16195
+ type = build_function_type_list (op[0], op[1], op[2], op[3],
16199
+ gcc_unreachable ();
16202
+ def_builtin (d->name, type, d->code);
16206
/* Hash function for builtin functions with up to 3 arguments and a return
16209
@@ -12573,11 +14320,27 @@
16210
are type correct. */
16213
+ /* unsigned 1 argument functions. */
16214
+ case CRYPTO_BUILTIN_VSBOX:
16215
+ case P8V_BUILTIN_VGBBD:
16220
/* unsigned 2 argument functions. */
16221
case ALTIVEC_BUILTIN_VMULEUB_UNS:
16222
case ALTIVEC_BUILTIN_VMULEUH_UNS:
16223
case ALTIVEC_BUILTIN_VMULOUB_UNS:
16224
case ALTIVEC_BUILTIN_VMULOUH_UNS:
16225
+ case CRYPTO_BUILTIN_VCIPHER:
16226
+ case CRYPTO_BUILTIN_VCIPHERLAST:
16227
+ case CRYPTO_BUILTIN_VNCIPHER:
16228
+ case CRYPTO_BUILTIN_VNCIPHERLAST:
16229
+ case CRYPTO_BUILTIN_VPMSUMB:
16230
+ case CRYPTO_BUILTIN_VPMSUMH:
16231
+ case CRYPTO_BUILTIN_VPMSUMW:
16232
+ case CRYPTO_BUILTIN_VPMSUMD:
16233
+ case CRYPTO_BUILTIN_VPMSUM:
16237
@@ -12600,6 +14363,14 @@
16238
case VSX_BUILTIN_XXSEL_8HI_UNS:
16239
case VSX_BUILTIN_XXSEL_4SI_UNS:
16240
case VSX_BUILTIN_XXSEL_2DI_UNS:
16241
+ case CRYPTO_BUILTIN_VPERMXOR:
16242
+ case CRYPTO_BUILTIN_VPERMXOR_V2DI:
16243
+ case CRYPTO_BUILTIN_VPERMXOR_V4SI:
16244
+ case CRYPTO_BUILTIN_VPERMXOR_V8HI:
16245
+ case CRYPTO_BUILTIN_VPERMXOR_V16QI:
16246
+ case CRYPTO_BUILTIN_VSHASIGMAW:
16247
+ case CRYPTO_BUILTIN_VSHASIGMAD:
16248
+ case CRYPTO_BUILTIN_VSHASIGMA:
16252
@@ -12741,9 +14512,24 @@
16255
enum insn_code icode = d->icode;
16256
- if (d->name == 0 || icode == CODE_FOR_nothing)
16258
+ if (d->name == 0)
16260
+ if (TARGET_DEBUG_BUILTIN)
16261
+ fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
16262
+ (long unsigned)i);
16267
+ if (icode == CODE_FOR_nothing)
16269
+ if (TARGET_DEBUG_BUILTIN)
16270
+ fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
16276
type = builtin_function_type (insn_data[icode].operand[0].mode,
16277
insn_data[icode].operand[1].mode,
16278
insn_data[icode].operand[2].mode,
16279
@@ -12781,9 +14567,24 @@
16282
enum insn_code icode = d->icode;
16283
- if (d->name == 0 || icode == CODE_FOR_nothing)
16285
+ if (d->name == 0)
16287
+ if (TARGET_DEBUG_BUILTIN)
16288
+ fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
16289
+ (long unsigned)i);
16294
+ if (icode == CODE_FOR_nothing)
16296
+ if (TARGET_DEBUG_BUILTIN)
16297
+ fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
16303
mode0 = insn_data[icode].operand[0].mode;
16304
mode1 = insn_data[icode].operand[1].mode;
16305
mode2 = insn_data[icode].operand[2].mode;
16306
@@ -12843,9 +14644,24 @@
16309
enum insn_code icode = d->icode;
16310
- if (d->name == 0 || icode == CODE_FOR_nothing)
16312
+ if (d->name == 0)
16314
+ if (TARGET_DEBUG_BUILTIN)
16315
+ fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
16316
+ (long unsigned)i);
16321
+ if (icode == CODE_FOR_nothing)
16323
+ if (TARGET_DEBUG_BUILTIN)
16324
+ fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
16330
mode0 = insn_data[icode].operand[0].mode;
16331
mode1 = insn_data[icode].operand[1].mode;
16333
@@ -13632,7 +15448,7 @@
16334
static bool eliminated = false;
16337
- if (mode != SDmode)
16338
+ if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16339
ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16342
@@ -13691,31 +15507,228 @@
16346
-enum reload_reg_type {
16347
- GPR_REGISTER_TYPE,
16348
- VECTOR_REGISTER_TYPE,
16349
- OTHER_REGISTER_TYPE
16351
+/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16352
+ on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16353
+ only work on the traditional altivec registers, note if an altivec register
16356
-static enum reload_reg_type
16357
-rs6000_reload_register_type (enum reg_class rclass)
16358
+static enum rs6000_reg_type
16359
+register_to_reg_type (rtx reg, bool *is_altivec)
16362
+ HOST_WIDE_INT regno;
16363
+ enum reg_class rclass;
16365
+ if (GET_CODE (reg) == SUBREG)
16366
+ reg = SUBREG_REG (reg);
16368
+ if (!REG_P (reg))
16369
+ return NO_REG_TYPE;
16371
+ regno = REGNO (reg);
16372
+ if (regno >= FIRST_PSEUDO_REGISTER)
16374
- case GENERAL_REGS:
16376
- return GPR_REGISTER_TYPE;
16377
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
16378
+ return PSEUDO_REG_TYPE;
16381
- case ALTIVEC_REGS:
16383
- return VECTOR_REGISTER_TYPE;
16384
+ regno = true_regnum (reg);
16385
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16386
+ return PSEUDO_REG_TYPE;
16390
- return OTHER_REGISTER_TYPE;
16391
+ gcc_assert (regno >= 0);
16393
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
16394
+ *is_altivec = true;
16396
+ rclass = rs6000_regno_regclass[regno];
16397
+ return reg_class_to_reg_type[(int)rclass];
16400
+/* Helper function for rs6000_secondary_reload to return true if a move to a
16401
+ different register classe is really a simple move. */
16404
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16405
+ enum rs6000_reg_type from_type,
16406
+ enum machine_mode mode)
16410
+ /* Add support for various direct moves available. In this function, we only
16411
+ look at cases where we don't need any extra registers, and one or more
16412
+ simple move insns are issued. At present, 32-bit integers are not allowed
16413
+ in FPR/VSX registers. Single precision binary floating is not a simple
16414
+ move because we need to convert to the single precision memory layout.
16415
+ The 4-byte SDmode can be moved. */
16416
+ size = GET_MODE_SIZE (mode);
16417
+ if (TARGET_DIRECT_MOVE
16418
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16419
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16420
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16423
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16424
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16425
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16428
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16429
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16430
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16436
+/* Power8 helper function for rs6000_secondary_reload, handle all of the
16437
+ special direct moves that involve allocating an extra register, return the
16438
+ insn code of the helper function if there is such a function or
16439
+ CODE_FOR_nothing if not. */
16442
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16443
+ enum rs6000_reg_type from_type,
16444
+ enum machine_mode mode,
16445
+ secondary_reload_info *sri,
16448
+ bool ret = false;
16449
+ enum insn_code icode = CODE_FOR_nothing;
16451
+ int size = GET_MODE_SIZE (mode);
16453
+ if (TARGET_POWERPC64)
16457
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
16458
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
16459
+ 64-bit values back together. */
16460
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16462
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16463
+ icode = reg_addr[mode].reload_vsx_gpr;
16466
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
16467
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
16468
+ bottom 64-bit value. */
16469
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16471
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16472
+ icode = reg_addr[mode].reload_gpr_vsx;
16476
+ else if (mode == SFmode)
16478
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16480
+ cost = 3; /* xscvdpspn, mfvsrd, and. */
16481
+ icode = reg_addr[mode].reload_gpr_vsx;
16484
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16486
+ cost = 2; /* mtvsrz, xscvspdpn. */
16487
+ icode = reg_addr[mode].reload_vsx_gpr;
16492
+ if (TARGET_POWERPC64 && size == 16)
16494
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
16495
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
16496
+ 64-bit values back together. */
16497
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16499
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16500
+ icode = reg_addr[mode].reload_vsx_gpr;
16503
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
16504
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
16505
+ bottom 64-bit value. */
16506
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16508
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16509
+ icode = reg_addr[mode].reload_gpr_vsx;
16513
+ else if (!TARGET_POWERPC64 && size == 8)
16515
+ /* Handle moving 64-bit values from GPRs to floating point registers on
16516
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16517
+ values back together. Altivec register classes must be handled
16518
+ specially since a different instruction is used, and the secondary
16519
+ reload support requires a single instruction class in the scratch
16520
+ register constraint. However, right now TFmode is not allowed in
16521
+ Altivec registers, so the pattern will never match. */
16522
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16524
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16525
+ icode = reg_addr[mode].reload_fpr_gpr;
16529
+ if (icode != CODE_FOR_nothing)
16534
+ sri->icode = icode;
16535
+ sri->extra_cost = cost;
16542
+/* Return whether a move between two register classes can be done either
16543
+ directly (simple move) or via a pattern that uses a single extra temporary
16544
+ (using power8's direct move in this case. */
16547
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16548
+ enum rs6000_reg_type from_type,
16549
+ enum machine_mode mode,
16550
+ secondary_reload_info *sri,
16553
+ /* Fall back to load/store reloads if either type is not a register. */
16554
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16557
+ /* If we haven't allocated registers yet, assume the move can be done for the
16558
+ standard register types. */
16559
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16560
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16561
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16564
+ /* Moves to the same set of registers is a simple move for non-specialized
16566
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16569
+ /* Check whether a simple move can be done directly. */
16570
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16574
+ sri->icode = CODE_FOR_nothing;
16575
+ sri->extra_cost = 0;
16580
+ /* Now check if we can do it in a few steps. */
16581
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16585
/* Inform reload about cases where moving X with a mode MODE to a register in
16586
RCLASS requires an extra scratch or immediate register. Return the class
16587
needed for the immediate register.
16588
@@ -13739,12 +15752,36 @@
16589
bool default_p = false;
16591
sri->icode = CODE_FOR_nothing;
16593
+ ? reg_addr[mode].reload_load
16594
+ : reg_addr[mode].reload_store);
16596
- /* Convert vector loads and stores into gprs to use an additional base
16598
- icode = rs6000_vector_reload[mode][in_p != false];
16599
- if (icode != CODE_FOR_nothing)
16600
+ if (REG_P (x) || register_operand (x, mode))
16602
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
16603
+ bool altivec_p = (rclass == ALTIVEC_REGS);
16604
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
16608
+ enum rs6000_reg_type exchange = to_type;
16609
+ to_type = from_type;
16610
+ from_type = exchange;
16613
+ /* Can we do a direct move of some sort? */
16614
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
16617
+ icode = (enum insn_code)sri->icode;
16618
+ default_p = false;
16623
+ /* Handle vector moves with reload helper functions. */
16624
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
16627
sri->icode = CODE_FOR_nothing;
16628
sri->extra_cost = 0;
16629
@@ -13755,22 +15792,43 @@
16631
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
16632
an extra register in that case, but it would need an extra
16633
- register if the addressing is reg+reg or (reg+reg)&(-16). */
16634
+ register if the addressing is reg+reg or (reg+reg)&(-16). Special
16635
+ case load/store quad. */
16636
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
16638
- if (!legitimate_indirect_address_p (addr, false)
16639
- && !rs6000_legitimate_offset_address_p (TImode, addr,
16641
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
16642
+ && GET_MODE_SIZE (mode) == 16
16643
+ && quad_memory_operand (x, mode))
16645
sri->icode = icode;
16646
+ sri->extra_cost = 2;
16649
+ else if (!legitimate_indirect_address_p (addr, false)
16650
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
16653
+ sri->icode = icode;
16654
/* account for splitting the loads, and converting the
16655
address from reg+reg to reg. */
16656
sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
16657
+ ((GET_CODE (addr) == AND) ? 1 : 0));
16660
- /* Loads to and stores from vector registers can only do reg+reg
16661
- addressing. Altivec registers can also do (reg+reg)&(-16). */
16662
+ /* Allow scalar loads to/from the traditional floating point
16663
+ registers, even if VSX memory is set. */
16664
+ else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
16665
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
16666
+ && (legitimate_indirect_address_p (addr, false)
16667
+ || legitimate_indirect_address_p (addr, false)
16668
+ || rs6000_legitimate_offset_address_p (mode, addr,
16672
+ /* Loads to and stores from vector registers can only do reg+reg
16673
+ addressing. Altivec registers can also do (reg+reg)&(-16). Allow
16674
+ scalar modes loading up the traditional floating point registers
16675
+ to use offset addresses. */
16676
else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
16677
|| rclass == FLOAT_REGS || rclass == NO_REGS)
16679
@@ -13814,12 +15872,12 @@
16682
enum reg_class xclass = REGNO_REG_CLASS (regno);
16683
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
16684
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
16685
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
16686
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
16688
/* If memory is needed, use default_secondary_reload to create the
16690
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
16691
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
16695
@@ -13829,7 +15887,7 @@
16698
else if (TARGET_POWERPC64
16699
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
16700
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
16702
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
16704
@@ -13868,7 +15926,7 @@
16707
else if (!TARGET_POWERPC64
16708
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
16709
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
16711
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
16713
@@ -13946,6 +16004,36 @@
16717
+/* Better tracing for rs6000_secondary_reload_inner. */
16720
+rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
16723
+ rtx set, clobber;
16725
+ gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
16727
+ fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
16728
+ store_p ? "store" : "load");
16731
+ set = gen_rtx_SET (VOIDmode, mem, reg);
16733
+ set = gen_rtx_SET (VOIDmode, reg, mem);
16735
+ clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16736
+ debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
16740
+rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
16743
+ rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
16744
+ gcc_unreachable ();
16747
/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
16748
to SP+reg addressing. */
16750
@@ -13964,19 +16052,14 @@
16753
if (TARGET_DEBUG_ADDR)
16755
- fprintf (stderr, "\nrs6000_secondary_reload_inner, type = %s\n",
16756
- store_p ? "store" : "load");
16757
- fprintf (stderr, "reg:\n");
16759
- fprintf (stderr, "mem:\n");
16761
- fprintf (stderr, "scratch:\n");
16762
- debug_rtx (scratch);
16764
+ rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
16766
- gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
16767
- gcc_assert (GET_CODE (mem) == MEM);
16768
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16769
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16771
+ if (GET_CODE (mem) != MEM)
16772
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16774
rclass = REGNO_REG_CLASS (regno);
16775
addr = XEXP (mem, 0);
16777
@@ -13995,19 +16078,24 @@
16778
if (GET_CODE (addr) == PRE_MODIFY)
16780
scratch_or_premodify = XEXP (addr, 0);
16781
- gcc_assert (REG_P (scratch_or_premodify));
16782
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
16783
+ if (!REG_P (scratch_or_premodify))
16784
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16786
+ if (GET_CODE (XEXP (addr, 1)) != PLUS)
16787
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16789
addr = XEXP (addr, 1);
16792
if (GET_CODE (addr) == PLUS
16793
&& (and_op2 != NULL_RTX
16794
- || !rs6000_legitimate_offset_address_p (TImode, addr,
16795
+ || !rs6000_legitimate_offset_address_p (PTImode, addr,
16798
addr_op1 = XEXP (addr, 0);
16799
addr_op2 = XEXP (addr, 1);
16800
- gcc_assert (legitimate_indirect_address_p (addr_op1, false));
16801
+ if (!legitimate_indirect_address_p (addr_op1, false))
16802
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16804
if (!REG_P (addr_op2)
16805
&& (GET_CODE (addr_op2) != CONST_INT
16806
@@ -14035,7 +16123,7 @@
16807
scratch_or_premodify = scratch;
16809
else if (!legitimate_indirect_address_p (addr, false)
16810
- && !rs6000_legitimate_offset_address_p (TImode, addr,
16811
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
16814
if (TARGET_DEBUG_ADDR)
16815
@@ -14051,9 +16139,21 @@
16819
- /* Float/Altivec registers can only handle reg+reg addressing. Move
16820
- other addresses into a scratch register. */
16821
+ /* Float registers can do offset+reg addressing for scalar types. */
16823
+ if (legitimate_indirect_address_p (addr, false) /* reg */
16824
+ || legitimate_indexed_address_p (addr, false) /* reg+reg */
16825
+ || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
16826
+ && and_op2 == NULL_RTX
16827
+ && scratch_or_premodify == scratch
16828
+ && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
16831
+ /* If this isn't a legacy floating point load/store, fall through to the
16834
+ /* VSX/Altivec registers can only handle reg+reg addressing. Move other
16835
+ addresses into a scratch register. */
16839
@@ -14073,36 +16173,38 @@
16840
/* If we aren't using a VSX load, save the PRE_MODIFY register and use it
16841
as the address later. */
16842
if (GET_CODE (addr) == PRE_MODIFY
16843
- && (!VECTOR_MEM_VSX_P (mode)
16844
+ && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
16845
+ && (rclass != FLOAT_REGS
16846
+ || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
16847
|| and_op2 != NULL_RTX
16848
|| !legitimate_indexed_address_p (XEXP (addr, 1), false)))
16850
scratch_or_premodify = XEXP (addr, 0);
16851
- gcc_assert (legitimate_indirect_address_p (scratch_or_premodify,
16853
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
16854
+ if (!legitimate_indirect_address_p (scratch_or_premodify, false))
16855
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16857
+ if (GET_CODE (XEXP (addr, 1)) != PLUS)
16858
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16860
addr = XEXP (addr, 1);
16863
if (legitimate_indirect_address_p (addr, false) /* reg */
16864
|| legitimate_indexed_address_p (addr, false) /* reg+reg */
16865
- || GET_CODE (addr) == PRE_MODIFY /* VSX pre-modify */
16866
|| (GET_CODE (addr) == AND /* Altivec memory */
16867
+ && rclass == ALTIVEC_REGS
16868
&& GET_CODE (XEXP (addr, 1)) == CONST_INT
16869
&& INTVAL (XEXP (addr, 1)) == -16
16870
- && VECTOR_MEM_ALTIVEC_P (mode))
16871
- || (rclass == FLOAT_REGS /* legacy float mem */
16872
- && GET_MODE_SIZE (mode) == 8
16873
- && and_op2 == NULL_RTX
16874
- && scratch_or_premodify == scratch
16875
- && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
16876
+ && (legitimate_indirect_address_p (XEXP (addr, 0), false)
16877
+ || legitimate_indexed_address_p (XEXP (addr, 0), false))))
16880
else if (GET_CODE (addr) == PLUS)
16882
addr_op1 = XEXP (addr, 0);
16883
addr_op2 = XEXP (addr, 1);
16884
- gcc_assert (REG_P (addr_op1));
16885
+ if (!REG_P (addr_op1))
16886
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16888
if (TARGET_DEBUG_ADDR)
16890
@@ -14121,7 +16223,8 @@
16893
else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
16894
- || GET_CODE (addr) == CONST_INT || REG_P (addr))
16895
+ || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
16898
if (TARGET_DEBUG_ADDR)
16900
@@ -14137,12 +16240,12 @@
16904
- gcc_unreachable ();
16905
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16910
- gcc_unreachable ();
16911
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16914
/* If the original address involved a pre-modify that we couldn't use the VSX
16915
@@ -14189,7 +16292,7 @@
16916
/* Adjust the address if it changed. */
16917
if (addr != XEXP (mem, 0))
16919
- mem = change_address (mem, mode, addr);
16920
+ mem = replace_equiv_address_nv (mem, addr);
16921
if (TARGET_DEBUG_ADDR)
16922
fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
16924
@@ -14254,8 +16357,10 @@
16928
-/* Allocate a 64-bit stack slot to be used for copying SDmode
16929
- values through if this function has any SDmode references. */
16930
+/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
16931
+ this function has any SDmode references. If we are on a power7 or later, we
16932
+ don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
16933
+ can load/store the value. */
16936
rs6000_alloc_sdmode_stack_slot (void)
16937
@@ -14266,6 +16371,9 @@
16939
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
16941
+ if (TARGET_NO_SDMODE_STACK)
16945
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
16947
@@ -14326,8 +16434,7 @@
16949
enum machine_mode mode = GET_MODE (x);
16951
- if (VECTOR_UNIT_VSX_P (mode)
16952
- && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
16953
+ if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
16956
if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16957
@@ -14382,60 +16489,45 @@
16958
set and vice versa. */
16961
-rs6000_secondary_memory_needed (enum reg_class class1,
16962
- enum reg_class class2,
16963
+rs6000_secondary_memory_needed (enum reg_class from_class,
16964
+ enum reg_class to_class,
16965
enum machine_mode mode)
16967
- if (class1 == class2)
16969
+ enum rs6000_reg_type from_type, to_type;
16970
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
16971
+ || (to_class == ALTIVEC_REGS));
16973
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
16974
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
16975
- between these classes. But we need memory for other things that can go in
16976
- FLOAT_REGS like SFmode. */
16978
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
16979
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
16980
- || class1 == FLOAT_REGS))
16981
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
16982
- && class2 != FLOAT_REGS);
16983
+ /* If a simple/direct move is available, we don't need secondary memory */
16984
+ from_type = reg_class_to_reg_type[(int)from_class];
16985
+ to_type = reg_class_to_reg_type[(int)to_class];
16987
- if (class1 == VSX_REGS || class2 == VSX_REGS)
16989
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
16990
+ (secondary_reload_info *)0, altivec_p))
16993
- if (class1 == FLOAT_REGS
16994
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
16995
- || ((mode != DFmode)
16996
- && (mode != DDmode)
16997
- && (mode != DImode))))
16998
+ /* If we have a floating point or vector register class, we need to use
16999
+ memory to transfer the data. */
17000
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17003
- if (class2 == FLOAT_REGS
17004
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
17005
- || ((mode != DFmode)
17006
- && (mode != DDmode)
17007
- && (mode != DImode))))
17010
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
17016
/* Debug version of rs6000_secondary_memory_needed. */
17018
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
17019
- enum reg_class class2,
17020
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17021
+ enum reg_class to_class,
17022
enum machine_mode mode)
17024
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
17025
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17028
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
17029
- "class2 = %s, mode = %s\n",
17030
- ret ? "true" : "false", reg_class_names[class1],
17031
- reg_class_names[class2], GET_MODE_NAME (mode));
17032
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17033
+ "to_class = %s, mode = %s\n",
17034
+ ret ? "true" : "false",
17035
+ reg_class_names[from_class],
17036
+ reg_class_names[to_class],
17037
+ GET_MODE_NAME (mode));
17041
@@ -14502,12 +16594,18 @@
17042
return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
17044
/* Memory, and FP/altivec registers can go into fp/altivec registers under
17046
+ VSX. However, for scalar variables, use the traditional floating point
17047
+ registers so that we can use offset+register addressing. */
17049
&& (regno == -1 || VSX_REGNO_P (regno))
17050
&& VSX_REG_CLASS_P (rclass))
17053
+ if (GET_MODE_SIZE (mode) < 16)
17054
+ return FLOAT_REGS;
17059
/* Memory, and AltiVec registers can go into AltiVec registers. */
17060
if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17061
&& rclass == ALTIVEC_REGS)
17062
@@ -14551,8 +16649,42 @@
17063
if (from_size != to_size)
17065
enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17066
- return ((from_size < 8 || to_size < 8 || TARGET_IEEEQUAD)
17067
- && reg_classes_intersect_p (xclass, rclass));
17069
+ if (reg_classes_intersect_p (xclass, rclass))
17071
+ unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17072
+ unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17074
+ /* Don't allow 64-bit types to overlap with 128-bit types that take a
17075
+ single register under VSX because the scalar part of the register
17076
+ is in the upper 64-bits, and not the lower 64-bits. Types like
17077
+ TFmode/TDmode that take 2 scalar register can overlap. 128-bit
17078
+ IEEE floating point can't overlap, and neither can small
17081
+ if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17084
+ /* TDmode in floating-mode registers must always go into a register
17085
+ pair with the most significant word in the even-numbered register
17086
+ to match ISA requirements. In little-endian mode, this does not
17087
+ match subreg numbering, so we cannot allow subregs. */
17088
+ if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17091
+ if (from_size < 8 || to_size < 8)
17094
+ if (from_size == 8 && (8 * to_nregs) != to_size)
17097
+ if (to_size == 8 && (8 * from_nregs) != from_size)
17106
if (TARGET_E500_DOUBLE
17107
@@ -14566,10 +16698,19 @@
17108
/* Since the VSX register set includes traditional floating point registers
17109
and altivec registers, just check for the size being different instead of
17110
trying to check whether the modes are vector modes. Otherwise it won't
17111
- allow say DF and DI to change classes. */
17112
+ allow say DF and DI to change classes. For types like TFmode and TDmode
17113
+ that take 2 64-bit registers, rather than a single 128-bit register, don't
17114
+ allow subregs of those types to other 128 bit types. */
17115
if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17116
- return (from_size != 8 && from_size != 16);
17118
+ unsigned num_regs = (from_size + 15) / 16;
17119
+ if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17120
+ || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17123
+ return (from_size != 8 && from_size != 16);
17126
if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17127
&& (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17129
@@ -14599,6 +16740,161 @@
17133
+/* Return a string to do a move operation of 128 bits of data. */
17136
+rs6000_output_move_128bit (rtx operands[])
17138
+ rtx dest = operands[0];
17139
+ rtx src = operands[1];
17140
+ enum machine_mode mode = GET_MODE (dest);
17143
+ bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17144
+ bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17146
+ if (REG_P (dest))
17148
+ dest_regno = REGNO (dest);
17149
+ dest_gpr_p = INT_REGNO_P (dest_regno);
17150
+ dest_fp_p = FP_REGNO_P (dest_regno);
17151
+ dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17152
+ dest_vsx_p = dest_fp_p | dest_vmx_p;
17157
+ dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17162
+ src_regno = REGNO (src);
17163
+ src_gpr_p = INT_REGNO_P (src_regno);
17164
+ src_fp_p = FP_REGNO_P (src_regno);
17165
+ src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17166
+ src_vsx_p = src_fp_p | src_vmx_p;
17171
+ src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17174
+ /* Register moves. */
17175
+ if (dest_regno >= 0 && src_regno >= 0)
17182
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17186
+ else if (TARGET_VSX && dest_vsx_p)
17189
+ return "xxlor %x0,%x1,%x1";
17191
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
17195
+ else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17196
+ return "vor %0,%1,%1";
17198
+ else if (dest_fp_p && src_fp_p)
17203
+ else if (dest_regno >= 0 && MEM_P (src))
17207
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17208
+ return "lq %0,%1";
17213
+ else if (TARGET_ALTIVEC && dest_vmx_p
17214
+ && altivec_indexed_or_indirect_operand (src, mode))
17215
+ return "lvx %0,%y1";
17217
+ else if (TARGET_VSX && dest_vsx_p)
17219
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17220
+ return "lxvw4x %x0,%y1";
17222
+ return "lxvd2x %x0,%y1";
17225
+ else if (TARGET_ALTIVEC && dest_vmx_p)
17226
+ return "lvx %0,%y1";
17228
+ else if (dest_fp_p)
17233
+ else if (src_regno >= 0 && MEM_P (dest))
17237
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17238
+ return "stq %1,%0";
17243
+ else if (TARGET_ALTIVEC && src_vmx_p
17244
+ && altivec_indexed_or_indirect_operand (src, mode))
17245
+ return "stvx %1,%y0";
17247
+ else if (TARGET_VSX && src_vsx_p)
17249
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17250
+ return "stxvw4x %x1,%y0";
17252
+ return "stxvd2x %x1,%y0";
17255
+ else if (TARGET_ALTIVEC && src_vmx_p)
17256
+ return "stvx %1,%y0";
17258
+ else if (src_fp_p)
17263
+ else if (dest_regno >= 0
17264
+ && (GET_CODE (src) == CONST_INT
17265
+ || GET_CODE (src) == CONST_DOUBLE
17266
+ || GET_CODE (src) == CONST_VECTOR))
17271
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
17272
+ return "xxlxor %x0,%x0,%x0";
17274
+ else if (TARGET_ALTIVEC && dest_vmx_p)
17275
+ return output_vec_const_move (operands);
17278
+ if (TARGET_DEBUG_ADDR)
17280
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
17281
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
17284
+ gcc_unreachable ();
17288
/* Given a comparison operation, return the bit number in CCR to test. We
17289
know this is a valid comparison.
17291
@@ -14824,6 +17120,7 @@
17292
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
17299
@@ -15303,7 +17600,7 @@
17303
- /* Like 'L', for third word of TImode */
17304
+ /* Like 'L', for third word of TImode/PTImode */
17306
fputs (reg_names[REGNO (x) + 2], file);
17307
else if (MEM_P (x))
17308
@@ -15353,7 +17650,7 @@
17312
- /* Like 'L', for last word of TImode. */
17313
+ /* Like 'L', for last word of TImode/PTImode. */
17315
fputs (reg_names[REGNO (x) + 3], file);
17316
else if (MEM_P (x))
17317
@@ -15384,7 +17681,8 @@
17318
if ((TARGET_SPE || TARGET_E500_DOUBLE)
17319
&& (GET_MODE_SIZE (GET_MODE (x)) == 8
17320
|| GET_MODE (x) == TFmode
17321
- || GET_MODE (x) == TImode))
17322
+ || GET_MODE (x) == TImode
17323
+ || GET_MODE (x) == PTImode))
17325
/* Handle [reg]. */
17327
@@ -16767,7 +19065,8 @@
17330
/* A subroutine of the atomic operation splitters. Emit a load-locked
17331
- instruction in MODE. */
17332
+ instruction in MODE. For QI/HImode, possibly use a pattern than includes
17333
+ the zero_extend operation. */
17336
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
17337
@@ -16776,12 +19075,26 @@
17342
+ fn = gen_load_lockedqi;
17345
+ fn = gen_load_lockedhi;
17348
- fn = gen_load_lockedsi;
17349
+ if (GET_MODE (mem) == QImode)
17350
+ fn = gen_load_lockedqi_si;
17351
+ else if (GET_MODE (mem) == HImode)
17352
+ fn = gen_load_lockedhi_si;
17354
+ fn = gen_load_lockedsi;
17357
fn = gen_load_lockeddi;
17360
+ fn = gen_load_lockedti;
17363
gcc_unreachable ();
17365
@@ -16798,12 +19111,21 @@
17370
+ fn = gen_store_conditionalqi;
17373
+ fn = gen_store_conditionalhi;
17376
fn = gen_store_conditionalsi;
17379
fn = gen_store_conditionaldi;
17382
+ fn = gen_store_conditionalti;
17385
gcc_unreachable ();
17387
@@ -16950,7 +19272,7 @@
17389
rtx boolval, retval, mem, oldval, newval, cond;
17390
rtx label1, label2, x, mask, shift;
17391
- enum machine_mode mode;
17392
+ enum machine_mode mode, orig_mode;
17393
enum memmodel mod_s, mod_f;
17396
@@ -16962,23 +19284,30 @@
17397
is_weak = (INTVAL (operands[5]) != 0);
17398
mod_s = (enum memmodel) INTVAL (operands[6]);
17399
mod_f = (enum memmodel) INTVAL (operands[7]);
17400
- mode = GET_MODE (mem);
17401
+ orig_mode = mode = GET_MODE (mem);
17403
mask = shift = NULL_RTX;
17404
if (mode == QImode || mode == HImode)
17406
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17408
- /* Shift and mask OLDVAL into position with the word. */
17409
+ /* Before power8, we didn't have access to lbarx/lharx, so generate a
17410
+ lwarx and shift/mask operations. With power8, we need to do the
17411
+ comparison in SImode, but the store is still done in QI/HImode. */
17412
oldval = convert_modes (SImode, mode, oldval, 1);
17413
- oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
17414
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
17416
- /* Shift and mask NEWVAL into position within the word. */
17417
- newval = convert_modes (SImode, mode, newval, 1);
17418
- newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
17419
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
17420
+ if (!TARGET_SYNC_HI_QI)
17422
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17424
+ /* Shift and mask OLDVAL into position with the word. */
17425
+ oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
17426
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
17428
+ /* Shift and mask NEWVAL into position within the word. */
17429
+ newval = convert_modes (SImode, mode, newval, 1);
17430
+ newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
17431
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
17434
/* Prepare to adjust the return value. */
17435
retval = gen_reg_rtx (SImode);
17437
@@ -17006,7 +19335,25 @@
17440
cond = gen_reg_rtx (CCmode);
17441
- x = gen_rtx_COMPARE (CCmode, x, oldval);
17442
+ /* If we have TImode, synthesize a comparison. */
17443
+ if (mode != TImode)
17444
+ x = gen_rtx_COMPARE (CCmode, x, oldval);
17447
+ rtx xor1_result = gen_reg_rtx (DImode);
17448
+ rtx xor2_result = gen_reg_rtx (DImode);
17449
+ rtx or_result = gen_reg_rtx (DImode);
17450
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
17451
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
17452
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
17453
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
17455
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
17456
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
17457
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
17458
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
17461
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
17463
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17464
@@ -17016,7 +19363,7 @@
17466
x = rs6000_mask_atomic_subword (retval, newval, mask);
17468
- emit_store_conditional (mode, cond, mem, x);
17469
+ emit_store_conditional (orig_mode, cond, mem, x);
17473
@@ -17034,6 +19381,8 @@
17476
rs6000_finish_atomic_subword (operands[1], retval, shift);
17477
+ else if (mode != GET_MODE (operands[1]))
17478
+ convert_move (operands[1], retval, 1);
17480
/* In all cases, CR0 contains EQ on success, and NE on failure. */
17481
x = gen_rtx_EQ (SImode, cond, const0_rtx);
17482
@@ -17057,7 +19406,7 @@
17483
mode = GET_MODE (mem);
17485
mask = shift = NULL_RTX;
17486
- if (mode == QImode || mode == HImode)
17487
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
17489
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17491
@@ -17106,53 +19455,70 @@
17493
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
17494
enum machine_mode mode = GET_MODE (mem);
17495
+ enum machine_mode store_mode = mode;
17496
rtx label, x, cond, mask, shift;
17497
rtx before = orig_before, after = orig_after;
17499
mask = shift = NULL_RTX;
17500
+ /* On power8, we want to use SImode for the operation. On previous systems,
17501
+ use the operation in a subword and shift/mask to get the proper byte or
17503
if (mode == QImode || mode == HImode)
17505
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17506
+ if (TARGET_SYNC_HI_QI)
17508
+ val = convert_modes (SImode, mode, val, 1);
17510
- /* Shift and mask VAL into position with the word. */
17511
- val = convert_modes (SImode, mode, val, 1);
17512
- val = expand_simple_binop (SImode, ASHIFT, val, shift,
17513
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
17516
+ /* Prepare to adjust the return value. */
17517
+ before = gen_reg_rtx (SImode);
17519
+ after = gen_reg_rtx (SImode);
17526
- /* We've already zero-extended VAL. That is sufficient to
17527
- make certain that it does not affect other bits. */
17530
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17533
- /* If we make certain that all of the other bits in VAL are
17534
- set, that will be sufficient to not affect other bits. */
17535
- x = gen_rtx_NOT (SImode, mask);
17536
- x = gen_rtx_IOR (SImode, x, val);
17537
- emit_insn (gen_rtx_SET (VOIDmode, val, x));
17540
+ /* Shift and mask VAL into position with the word. */
17541
+ val = convert_modes (SImode, mode, val, 1);
17542
+ val = expand_simple_binop (SImode, ASHIFT, val, shift,
17543
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
17548
- /* These will all affect bits outside the field and need
17549
- adjustment via MASK within the loop. */
17555
+ /* We've already zero-extended VAL. That is sufficient to
17556
+ make certain that it does not affect other bits. */
17561
- gcc_unreachable ();
17563
+ /* If we make certain that all of the other bits in VAL are
17564
+ set, that will be sufficient to not affect other bits. */
17565
+ x = gen_rtx_NOT (SImode, mask);
17566
+ x = gen_rtx_IOR (SImode, x, val);
17567
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
17574
+ /* These will all affect bits outside the field and need
17575
+ adjustment via MASK within the loop. */
17579
+ gcc_unreachable ();
17582
+ /* Prepare to adjust the return value. */
17583
+ before = gen_reg_rtx (SImode);
17585
+ after = gen_reg_rtx (SImode);
17586
+ store_mode = mode = SImode;
17589
- /* Prepare to adjust the return value. */
17590
- before = gen_reg_rtx (SImode);
17592
- after = gen_reg_rtx (SImode);
17596
mem = rs6000_pre_atomic_barrier (mem, model);
17597
@@ -17185,9 +19551,11 @@
17598
NULL_RTX, 1, OPTAB_LIB_WIDEN);
17599
x = rs6000_mask_atomic_subword (before, x, mask);
17601
+ else if (store_mode != mode)
17602
+ x = convert_modes (store_mode, mode, x, 1);
17604
cond = gen_reg_rtx (CCmode);
17605
- emit_store_conditional (mode, cond, mem, x);
17606
+ emit_store_conditional (store_mode, cond, mem, x);
17608
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17609
emit_unlikely_jump (x, label);
17610
@@ -17196,11 +19564,22 @@
17614
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17615
+ then do the calcuations in a SImode register. */
17617
rs6000_finish_atomic_subword (orig_before, before, shift);
17619
rs6000_finish_atomic_subword (orig_after, after, shift);
17621
+ else if (store_mode != mode)
17623
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
17624
+ operation and then do the calcuations in a SImode register. */
17626
+ convert_move (orig_before, before, 1);
17628
+ convert_move (orig_after, after, 1);
17630
else if (orig_after && after != orig_after)
17631
emit_move_insn (orig_after, after);
17633
@@ -17240,6 +19619,39 @@
17635
gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
17637
+ /* TDmode residing in FP registers is special, since the ISA requires that
17638
+ the lower-numbered word of a register pair is always the most significant
17639
+ word, even in little-endian mode. This does not match the usual subreg
17640
+ semantics, so we cannnot use simplify_gen_subreg in those cases. Access
17641
+ the appropriate constituent registers "by hand" in little-endian mode.
17643
+ Note we do not need to check for destructive overlap here since TDmode
17644
+ can only reside in even/odd register pairs. */
17645
+ if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
17647
+ rtx p_src, p_dst;
17650
+ for (i = 0; i < nregs; i++)
17652
+ if (REG_P (src) && FP_REGNO_P (REGNO (src)))
17653
+ p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
17655
+ p_src = simplify_gen_subreg (reg_mode, src, mode,
17656
+ i * reg_mode_size);
17658
+ if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
17659
+ p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
17661
+ p_dst = simplify_gen_subreg (reg_mode, dst, mode,
17662
+ i * reg_mode_size);
17664
+ emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
17670
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
17672
/* Move register range backwards, if we might have destructive
17673
@@ -17694,7 +20106,7 @@
17677
- gcc_checking_assert (DEFAULT_ABI == ABI_AIX);
17678
+ gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
17679
if (info->first_fp_reg_save > 61)
17680
strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
17681
strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
17682
@@ -17705,7 +20117,8 @@
17683
by the static chain. It would require too much fiddling and the
17684
static chain is rarely used anyway. FPRs are saved w.r.t the stack
17685
pointer on Darwin, and AIX uses r1 or r12. */
17686
- if (using_static_chain_p && DEFAULT_ABI != ABI_AIX)
17687
+ if (using_static_chain_p
17688
+ && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
17689
strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
17691
| SAVE_INLINE_VRS | REST_INLINE_VRS);
17692
@@ -17838,7 +20251,35 @@
17693
The required alignment for AIX configurations is two words (i.e., 8
17696
+ The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
17698
+ SP----> +---------------------------------------+
17699
+ | Back chain to caller | 0
17700
+ +---------------------------------------+
17701
+ | Save area for CR | 8
17702
+ +---------------------------------------+
17704
+ +---------------------------------------+
17705
+ | Saved TOC pointer | 24
17706
+ +---------------------------------------+
17707
+ | Parameter save area (P) | 32
17708
+ +---------------------------------------+
17709
+ | Alloca space (A) | 32+P
17710
+ +---------------------------------------+
17711
+ | Local variable space (L) | 32+P+A
17712
+ +---------------------------------------+
17713
+ | Save area for AltiVec registers (W) | 32+P+A+L
17714
+ +---------------------------------------+
17715
+ | AltiVec alignment padding (Y) | 32+P+A+L+W
17716
+ +---------------------------------------+
17717
+ | Save area for GP registers (G) | 32+P+A+L+W+Y
17718
+ +---------------------------------------+
17719
+ | Save area for FP registers (F) | 32+P+A+L+W+Y+G
17720
+ +---------------------------------------+
17721
+ old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
17722
+ +---------------------------------------+
17725
V.4 stack frames look like:
17727
SP----> +---------------------------------------+
17728
@@ -17898,6 +20339,7 @@
17729
rs6000_stack_t *info_ptr = &stack_info;
17730
int reg_size = TARGET_32BIT ? 4 : 8;
17735
HOST_WIDE_INT non_fixed_size;
17736
@@ -17991,6 +20433,18 @@
17740
+ /* In the ELFv2 ABI, we also need to allocate space for separate
17741
+ CR field save areas if the function calls __builtin_eh_return. */
17742
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
17744
+ /* This hard-codes that we have three call-saved CR fields. */
17745
+ ehcr_size = 3 * reg_size;
17746
+ /* We do *not* use the regular CR save mechanism. */
17747
+ info_ptr->cr_save_p = 0;
17752
/* Determine various sizes. */
17753
info_ptr->reg_size = reg_size;
17754
info_ptr->fixed_size = RS6000_SAVE_AREA;
17755
@@ -18030,6 +20484,7 @@
17756
gcc_unreachable ();
17761
info_ptr->fp_save_offset = - info_ptr->fp_size;
17762
info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
17763
@@ -18059,6 +20514,8 @@
17766
info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
17768
+ info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
17769
info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
17770
info_ptr->lr_save_offset = 2*reg_size;
17772
@@ -18121,6 +20578,7 @@
17773
+ info_ptr->spe_gp_size
17774
+ info_ptr->spe_padding_size
17777
+ info_ptr->cr_size
17778
+ info_ptr->vrsave_size,
17780
@@ -18134,7 +20592,7 @@
17782
/* Determine if we need to save the link register. */
17783
if (info_ptr->calls_p
17784
- || (DEFAULT_ABI == ABI_AIX
17785
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17787
&& !TARGET_PROFILE_KERNEL)
17788
|| (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
17789
@@ -18280,6 +20738,7 @@
17790
default: abi_string = "Unknown"; break;
17791
case ABI_NONE: abi_string = "NONE"; break;
17792
case ABI_AIX: abi_string = "AIX"; break;
17793
+ case ABI_ELFv2: abi_string = "ELFv2"; break;
17794
case ABI_DARWIN: abi_string = "Darwin"; break;
17795
case ABI_V4: abi_string = "V.4"; break;
17797
@@ -18401,7 +20860,8 @@
17798
/* Currently we don't optimize very well between prolog and body
17799
code and for PIC code the code can be actually quite bad, so
17800
don't try to be too clever here. */
17801
- if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic))
17803
+ || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
17805
cfun->machine->ra_needs_full_frame = 1;
17807
@@ -18460,13 +20920,13 @@
17811
- /* Under the AIX ABI we can't allow calls to non-local functions,
17812
- because the callee may have a different TOC pointer to the
17813
- caller and there's no way to ensure we restore the TOC when we
17814
- return. With the secure-plt SYSV ABI we can't make non-local
17815
+ /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
17816
+ functions, because the callee may have a different TOC pointer to
17817
+ the caller and there's no way to ensure we restore the TOC when
17818
+ we return. With the secure-plt SYSV ABI we can't make non-local
17819
calls when -fpic/PIC because the plt call stubs use r30. */
17820
if (DEFAULT_ABI == ABI_DARWIN
17821
- || (DEFAULT_ABI == ABI_AIX
17822
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17824
&& !DECL_EXTERNAL (decl)
17825
&& (*targetm.binds_local_p) (decl))
17826
@@ -18567,7 +21027,7 @@
17828
dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
17830
- if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic)
17831
+ if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
17834
rtx lab, tmp1, tmp2, got;
17835
@@ -18595,7 +21055,7 @@
17836
emit_insn (gen_load_toc_v4_pic_si ());
17837
emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
17839
- else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
17840
+ else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
17843
rtx temp0 = (fromprolog
17844
@@ -18643,7 +21103,7 @@
17848
- gcc_assert (DEFAULT_ABI == ABI_AIX);
17849
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
17852
emit_insn (gen_load_toc_aix_si (dest));
17853
@@ -19048,7 +21508,7 @@
17856
rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
17857
- rtx reg2, rtx rreg)
17858
+ rtx reg2, rtx rreg, rtx split_reg)
17862
@@ -19139,6 +21599,11 @@
17866
+ /* If a store insn has been split into multiple insns, the
17867
+ true source register is given by split_reg. */
17868
+ if (split_reg != NULL_RTX)
17869
+ real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
17871
RTX_FRAME_RELATED_P (insn) = 1;
17872
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
17874
@@ -19246,7 +21711,7 @@
17875
reg = gen_rtx_REG (mode, regno);
17876
insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
17877
return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
17878
- NULL_RTX, NULL_RTX);
17879
+ NULL_RTX, NULL_RTX, NULL_RTX);
17882
/* Emit an offset memory reference suitable for a frame store, while
17883
@@ -19362,7 +21827,7 @@
17884
if ((sel & SAVRES_LR))
17887
- else if (DEFAULT_ABI == ABI_AIX)
17888
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17890
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
17891
/* No out-of-line save/restore routines for GPRs on AIX. */
17892
@@ -19503,7 +21968,7 @@
17893
static inline unsigned
17894
ptr_regno_for_savres (int sel)
17896
- if (DEFAULT_ABI == ABI_AIX)
17897
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17898
return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
17899
return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
17901
@@ -19588,6 +22053,43 @@
17905
+/* Emit code to store CR fields that need to be saved into REG. */
17908
+rs6000_emit_move_from_cr (rtx reg)
17910
+ /* Only the ELFv2 ABI allows storing only selected fields. */
17911
+ if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
17913
+ int i, cr_reg[8], count = 0;
17915
+ /* Collect CR fields that must be saved. */
17916
+ for (i = 0; i < 8; i++)
17917
+ if (save_reg_p (CR0_REGNO + i))
17918
+ cr_reg[count++] = i;
17920
+ /* If it's just a single one, use mfcrf. */
17923
+ rtvec p = rtvec_alloc (1);
17924
+ rtvec r = rtvec_alloc (2);
17925
+ RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
17926
+ RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
17928
+ = gen_rtx_SET (VOIDmode, reg,
17929
+ gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
17931
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
17935
+ /* ??? It might be better to handle count == 2 / 3 cases here
17936
+ as well, using logical operations to combine the values. */
17939
+ emit_insn (gen_movesi_from_cr (reg));
17942
/* Determine whether the gp REG is really used. */
17945
@@ -19653,6 +22155,17 @@
17946
#define NOT_INUSE(R) do {} while (0)
17949
+ if (DEFAULT_ABI == ABI_ELFv2)
17951
+ cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
17953
+ /* With -mminimal-toc we may generate an extra use of r2 below. */
17954
+ if (!TARGET_SINGLE_PIC_BASE
17955
+ && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
17956
+ cfun->machine->r2_setup_needed = true;
17960
if (flag_stack_usage_info)
17961
current_function_static_stack_size = info->total_size;
17963
@@ -19767,7 +22280,7 @@
17965
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
17966
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
17967
- treg, GEN_INT (-info->total_size));
17968
+ treg, GEN_INT (-info->total_size), NULL_RTX);
17969
sp_off = frame_off = info->total_size;
17972
@@ -19852,14 +22365,14 @@
17974
insn = emit_move_insn (mem, reg);
17975
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
17976
- NULL_RTX, NULL_RTX);
17977
+ NULL_RTX, NULL_RTX, NULL_RTX);
17982
/* If we need to save CR, put it into r12 or r11. Choose r12 except when
17983
r12 will be needed by out-of-line gpr restore. */
17984
- cr_save_regno = (DEFAULT_ABI == ABI_AIX
17985
+ cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17986
&& !(strategy & (SAVE_INLINE_GPRS
17987
| SAVE_NOINLINE_GPRS_SAVES_LR))
17989
@@ -19868,21 +22381,9 @@
17990
&& REGNO (frame_reg_rtx) != cr_save_regno
17991
&& !(using_static_chain_p && cr_save_regno == 11))
17995
cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
17996
START_USE (cr_save_regno);
17997
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
17998
- RTX_FRAME_RELATED_P (insn) = 1;
17999
- /* Now, there's no way that dwarf2out_frame_debug_expr is going
18000
- to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'.
18001
- But that's OK. All we have to do is specify that _one_ condition
18002
- code register is saved in this stack slot. The thrower's epilogue
18003
- will then restore all the call-saved registers.
18004
- We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */
18005
- set = gen_rtx_SET (VOIDmode, cr_save_rtx,
18006
- gen_rtx_REG (SImode, CR2_REGNO));
18007
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
18008
+ rs6000_emit_move_from_cr (cr_save_rtx);
18011
/* Do any required saving of fpr's. If only one or two to save, do
18012
@@ -19920,7 +22421,7 @@
18013
info->lr_save_offset,
18015
rs6000_frame_related (insn, ptr_reg, sp_off,
18016
- NULL_RTX, NULL_RTX);
18017
+ NULL_RTX, NULL_RTX, NULL_RTX);
18021
@@ -19999,7 +22500,7 @@
18022
SAVRES_SAVE | SAVRES_GPR);
18024
rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
18025
- NULL_RTX, NULL_RTX);
18026
+ NULL_RTX, NULL_RTX, NULL_RTX);
18029
/* Move the static chain pointer back. */
18030
@@ -20049,7 +22550,7 @@
18031
info->lr_save_offset + ptr_off,
18033
rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
18034
- NULL_RTX, NULL_RTX);
18035
+ NULL_RTX, NULL_RTX, NULL_RTX);
18039
@@ -20065,7 +22566,7 @@
18040
info->gp_save_offset + frame_off + reg_size * i);
18041
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
18042
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
18043
- NULL_RTX, NULL_RTX);
18044
+ NULL_RTX, NULL_RTX, NULL_RTX);
18046
else if (!WORLD_SAVE_P (info))
18048
@@ -20134,7 +22635,8 @@
18049
be updated if we arrived at this function via a plt call or
18050
toc adjusting stub. */
18051
emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
18052
- toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028;
18053
+ toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
18054
+ + RS6000_TOC_SAVE_SLOT);
18055
hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
18056
emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
18057
compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
18058
@@ -20153,7 +22655,7 @@
18059
LABEL_NUSES (toc_save_done) += 1;
18061
save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
18062
- TOC_REGNUM, frame_off + 5 * reg_size,
18063
+ TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
18064
sp_off - frame_off);
18066
emit_label (toc_save_done);
18067
@@ -20193,28 +22695,123 @@
18068
rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
18069
GEN_INT (info->cr_save_offset + frame_off));
18070
rtx mem = gen_frame_mem (SImode, addr);
18071
- /* See the large comment above about why CR2_REGNO is used. */
18072
- rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO);
18074
/* If we didn't copy cr before, do so now using r0. */
18075
if (cr_save_rtx == NULL_RTX)
18080
cr_save_rtx = gen_rtx_REG (SImode, 0);
18081
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
18082
- RTX_FRAME_RELATED_P (insn) = 1;
18083
- set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg);
18084
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
18085
+ rs6000_emit_move_from_cr (cr_save_rtx);
18087
- insn = emit_move_insn (mem, cr_save_rtx);
18089
+ /* Saving CR requires a two-instruction sequence: one instruction
18090
+ to move the CR to a general-purpose register, and a second
18091
+ instruction that stores the GPR to memory.
18093
+ We do not emit any DWARF CFI records for the first of these,
18094
+ because we cannot properly represent the fact that CR is saved in
18095
+ a register. One reason is that we cannot express that multiple
18096
+ CR fields are saved; another reason is that on 64-bit, the size
18097
+ of the CR register in DWARF (4 bytes) differs from the size of
18098
+ a general-purpose register.
18100
+ This means if any intervening instruction were to clobber one of
18101
+ the call-saved CR fields, we'd have incorrect CFI. To prevent
18102
+ this from happening, we mark the store to memory as a use of
18103
+ those CR fields, which prevents any such instruction from being
18104
+ scheduled in between the two instructions. */
18106
+ int n_crsave = 0;
18109
+ crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
18110
+ for (i = 0; i < 8; i++)
18111
+ if (save_reg_p (CR0_REGNO + i))
18112
+ crsave_v[n_crsave++]
18113
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
18115
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
18116
+ gen_rtvec_v (n_crsave, crsave_v)));
18117
END_USE (REGNO (cr_save_rtx));
18119
- rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
18120
- NULL_RTX, NULL_RTX);
18121
+ /* Now, there's no way that dwarf2out_frame_debug_expr is going to
18122
+ understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
18123
+ so we need to construct a frame expression manually. */
18124
+ RTX_FRAME_RELATED_P (insn) = 1;
18126
+ /* Update address to be stack-pointer relative, like
18127
+ rs6000_frame_related would do. */
18128
+ addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
18129
+ GEN_INT (info->cr_save_offset + sp_off));
18130
+ mem = gen_frame_mem (SImode, addr);
18132
+ if (DEFAULT_ABI == ABI_ELFv2)
18134
+ /* In the ELFv2 ABI we generate separate CFI records for each
18135
+ CR field that was actually saved. They all point to the
18136
+ same 32-bit stack slot. */
18138
+ int n_crframe = 0;
18140
+ for (i = 0; i < 8; i++)
18141
+ if (save_reg_p (CR0_REGNO + i))
18143
+ crframe[n_crframe]
18144
+ = gen_rtx_SET (VOIDmode, mem,
18145
+ gen_rtx_REG (SImode, CR0_REGNO + i));
18147
+ RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
18151
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
18152
+ gen_rtx_PARALLEL (VOIDmode,
18153
+ gen_rtvec_v (n_crframe, crframe)));
18157
+ /* In other ABIs, by convention, we use a single CR regnum to
18158
+ represent the fact that all call-saved CR fields are saved.
18159
+ We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
18160
+ rtx set = gen_rtx_SET (VOIDmode, mem,
18161
+ gen_rtx_REG (SImode, CR2_REGNO));
18162
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
18166
+ /* In the ELFv2 ABI we need to save all call-saved CR fields into
18167
+ *separate* slots if the routine calls __builtin_eh_return, so
18168
+ that they can be independently restored by the unwinder. */
18169
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
18171
+ int i, cr_off = info->ehcr_offset;
18174
+ /* ??? We might get better performance by using multiple mfocrf
18176
+ crsave = gen_rtx_REG (SImode, 0);
18177
+ emit_insn (gen_movesi_from_cr (crsave));
18179
+ for (i = 0; i < 8; i++)
18180
+ if (!call_used_regs[CR0_REGNO + i])
18182
+ rtvec p = rtvec_alloc (2);
18184
+ = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
18186
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
18188
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
18190
+ RTX_FRAME_RELATED_P (insn) = 1;
18191
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
18192
+ gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
18193
+ sp_reg_rtx, cr_off + sp_off));
18195
+ cr_off += reg_size;
18199
/* Update stack and set back pointer unless this is V.4,
18200
for which it was done previously. */
18201
if (!WORLD_SAVE_P (info) && info->push_p
18202
@@ -20292,7 +22889,7 @@
18203
info->altivec_save_offset + ptr_off,
18204
0, V4SImode, SAVRES_SAVE | SAVRES_VR);
18205
rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
18206
- NULL_RTX, NULL_RTX);
18207
+ NULL_RTX, NULL_RTX, NULL_RTX);
18208
if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
18210
/* The oddity mentioned above clobbered our frame reg. */
18211
@@ -20308,7 +22905,7 @@
18212
for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
18213
if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
18215
- rtx areg, savereg, mem;
18216
+ rtx areg, savereg, mem, split_reg;
18219
offset = (info->altivec_save_offset + frame_off
18220
@@ -20326,8 +22923,18 @@
18222
insn = emit_move_insn (mem, savereg);
18224
+ /* When we split a VSX store into two insns, we need to make
18225
+ sure the DWARF info knows which register we are storing.
18226
+ Pass it in to be used on the appropriate note. */
18227
+ if (!BYTES_BIG_ENDIAN
18228
+ && GET_CODE (PATTERN (insn)) == SET
18229
+ && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
18230
+ split_reg = savereg;
18232
+ split_reg = NULL_RTX;
18234
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
18235
- areg, GEN_INT (offset));
18236
+ areg, GEN_INT (offset), split_reg);
18240
@@ -20351,7 +22958,8 @@
18241
be using r12 as frame_reg_rtx and r11 as the static chain
18242
pointer for nested functions. */
18244
- if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p)
18245
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18246
+ && !using_static_chain_p)
18248
else if (REGNO (frame_reg_rtx) == 12)
18250
@@ -20390,7 +22998,7 @@
18251
can use register 0. This allows us to use a plain 'blr' to return
18252
from the procedure more often. */
18253
int save_LR_around_toc_setup = (TARGET_ELF
18254
- && DEFAULT_ABI != ABI_AIX
18255
+ && DEFAULT_ABI == ABI_V4
18257
&& ! info->lr_save_p
18258
&& EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0);
18259
@@ -20452,7 +23060,7 @@
18260
if (rs6000_save_toc_in_prologue_p ())
18262
rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
18263
- emit_insn (gen_frame_store (reg, sp_reg_rtx, 5 * reg_size));
18264
+ emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
18268
@@ -20493,6 +23101,49 @@
18272
+ /* ELFv2 ABI r2 setup code and local entry point. This must follow
18273
+ immediately after the global entry point label. */
18274
+ if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
18276
+ const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
18278
+ fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
18279
+ fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
18281
+ fputs ("\t.localentry\t", file);
18282
+ assemble_name (file, name);
18283
+ fputs (",.-", file);
18284
+ assemble_name (file, name);
18285
+ fputs ("\n", file);
18288
+ /* Output -mprofile-kernel code. This needs to be done here instead of
18289
+ in output_function_profile since it must go after the ELFv2 ABI
18290
+ local entry point. */
18291
+ if (TARGET_PROFILE_KERNEL)
18293
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
18294
+ gcc_assert (!TARGET_32BIT);
18296
+ asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
18297
+ asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
18299
+ /* In the ELFv2 ABI we have no compiler stack word. It must be
18300
+ the resposibility of _mcount to preserve the static chain
18301
+ register if required. */
18302
+ if (DEFAULT_ABI != ABI_ELFv2
18303
+ && cfun->static_chain_decl != NULL)
18305
+ asm_fprintf (file, "\tstd %s,24(%s)\n",
18306
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
18307
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
18308
+ asm_fprintf (file, "\tld %s,24(%s)\n",
18309
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
18312
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
18315
rs6000_pic_labelno++;
18318
@@ -20545,6 +23196,7 @@
18320
if (using_mfcr_multiple && count > 1)
18326
@@ -20562,16 +23214,43 @@
18327
gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
18330
- emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
18331
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
18332
gcc_assert (ndx == count);
18334
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
18335
+ CR field separately. */
18336
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
18338
+ for (i = 0; i < 8; i++)
18339
+ if (save_reg_p (CR0_REGNO + i))
18340
+ add_reg_note (insn, REG_CFA_RESTORE,
18341
+ gen_rtx_REG (SImode, CR0_REGNO + i));
18343
+ RTX_FRAME_RELATED_P (insn) = 1;
18347
for (i = 0; i < 8; i++)
18348
if (save_reg_p (CR0_REGNO + i))
18349
- emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i),
18352
+ rtx insn = emit_insn (gen_movsi_to_cr_one
18353
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
18355
- if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
18356
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
18357
+ CR field separately, attached to the insn that in fact
18358
+ restores this particular CR field. */
18359
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
18361
+ add_reg_note (insn, REG_CFA_RESTORE,
18362
+ gen_rtx_REG (SImode, CR0_REGNO + i));
18364
+ RTX_FRAME_RELATED_P (insn) = 1;
18368
+ /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
18369
+ if (!exit_func && DEFAULT_ABI != ABI_ELFv2
18370
+ && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
18372
rtx insn = get_last_insn ();
18373
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
18374
@@ -20612,10 +23291,22 @@
18376
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
18378
- if (info->cr_save_p)
18379
+ if (DEFAULT_ABI == ABI_ELFv2)
18382
+ for (i = 0; i < 8; i++)
18383
+ if (save_reg_p (CR0_REGNO + i))
18385
+ rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
18386
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
18390
+ else if (info->cr_save_p)
18391
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
18392
gen_rtx_REG (SImode, CR2_REGNO),
18395
if (info->lr_save_p)
18396
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
18397
gen_rtx_REG (Pmode, LR_REGNO),
18398
@@ -21113,6 +23804,35 @@
18399
|| (!restoring_GPRs_inline
18400
&& info->first_fp_reg_save == 64));
18402
+ /* In the ELFv2 ABI we need to restore all call-saved CR fields from
18403
+ *separate* slots if the routine calls __builtin_eh_return, so
18404
+ that they can be independently restored by the unwinder. */
18405
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
18407
+ int i, cr_off = info->ehcr_offset;
18409
+ for (i = 0; i < 8; i++)
18410
+ if (!call_used_regs[CR0_REGNO + i])
18412
+ rtx reg = gen_rtx_REG (SImode, 0);
18413
+ emit_insn (gen_frame_load (reg, frame_reg_rtx,
18414
+ cr_off + frame_off));
18416
+ insn = emit_insn (gen_movsi_to_cr_one
18417
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
18419
+ if (!exit_func && flag_shrink_wrap)
18421
+ add_reg_note (insn, REG_CFA_RESTORE,
18422
+ gen_rtx_REG (SImode, CR0_REGNO + i));
18424
+ RTX_FRAME_RELATED_P (insn) = 1;
18427
+ cr_off += reg_size;
18431
/* Get the old lr if we saved it. If we are restoring registers
18432
out-of-line, then the out-of-line routines can do this for us. */
18433
if (restore_lr && restoring_GPRs_inline)
18434
@@ -21156,7 +23876,7 @@
18436
rtx reg = gen_rtx_REG (reg_mode, 2);
18437
emit_insn (gen_frame_load (reg, frame_reg_rtx,
18438
- frame_off + 5 * reg_size));
18439
+ frame_off + RS6000_TOC_SAVE_SLOT));
18443
@@ -21442,6 +24162,7 @@
18444
if (! restoring_FPRs_inline)
18450
if (flag_shrink_wrap)
18451
@@ -21450,10 +24171,9 @@
18452
sym = rs6000_savres_routine_sym (info,
18453
SAVRES_FPR | (lr ? SAVRES_LR : 0));
18454
RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
18455
- RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode,
18456
- gen_rtx_REG (Pmode,
18457
- DEFAULT_ABI == ABI_AIX
18459
+ reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
18460
+ RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
18462
for (i = 0; i < 64 - info->first_fp_reg_save; i++)
18464
rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
18465
@@ -21531,7 +24251,8 @@
18467
System V.4 Powerpc's (and the embedded ABI derived from it) use a
18468
different traceback table. */
18469
- if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive
18470
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18471
+ && ! flag_inhibit_size_directive
18472
&& rs6000_traceback != traceback_none && !cfun->is_thunk)
18474
const char *fname = NULL;
18475
@@ -21859,6 +24580,12 @@
18476
SIBLING_CALL_P (insn) = 1;
18479
+ /* Ensure we have a global entry point for the thunk. ??? We could
18480
+ avoid that if the target routine doesn't need a global entry point,
18481
+ but we do not know whether this is the case at this point. */
18482
+ if (DEFAULT_ABI == ABI_ELFv2)
18483
+ cfun->machine->r2_setup_needed = true;
18485
/* Run just enough of rest_of_compilation to get the insns emitted.
18486
There's not really enough bulk here to make other passes such as
18487
instruction scheduling worth while. Note that use_thunk calls
18488
@@ -22555,7 +25282,7 @@
18489
if (TARGET_PROFILE_KERNEL)
18492
- if (DEFAULT_ABI == ABI_AIX)
18493
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18495
#ifndef NO_PROFILE_COUNTERS
18496
# define NO_PROFILE_COUNTERS 0
18497
@@ -22699,29 +25426,9 @@
18503
- if (!TARGET_PROFILE_KERNEL)
18505
- /* Don't do anything, done in output_profile_hook (). */
18509
- gcc_assert (!TARGET_32BIT);
18511
- asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
18512
- asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
18514
- if (cfun->static_chain_decl != NULL)
18516
- asm_fprintf (file, "\tstd %s,24(%s)\n",
18517
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
18518
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
18519
- asm_fprintf (file, "\tld %s,24(%s)\n",
18520
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
18523
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
18525
+ /* Don't do anything, done in output_profile_hook (). */
18529
@@ -22847,6 +25554,7 @@
18530
|| rs6000_cpu_attr == CPU_POWER4
18531
|| rs6000_cpu_attr == CPU_POWER5
18532
|| rs6000_cpu_attr == CPU_POWER7
18533
+ || rs6000_cpu_attr == CPU_POWER8
18534
|| rs6000_cpu_attr == CPU_CELL)
18535
&& recog_memoized (dep_insn)
18536
&& (INSN_CODE (dep_insn) >= 0))
18537
@@ -23129,7 +25837,8 @@
18538
if (rs6000_cpu_attr == CPU_CELL)
18539
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18541
- if (rs6000_sched_groups)
18542
+ if (rs6000_sched_groups
18543
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
18545
enum attr_type type = get_attr_type (insn);
18546
if (type == TYPE_LOAD_EXT_U
18547
@@ -23154,7 +25863,8 @@
18548
|| GET_CODE (PATTERN (insn)) == CLOBBER)
18551
- if (rs6000_sched_groups)
18552
+ if (rs6000_sched_groups
18553
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
18555
enum attr_type type = get_attr_type (insn);
18556
if (type == TYPE_LOAD_U || type == TYPE_STORE_U
18557
@@ -23433,6 +26143,8 @@
18566
@@ -24060,6 +26772,39 @@
18570
+ case PROCESSOR_POWER8:
18571
+ type = get_attr_type (insn);
18575
+ case TYPE_CR_LOGICAL:
18576
+ case TYPE_DELAYED_CR:
18580
+ case TYPE_COMPARE:
18581
+ case TYPE_DELAYED_COMPARE:
18582
+ case TYPE_VAR_DELAYED_COMPARE:
18583
+ case TYPE_IMUL_COMPARE:
18584
+ case TYPE_LMUL_COMPARE:
18587
+ case TYPE_LOAD_L:
18588
+ case TYPE_STORE_C:
18589
+ case TYPE_LOAD_U:
18590
+ case TYPE_LOAD_UX:
18591
+ case TYPE_LOAD_EXT:
18592
+ case TYPE_LOAD_EXT_U:
18593
+ case TYPE_LOAD_EXT_UX:
18594
+ case TYPE_STORE_UX:
18595
+ case TYPE_VECSTORE:
18596
+ case TYPE_MFJMPR:
18597
+ case TYPE_MTJMPR:
18606
@@ -24138,6 +26883,25 @@
18610
+ case PROCESSOR_POWER8:
18611
+ type = get_attr_type (insn);
18619
+ case TYPE_LOAD_L:
18620
+ case TYPE_STORE_C:
18621
+ case TYPE_LOAD_EXT_U:
18622
+ case TYPE_LOAD_EXT_UX:
18623
+ case TYPE_STORE_UX:
18632
@@ -24227,8 +26991,9 @@
18633
if (can_issue_more && !is_branch_slot_insn (next_insn))
18636
- /* Power6 and Power7 have special group ending nop. */
18637
- if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
18638
+ /* Do we have a special group ending nop? */
18639
+ if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
18640
+ || rs6000_cpu_attr == CPU_POWER8)
18642
nop = gen_group_ending_nop ();
18643
emit_insn_before (nop, next_insn);
18644
@@ -24599,6 +27364,11 @@
18645
ret = (TARGET_32BIT) ? 12 : 24;
18649
+ gcc_assert (!TARGET_32BIT);
18655
ret = (TARGET_32BIT) ? 40 : 48;
18656
@@ -24654,6 +27424,7 @@
18659
/* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
18663
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
18664
@@ -24948,7 +27719,7 @@
18666
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
18668
- if (DEFAULT_ABI == ABI_AIX
18669
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18670
&& TARGET_MINIMAL_TOC
18671
&& !TARGET_RELOCATABLE)
18673
@@ -24969,7 +27740,8 @@
18675
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
18677
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE)
18678
+ else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18679
+ && !TARGET_RELOCATABLE)
18680
fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
18683
@@ -25519,7 +28291,7 @@
18687
- else if (DEFAULT_ABI == ABI_AIX)
18688
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18692
@@ -25595,7 +28367,7 @@
18694
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
18696
- if (TARGET_64BIT)
18697
+ if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
18699
fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
18700
ASM_OUTPUT_LABEL (file, name);
18701
@@ -25661,8 +28433,7 @@
18702
fprintf (file, "%s:\n", desc_name);
18703
fprintf (file, "\t.long %s\n", orig_name);
18704
fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
18705
- if (DEFAULT_ABI == ABI_AIX)
18706
- fputs ("\t.long 0\n", file);
18707
+ fputs ("\t.long 0\n", file);
18708
fprintf (file, "\t.previous\n");
18710
ASM_OUTPUT_LABEL (file, name);
18711
@@ -25691,7 +28462,7 @@
18714
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
18715
- if (TARGET_32BIT)
18716
+ if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
18717
file_end_indicate_exec_stack ();
18720
@@ -26431,7 +29202,8 @@
18721
/* For those processors that have slow LR/CTR moves, make them more
18722
expensive than memory in order to bias spills to memory .*/
18723
else if ((rs6000_cpu == PROCESSOR_POWER6
18724
- || rs6000_cpu == PROCESSOR_POWER7)
18725
+ || rs6000_cpu == PROCESSOR_POWER7
18726
+ || rs6000_cpu == PROCESSOR_POWER8)
18727
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
18728
ret = 6 * hard_regno_nregs[0][mode];
18730
@@ -26441,7 +29213,7 @@
18733
/* If we have VSX, we can easily move between FPR or Altivec registers. */
18734
- else if (VECTOR_UNIT_VSX_P (mode)
18735
+ else if (VECTOR_MEM_VSX_P (mode)
18736
&& reg_classes_intersect_p (to, VSX_REGS)
18737
&& reg_classes_intersect_p (from, VSX_REGS))
18738
ret = 2 * hard_regno_nregs[32][mode];
18739
@@ -26482,7 +29254,8 @@
18741
if (reg_classes_intersect_p (rclass, GENERAL_REGS))
18742
ret = 4 * hard_regno_nregs[0][mode];
18743
- else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
18744
+ else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
18745
+ || reg_classes_intersect_p (rclass, VSX_REGS)))
18746
ret = 4 * hard_regno_nregs[32][mode];
18747
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
18748
ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
18749
@@ -26644,54 +29417,26 @@
18750
emit_insn (gen_rtx_SET (VOIDmode, dst, r));
18753
-/* Newton-Raphson approximation of floating point divide with just 2 passes
18754
- (either single precision floating point, or newer machines with higher
18755
- accuracy estimates). Support both scalar and vector divide. Assumes no
18756
- trapping math and finite arguments. */
18757
+/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
18758
+ add a reg_note saying that this was a division. Support both scalar and
18759
+ vector divide. Assumes no trapping math and finite arguments. */
18762
-rs6000_emit_swdiv_high_precision (rtx dst, rtx n, rtx d)
18764
+rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
18766
enum machine_mode mode = GET_MODE (dst);
18767
- rtx x0, e0, e1, y1, u0, v0;
18768
- enum insn_code code = optab_handler (smul_optab, mode);
18769
- insn_gen_fn gen_mul = GEN_FCN (code);
18770
- rtx one = rs6000_load_constant_and_splat (mode, dconst1);
18771
+ rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
18774
- gcc_assert (code != CODE_FOR_nothing);
18775
+ /* Low precision estimates guarantee 5 bits of accuracy. High
18776
+ precision estimates guarantee 14 bits of accuracy. SFmode
18777
+ requires 23 bits of accuracy. DFmode requires 52 bits of
18778
+ accuracy. Each pass at least doubles the accuracy, leading
18779
+ to the following. */
18780
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
18781
+ if (mode == DFmode || mode == V2DFmode)
18784
- /* x0 = 1./d estimate */
18785
- x0 = gen_reg_rtx (mode);
18786
- emit_insn (gen_rtx_SET (VOIDmode, x0,
18787
- gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
18790
- e0 = gen_reg_rtx (mode);
18791
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - (d * x0) */
18793
- e1 = gen_reg_rtx (mode);
18794
- rs6000_emit_madd (e1, e0, e0, e0); /* e1 = (e0 * e0) + e0 */
18796
- y1 = gen_reg_rtx (mode);
18797
- rs6000_emit_madd (y1, e1, x0, x0); /* y1 = (e1 * x0) + x0 */
18799
- u0 = gen_reg_rtx (mode);
18800
- emit_insn (gen_mul (u0, n, y1)); /* u0 = n * y1 */
18802
- v0 = gen_reg_rtx (mode);
18803
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - (d * u0) */
18805
- rs6000_emit_madd (dst, v0, y1, u0); /* dst = (v0 * y1) + u0 */
18808
-/* Newton-Raphson approximation of floating point divide that has a low
18809
- precision estimate. Assumes no trapping math and finite arguments. */
18812
-rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
18814
- enum machine_mode mode = GET_MODE (dst);
18815
- rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
18816
enum insn_code code = optab_handler (smul_optab, mode);
18817
insn_gen_fn gen_mul = GEN_FCN (code);
18819
@@ -26705,47 +29450,45 @@
18820
gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
18823
- e0 = gen_reg_rtx (mode);
18824
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - d * x0 */
18825
+ /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
18826
+ if (passes > 1) {
18828
- y1 = gen_reg_rtx (mode);
18829
- rs6000_emit_madd (y1, e0, x0, x0); /* y1 = x0 + e0 * x0 */
18830
+ /* e0 = 1. - d * x0 */
18831
+ e0 = gen_reg_rtx (mode);
18832
+ rs6000_emit_nmsub (e0, d, x0, one);
18834
- e1 = gen_reg_rtx (mode);
18835
- emit_insn (gen_mul (e1, e0, e0)); /* e1 = e0 * e0 */
18836
+ /* x1 = x0 + e0 * x0 */
18837
+ x1 = gen_reg_rtx (mode);
18838
+ rs6000_emit_madd (x1, e0, x0, x0);
18840
- y2 = gen_reg_rtx (mode);
18841
- rs6000_emit_madd (y2, e1, y1, y1); /* y2 = y1 + e1 * y1 */
18842
+ for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
18843
+ ++i, xprev = xnext, eprev = enext) {
18845
+ /* enext = eprev * eprev */
18846
+ enext = gen_reg_rtx (mode);
18847
+ emit_insn (gen_mul (enext, eprev, eprev));
18849
- e2 = gen_reg_rtx (mode);
18850
- emit_insn (gen_mul (e2, e1, e1)); /* e2 = e1 * e1 */
18851
+ /* xnext = xprev + enext * xprev */
18852
+ xnext = gen_reg_rtx (mode);
18853
+ rs6000_emit_madd (xnext, enext, xprev, xprev);
18856
- y3 = gen_reg_rtx (mode);
18857
- rs6000_emit_madd (y3, e2, y2, y2); /* y3 = y2 + e2 * y2 */
18861
- u0 = gen_reg_rtx (mode);
18862
- emit_insn (gen_mul (u0, n, y3)); /* u0 = n * y3 */
18863
+ /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
18865
- v0 = gen_reg_rtx (mode);
18866
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - d * u0 */
18867
+ /* u = n * xprev */
18868
+ u = gen_reg_rtx (mode);
18869
+ emit_insn (gen_mul (u, n, xprev));
18871
- rs6000_emit_madd (dst, v0, y3, u0); /* dst = u0 + v0 * y3 */
18873
+ /* v = n - (d * u) */
18874
+ v = gen_reg_rtx (mode);
18875
+ rs6000_emit_nmsub (v, d, u, n);
18877
-/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
18878
- add a reg_note saying that this was a division. Support both scalar and
18879
- vector divide. Assumes no trapping math and finite arguments. */
18880
+ /* dst = (v * xprev) + u */
18881
+ rs6000_emit_madd (dst, v, xprev, u);
18884
-rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
18886
- enum machine_mode mode = GET_MODE (dst);
18888
- if (RS6000_RECIP_HIGH_PRECISION_P (mode))
18889
- rs6000_emit_swdiv_high_precision (dst, n, d);
18891
- rs6000_emit_swdiv_low_precision (dst, n, d);
18894
add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
18896
@@ -26759,7 +29502,16 @@
18897
enum machine_mode mode = GET_MODE (src);
18898
rtx x0 = gen_reg_rtx (mode);
18899
rtx y = gen_reg_rtx (mode);
18900
- int passes = (TARGET_RECIP_PRECISION) ? 2 : 3;
18902
+ /* Low precision estimates guarantee 5 bits of accuracy. High
18903
+ precision estimates guarantee 14 bits of accuracy. SFmode
18904
+ requires 23 bits of accuracy. DFmode requires 52 bits of
18905
+ accuracy. Each pass at least doubles the accuracy, leading
18906
+ to the following. */
18907
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
18908
+ if (mode == DFmode || mode == V2DFmode)
18911
REAL_VALUE_TYPE dconst3_2;
18914
@@ -26921,6 +29673,136 @@
18918
+/* Expand an Altivec constant permutation for little endian mode.
18919
+ There are two issues: First, the two input operands must be
18920
+ swapped so that together they form a double-wide array in LE
18921
+ order. Second, the vperm instruction has surprising behavior
18922
+ in LE mode: it interprets the elements of the source vectors
18923
+ in BE mode ("left to right") and interprets the elements of
18924
+ the destination vector in LE mode ("right to left"). To
18925
+ correct for this, we must subtract each element of the permute
18926
+ control vector from 31.
18928
+ For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
18929
+ with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
18930
+ We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
18931
+ serve as the permute control vector. Then, in BE mode,
18935
+ places the desired result in vr9. However, in LE mode the
18936
+ vector contents will be
18938
+ vr10 = 00000003 00000002 00000001 00000000
18939
+ vr11 = 00000007 00000006 00000005 00000004
18941
+ The result of the vperm using the same permute control vector is
18943
+ vr9 = 05000000 07000000 01000000 03000000
18945
+ That is, the leftmost 4 bytes of vr10 are interpreted as the
18946
+ source for the rightmost 4 bytes of vr9, and so on.
18948
+ If we change the permute control vector to
18950
+ vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
18956
+ we get the desired
18958
+ vr9 = 00000006 00000004 00000002 00000000. */
18961
+altivec_expand_vec_perm_const_le (rtx operands[4])
18965
+ rtx constv, unspec;
18966
+ rtx target = operands[0];
18967
+ rtx op0 = operands[1];
18968
+ rtx op1 = operands[2];
18969
+ rtx sel = operands[3];
18971
+ /* Unpack and adjust the constant selector. */
18972
+ for (i = 0; i < 16; ++i)
18974
+ rtx e = XVECEXP (sel, 0, i);
18975
+ unsigned int elt = 31 - (INTVAL (e) & 31);
18976
+ perm[i] = GEN_INT (elt);
18979
+ /* Expand to a permute, swapping the inputs and using the
18980
+ adjusted selector. */
18981
+ if (!REG_P (op0))
18982
+ op0 = force_reg (V16QImode, op0);
18983
+ if (!REG_P (op1))
18984
+ op1 = force_reg (V16QImode, op1);
18986
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
18987
+ constv = force_reg (V16QImode, constv);
18988
+ unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
18990
+ if (!REG_P (target))
18992
+ rtx tmp = gen_reg_rtx (V16QImode);
18993
+ emit_move_insn (tmp, unspec);
18997
+ emit_move_insn (target, unspec);
19000
+/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
19001
+ permute control vector. But here it's not a constant, so we must
19002
+ generate a vector splat/subtract to do the adjustment. */
19005
+altivec_expand_vec_perm_le (rtx operands[4])
19007
+ rtx splat, unspec;
19008
+ rtx target = operands[0];
19009
+ rtx op0 = operands[1];
19010
+ rtx op1 = operands[2];
19011
+ rtx sel = operands[3];
19012
+ rtx tmp = target;
19014
+ /* Get everything in regs so the pattern matches. */
19015
+ if (!REG_P (op0))
19016
+ op0 = force_reg (V16QImode, op0);
19017
+ if (!REG_P (op1))
19018
+ op1 = force_reg (V16QImode, op1);
19019
+ if (!REG_P (sel))
19020
+ sel = force_reg (V16QImode, sel);
19021
+ if (!REG_P (target))
19022
+ tmp = gen_reg_rtx (V16QImode);
19024
+ /* SEL = splat(31) - SEL. */
19025
+ /* We want to subtract from 31, but we can't vspltisb 31 since
19026
+ it's out of range. -1 works as well because only the low-order
19027
+ five bits of the permute control vector elements are used. */
19028
+ splat = gen_rtx_VEC_DUPLICATE (V16QImode,
19029
+ gen_rtx_CONST_INT (QImode, -1));
19030
+ emit_move_insn (tmp, splat);
19031
+ sel = gen_rtx_MINUS (V16QImode, tmp, sel);
19032
+ emit_move_insn (tmp, sel);
19034
+ /* Permute with operands reversed and adjusted selector. */
19035
+ unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp),
19038
+ /* Copy into target, possibly by way of a register. */
19039
+ if (!REG_P (target))
19041
+ emit_move_insn (tmp, unspec);
19045
+ emit_move_insn (target, unspec);
19048
/* Expand an Altivec constant permutation. Return true if we match
19049
an efficient implementation; false to fall back to VPERM. */
19051
@@ -26928,26 +29810,37 @@
19052
altivec_expand_vec_perm_const (rtx operands[4])
19054
struct altivec_perm_insn {
19055
+ HOST_WIDE_INT mask;
19056
enum insn_code impl;
19057
unsigned char perm[16];
19059
static const struct altivec_perm_insn patterns[] = {
19060
- { CODE_FOR_altivec_vpkuhum,
19061
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum,
19062
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
19063
- { CODE_FOR_altivec_vpkuwum,
19064
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
19065
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
19066
- { CODE_FOR_altivec_vmrghb,
19067
+ { OPTION_MASK_ALTIVEC,
19068
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb : CODE_FOR_altivec_vmrglb,
19069
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
19070
- { CODE_FOR_altivec_vmrghh,
19071
+ { OPTION_MASK_ALTIVEC,
19072
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh : CODE_FOR_altivec_vmrglh,
19073
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
19074
- { CODE_FOR_altivec_vmrghw,
19075
+ { OPTION_MASK_ALTIVEC,
19076
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw : CODE_FOR_altivec_vmrglw,
19077
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
19078
- { CODE_FOR_altivec_vmrglb,
19079
+ { OPTION_MASK_ALTIVEC,
19080
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb : CODE_FOR_altivec_vmrghb,
19081
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
19082
- { CODE_FOR_altivec_vmrglh,
19083
+ { OPTION_MASK_ALTIVEC,
19084
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh : CODE_FOR_altivec_vmrghh,
19085
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
19086
- { CODE_FOR_altivec_vmrglw,
19087
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
19088
+ { OPTION_MASK_ALTIVEC,
19089
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw : CODE_FOR_altivec_vmrghw,
19090
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
19091
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
19092
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
19093
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
19094
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
19097
unsigned int i, j, elt, which;
19098
@@ -27004,6 +29897,8 @@
19102
+ if (!BYTES_BIG_ENDIAN)
19104
emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt)));
19107
@@ -27015,9 +29910,10 @@
19111
+ int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
19112
x = gen_reg_rtx (V8HImode);
19113
emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0),
19114
- GEN_INT (elt / 2)));
19115
+ GEN_INT (field)));
19116
emit_move_insn (target, gen_lowpart (V16QImode, x));
19119
@@ -27033,9 +29929,10 @@
19123
+ int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
19124
x = gen_reg_rtx (V4SImode);
19125
emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0),
19126
- GEN_INT (elt / 4)));
19127
+ GEN_INT (field)));
19128
emit_move_insn (target, gen_lowpart (V16QImode, x));
19131
@@ -27047,6 +29944,9 @@
19135
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
19138
elt = patterns[j].perm[0];
19139
if (perm[0] == elt)
19141
@@ -27070,7 +29970,30 @@
19142
enum machine_mode omode = insn_data[icode].operand[0].mode;
19143
enum machine_mode imode = insn_data[icode].operand[1].mode;
19146
+ /* For little-endian, don't use vpkuwum and vpkuhum if the
19147
+ underlying vector type is not V4SI and V8HI, respectively.
19148
+ For example, using vpkuwum with a V8HI picks up the even
19149
+ halfwords (BE numbering) when the even halfwords (LE
19150
+ numbering) are what we need. */
19151
+ if (!BYTES_BIG_ENDIAN
19152
+ && icode == CODE_FOR_altivec_vpkuwum
19153
+ && ((GET_CODE (op0) == REG
19154
+ && GET_MODE (op0) != V4SImode)
19155
+ || (GET_CODE (op0) == SUBREG
19156
+ && GET_MODE (XEXP (op0, 0)) != V4SImode)))
19158
+ if (!BYTES_BIG_ENDIAN
19159
+ && icode == CODE_FOR_altivec_vpkuhum
19160
+ && ((GET_CODE (op0) == REG
19161
+ && GET_MODE (op0) != V8HImode)
19162
+ || (GET_CODE (op0) == SUBREG
19163
+ && GET_MODE (XEXP (op0, 0)) != V8HImode)))
19166
+ /* For little-endian, the two input operands must be swapped
19167
+ (or swapped back) to ensure proper right-to-left numbering
19168
+ from 0 to 2N-1. */
19169
+ if (swapped ^ !BYTES_BIG_ENDIAN)
19170
x = op0, op0 = op1, op1 = x;
19171
if (imode != V16QImode)
19173
@@ -27088,6 +30011,12 @@
19177
+ if (!BYTES_BIG_ENDIAN)
19179
+ altivec_expand_vec_perm_const_le (operands);
19186
@@ -27137,6 +30066,21 @@
19187
gcc_assert (GET_MODE_NUNITS (vmode) == 2);
19188
dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
19190
+ /* For little endian, swap operands and invert/swap selectors
19191
+ to get the correct xxpermdi. The operand swap sets up the
19192
+ inputs as a little endian array. The selectors are swapped
19193
+ because they are defined to use big endian ordering. The
19194
+ selectors are inverted to get the correct doublewords for
19195
+ little endian ordering. */
19196
+ if (!BYTES_BIG_ENDIAN)
19199
+ perm0 = 3 - perm0;
19200
+ perm1 = 3 - perm1;
19201
+ n = perm0, perm0 = perm1, perm1 = n;
19202
+ x = op0, op0 = op1, op1 = x;
19205
x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
19206
v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
19207
x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
19208
@@ -27232,7 +30176,7 @@
19209
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
19212
- high = (highp == BYTES_BIG_ENDIAN ? 0 : nelt / 2);
19213
+ high = (highp ? 0 : nelt / 2);
19214
for (i = 0; i < nelt / 2; i++)
19216
perm[i * 2] = GEN_INT (i + high);
19217
@@ -27287,6 +30231,8 @@
19219
enum machine_mode mode;
19220
unsigned int regno;
19221
+ enum machine_mode elt_mode;
19224
/* Special handling for structs in darwin64. */
19226
@@ -27306,6 +30252,36 @@
19227
/* Otherwise fall through to standard ABI rules. */
19230
+ /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
19231
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
19232
+ &elt_mode, &n_elts))
19234
+ int first_reg, n_regs, i;
19237
+ if (SCALAR_FLOAT_MODE_P (elt_mode))
19239
+ /* _Decimal128 must use even/odd register pairs. */
19240
+ first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
19241
+ n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
19245
+ first_reg = ALTIVEC_ARG_RETURN;
19249
+ par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
19250
+ for (i = 0; i < n_elts; i++)
19252
+ rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
19253
+ rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
19254
+ XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
19260
if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
19262
/* Long long return value need be split in -mpowerpc64, 32bit ABI. */
19263
@@ -27680,22 +30656,32 @@
19265
{ "altivec", OPTION_MASK_ALTIVEC, false, true },
19266
{ "cmpb", OPTION_MASK_CMPB, false, true },
19267
+ { "crypto", OPTION_MASK_CRYPTO, false, true },
19268
+ { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
19269
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
19270
{ "fprnd", OPTION_MASK_FPRND, false, true },
19271
{ "hard-dfp", OPTION_MASK_DFP, false, true },
19272
+ { "htm", OPTION_MASK_HTM, false, true },
19273
{ "isel", OPTION_MASK_ISEL, false, true },
19274
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
19275
{ "mfpgpr", OPTION_MASK_MFPGPR, false, true },
19276
{ "mulhw", OPTION_MASK_MULHW, false, true },
19277
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
19278
- { "update", OPTION_MASK_NO_UPDATE, true , true },
19279
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
19280
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
19281
+ { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
19282
+ { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
19283
+ { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
19284
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
19285
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
19286
+ { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
19287
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
19288
{ "string", OPTION_MASK_STRING, false, true },
19289
+ { "update", OPTION_MASK_NO_UPDATE, true , true },
19290
+ { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
19291
+ { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
19292
{ "vsx", OPTION_MASK_VSX, false, true },
19293
+ { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
19294
#ifdef OPTION_MASK_64BIT
19296
{ "aix64", OPTION_MASK_64BIT, false, false },
19297
@@ -27735,6 +30721,9 @@
19298
{ "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
19299
{ "popcntd", RS6000_BTM_POPCNTD, false, false },
19300
{ "cell", RS6000_BTM_CELL, false, false },
19301
+ { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
19302
+ { "crypto", RS6000_BTM_CRYPTO, false, false },
19303
+ { "htm", RS6000_BTM_HTM, false, false },
19306
/* Option variables that we want to support inside attribute((target)) and
19307
@@ -28251,7 +31240,6 @@
19309
size_t max_column = 76;
19310
const char *comma = "";
19311
- const char *nl = "\n";
19314
start_column += fprintf (file, "%*s", indent, "");
19315
@@ -28282,7 +31270,6 @@
19316
fprintf (stderr, ", \\\n%*s", (int)start_column, "");
19317
cur_column = start_column + len;
19322
fprintf (file, "%s%s%s%s", comma, prefix, no_str,
19323
@@ -28292,7 +31279,7 @@
19327
- fputs (nl, file);
19328
+ fputs ("\n", file);
19331
/* Helper function to print the current isa options on a line. */
19332
@@ -28468,118 +31455,149 @@
19336
-/* A function pointer under AIX is a pointer to a data area whose first word
19337
- contains the actual address of the function, whose second word contains a
19338
- pointer to its TOC, and whose third word contains a value to place in the
19339
- static chain register (r11). Note that if we load the static chain, our
19340
- "trampoline" need not have any executable code. */
19342
+/* Expand code to perform a call under the AIX or ELFv2 ABI. */
19345
-rs6000_call_indirect_aix (rtx value, rtx func_desc, rtx flag)
19346
+rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
19348
+ rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
19349
+ rtx toc_load = NULL_RTX;
19350
+ rtx toc_restore = NULL_RTX;
19355
- rtx stack_toc_offset;
19356
- rtx stack_toc_mem;
19357
- rtx func_toc_offset;
19358
- rtx func_toc_mem;
19359
- rtx func_sc_offset;
19361
+ rtx abi_reg = NULL_RTX;
19365
- rtx (*call_func) (rtx, rtx, rtx, rtx);
19366
- rtx (*call_value_func) (rtx, rtx, rtx, rtx, rtx);
19368
- stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
19369
- toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
19370
+ /* Handle longcall attributes. */
19371
+ if (INTVAL (cookie) & CALL_LONG)
19372
+ func_desc = rs6000_longcall_ref (func_desc);
19374
- /* Load up address of the actual function. */
19375
- func_desc = force_reg (Pmode, func_desc);
19376
- func_addr = gen_reg_rtx (Pmode);
19377
- emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
19379
- if (TARGET_32BIT)
19380
+ /* Handle indirect calls. */
19381
+ if (GET_CODE (func_desc) != SYMBOL_REF
19382
+ || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
19384
+ /* Save the TOC into its reserved slot before the call,
19385
+ and prepare to restore it after the call. */
19386
+ rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
19387
+ rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
19388
+ rtx stack_toc_mem = gen_frame_mem (Pmode,
19389
+ gen_rtx_PLUS (Pmode, stack_ptr,
19390
+ stack_toc_offset));
19391
+ toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
19393
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_32BIT);
19394
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_32BIT);
19395
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_32BIT);
19396
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19398
- call_func = gen_call_indirect_aix32bit;
19399
- call_value_func = gen_call_value_indirect_aix32bit;
19401
+ /* Can we optimize saving the TOC in the prologue or
19402
+ do we need to do it at every call? */
19403
+ if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
19404
+ cfun->machine->save_toc_in_prologue = true;
19407
- call_func = gen_call_indirect_aix32bit_nor11;
19408
- call_value_func = gen_call_value_indirect_aix32bit_nor11;
19409
+ MEM_VOLATILE_P (stack_toc_mem) = 1;
19410
+ emit_move_insn (stack_toc_mem, toc_reg);
19415
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_64BIT);
19416
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_64BIT);
19417
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_64BIT);
19418
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19420
+ if (DEFAULT_ABI == ABI_ELFv2)
19422
- call_func = gen_call_indirect_aix64bit;
19423
- call_value_func = gen_call_value_indirect_aix64bit;
19424
+ /* A function pointer in the ELFv2 ABI is just a plain address, but
19425
+ the ABI requires it to be loaded into r12 before the call. */
19426
+ func_addr = gen_rtx_REG (Pmode, 12);
19427
+ emit_move_insn (func_addr, func_desc);
19428
+ abi_reg = func_addr;
19432
- call_func = gen_call_indirect_aix64bit_nor11;
19433
- call_value_func = gen_call_value_indirect_aix64bit_nor11;
19436
+ /* A function pointer under AIX is a pointer to a data area whose
19437
+ first word contains the actual address of the function, whose
19438
+ second word contains a pointer to its TOC, and whose third word
19439
+ contains a value to place in the static chain register (r11).
19440
+ Note that if we load the static chain, our "trampoline" need
19441
+ not have any executable code. */
19443
- /* Reserved spot to store the TOC. */
19444
- stack_toc_mem = gen_frame_mem (Pmode,
19445
- gen_rtx_PLUS (Pmode,
19447
- stack_toc_offset));
19448
+ /* Load up address of the actual function. */
19449
+ func_desc = force_reg (Pmode, func_desc);
19450
+ func_addr = gen_reg_rtx (Pmode);
19451
+ emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
19453
- gcc_assert (cfun);
19454
- gcc_assert (cfun->machine);
19455
+ /* Prepare to load the TOC of the called function. Note that the
19456
+ TOC load must happen immediately before the actual call so
19457
+ that unwinding the TOC registers works correctly. See the
19458
+ comment in frob_update_context. */
19459
+ rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
19460
+ rtx func_toc_mem = gen_rtx_MEM (Pmode,
19461
+ gen_rtx_PLUS (Pmode, func_desc,
19462
+ func_toc_offset));
19463
+ toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
19465
- /* Can we optimize saving the TOC in the prologue or do we need to do it at
19467
- if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
19468
- cfun->machine->save_toc_in_prologue = true;
19470
+ /* If we have a static chain, load it up. */
19471
+ if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19473
+ rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
19474
+ rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
19475
+ rtx func_sc_mem = gen_rtx_MEM (Pmode,
19476
+ gen_rtx_PLUS (Pmode, func_desc,
19477
+ func_sc_offset));
19478
+ emit_move_insn (sc_reg, func_sc_mem);
19479
+ abi_reg = sc_reg;
19485
- MEM_VOLATILE_P (stack_toc_mem) = 1;
19486
- emit_move_insn (stack_toc_mem, toc_reg);
19487
+ /* Direct calls use the TOC: for local calls, the callee will
19488
+ assume the TOC register is set; for non-local calls, the
19489
+ PLT stub needs the TOC register. */
19490
+ abi_reg = toc_reg;
19491
+ func_addr = func_desc;
19494
- /* Calculate the address to load the TOC of the called function. We don't
19495
- actually load this until the split after reload. */
19496
- func_toc_mem = gen_rtx_MEM (Pmode,
19497
- gen_rtx_PLUS (Pmode,
19499
- func_toc_offset));
19500
+ /* Create the call. */
19501
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
19502
+ if (value != NULL_RTX)
19503
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
19506
- /* If we have a static chain, load it up. */
19507
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19509
- func_sc_mem = gen_rtx_MEM (Pmode,
19510
- gen_rtx_PLUS (Pmode,
19512
- func_sc_offset));
19514
+ call[n_call++] = toc_load;
19516
+ call[n_call++] = toc_restore;
19518
- sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
19519
- emit_move_insn (sc_reg, func_sc_mem);
19521
+ call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
19523
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
19524
+ insn = emit_call_insn (insn);
19526
+ /* Mention all registers defined by the ABI to hold information
19527
+ as uses in CALL_INSN_FUNCTION_USAGE. */
19529
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
19532
+/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
19535
+rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
19540
+ gcc_assert (INTVAL (cookie) == 0);
19542
/* Create the call. */
19544
- insn = call_value_func (value, func_addr, flag, func_toc_mem,
19547
- insn = call_func (func_addr, flag, func_toc_mem, stack_toc_mem);
19548
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
19549
+ if (value != NULL_RTX)
19550
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
19552
- emit_call_insn (insn);
19553
+ call[1] = simple_return_rtx;
19555
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
19556
+ insn = emit_call_insn (insn);
19558
+ /* Note use of the TOC register. */
19559
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
19560
+ /* We need to also mark a use of the link register since the function we
19561
+ sibling-call to will use it to return to our caller. */
19562
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
19565
/* Return whether we need to always update the saved TOC pointer when we update
19566
@@ -28680,6 +31698,656 @@
19567
add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19571
+/* Helper function for rs6000_split_logical to emit a logical instruction after
19572
+ spliting the operation to single GPR registers.
19574
+ DEST is the destination register.
19575
+ OP1 and OP2 are the input source registers.
19576
+ CODE is the base operation (AND, IOR, XOR, NOT).
19577
+ MODE is the machine mode.
19578
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
19579
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
19580
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
19581
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
19582
+ formation of the AND instructions. */
19585
+rs6000_split_logical_inner (rtx dest,
19588
+ enum rtx_code code,
19589
+ enum machine_mode mode,
19590
+ bool complement_final_p,
19591
+ bool complement_op1_p,
19592
+ bool complement_op2_p,
19598
+ /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
19599
+ if (op2 && GET_CODE (op2) == CONST_INT
19600
+ && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
19601
+ && !complement_final_p && !complement_op1_p && !complement_op2_p)
19603
+ HOST_WIDE_INT mask = GET_MODE_MASK (mode);
19604
+ HOST_WIDE_INT value = INTVAL (op2) & mask;
19606
+ /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
19611
+ emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
19615
+ else if (value == mask)
19617
+ if (!rtx_equal_p (dest, op1))
19618
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
19623
+ /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
19624
+ into separate ORI/ORIS or XORI/XORIS instrucitons. */
19625
+ else if (code == IOR || code == XOR)
19629
+ if (!rtx_equal_p (dest, op1))
19630
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
19636
+ if (complement_op1_p)
19637
+ op1 = gen_rtx_NOT (mode, op1);
19639
+ if (complement_op2_p)
19640
+ op2 = gen_rtx_NOT (mode, op2);
19642
+ bool_rtx = ((code == NOT)
19643
+ ? gen_rtx_NOT (mode, op1)
19644
+ : gen_rtx_fmt_ee (code, mode, op1, op2));
19646
+ if (complement_final_p)
19647
+ bool_rtx = gen_rtx_NOT (mode, bool_rtx);
19649
+ set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
19651
+ /* Is this AND with an explicit clobber? */
19654
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
19655
+ set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
19658
+ emit_insn (set_rtx);
19662
+/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
19663
+ operations are split immediately during RTL generation to allow for more
19664
+ optimizations of the AND/IOR/XOR.
19666
+ OPERANDS is an array containing the destination and two input operands.
19667
+ CODE is the base operation (AND, IOR, XOR, NOT).
19668
+ MODE is the machine mode.
19669
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
19670
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
19671
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
19672
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
19673
+ formation of the AND instructions. */
19676
+rs6000_split_logical_di (rtx operands[3],
19677
+ enum rtx_code code,
19678
+ bool complement_final_p,
19679
+ bool complement_op1_p,
19680
+ bool complement_op2_p,
19683
+ const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
19684
+ const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
19685
+ const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
19686
+ enum hi_lo { hi = 0, lo = 1 };
19687
+ rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
19690
+ op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
19691
+ op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
19692
+ op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
19693
+ op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
19696
+ op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
19699
+ if (GET_CODE (operands[2]) != CONST_INT)
19701
+ op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
19702
+ op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
19706
+ HOST_WIDE_INT value = INTVAL (operands[2]);
19707
+ HOST_WIDE_INT value_hi_lo[2];
19709
+ gcc_assert (!complement_final_p);
19710
+ gcc_assert (!complement_op1_p);
19711
+ gcc_assert (!complement_op2_p);
19713
+ value_hi_lo[hi] = value >> 32;
19714
+ value_hi_lo[lo] = value & lower_32bits;
19716
+ for (i = 0; i < 2; i++)
19718
+ HOST_WIDE_INT sub_value = value_hi_lo[i];
19720
+ if (sub_value & sign_bit)
19721
+ sub_value |= upper_32bits;
19723
+ op2_hi_lo[i] = GEN_INT (sub_value);
19725
+ /* If this is an AND instruction, check to see if we need to load
19726
+ the value in a register. */
19727
+ if (code == AND && sub_value != -1 && sub_value != 0
19728
+ && !and_operand (op2_hi_lo[i], SImode))
19729
+ op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
19734
+ for (i = 0; i < 2; i++)
19736
+ /* Split large IOR/XOR operations. */
19737
+ if ((code == IOR || code == XOR)
19738
+ && GET_CODE (op2_hi_lo[i]) == CONST_INT
19739
+ && !complement_final_p
19740
+ && !complement_op1_p
19741
+ && !complement_op2_p
19742
+ && clobber_reg == NULL_RTX
19743
+ && !logical_const_operand (op2_hi_lo[i], SImode))
19745
+ HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
19746
+ HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
19747
+ HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
19748
+ rtx tmp = gen_reg_rtx (SImode);
19750
+ /* Make sure the constant is sign extended. */
19751
+ if ((hi_16bits & sign_bit) != 0)
19752
+ hi_16bits |= upper_32bits;
19754
+ rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
19755
+ code, SImode, false, false, false,
19758
+ rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
19759
+ code, SImode, false, false, false,
19763
+ rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
19764
+ code, SImode, complement_final_p,
19765
+ complement_op1_p, complement_op2_p,
19772
+/* Split the insns that make up boolean operations operating on multiple GPR
19773
+ registers. The boolean MD patterns ensure that the inputs either are
19774
+ exactly the same as the output registers, or there is no overlap.
19776
+ OPERANDS is an array containing the destination and two input operands.
19777
+ CODE is the base operation (AND, IOR, XOR, NOT).
19778
+ MODE is the machine mode.
19779
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
19780
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
19781
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
19782
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
19783
+ formation of the AND instructions. */
19786
+rs6000_split_logical (rtx operands[3],
19787
+ enum rtx_code code,
19788
+ bool complement_final_p,
19789
+ bool complement_op1_p,
19790
+ bool complement_op2_p,
19793
+ enum machine_mode mode = GET_MODE (operands[0]);
19794
+ enum machine_mode sub_mode;
19795
+ rtx op0, op1, op2;
19796
+ int sub_size, regno0, regno1, nregs, i;
19798
+ /* If this is DImode, use the specialized version that can run before
19799
+ register allocation. */
19800
+ if (mode == DImode && !TARGET_POWERPC64)
19802
+ rs6000_split_logical_di (operands, code, complement_final_p,
19803
+ complement_op1_p, complement_op2_p,
19808
+ op0 = operands[0];
19809
+ op1 = operands[1];
19810
+ op2 = (code == NOT) ? NULL_RTX : operands[2];
19811
+ sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
19812
+ sub_size = GET_MODE_SIZE (sub_mode);
19813
+ regno0 = REGNO (op0);
19814
+ regno1 = REGNO (op1);
19816
+ gcc_assert (reload_completed);
19817
+ gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
19818
+ gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
19820
+ nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
19821
+ gcc_assert (nregs > 1);
19823
+ if (op2 && REG_P (op2))
19824
+ gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
19826
+ for (i = 0; i < nregs; i++)
19828
+ int offset = i * sub_size;
19829
+ rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
19830
+ rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
19831
+ rtx sub_op2 = ((code == NOT)
19833
+ : simplify_subreg (sub_mode, op2, mode, offset));
19835
+ rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
19836
+ complement_final_p, complement_op1_p,
19837
+ complement_op2_p, clobber_reg);
19844
+/* Return true if the peephole2 can combine a load involving a combination of
19845
+ an addis instruction and a load with an offset that can be fused together on
19848
+ The operands are:
19849
+ operands[0] register set with addis
19850
+ operands[1] value set via addis
19851
+ operands[2] target register being loaded
19852
+ operands[3] D-form memory reference using operands[0].
19854
+ In addition, we are passed a boolean that is true if this is a peephole2,
19855
+ and we can use see if the addis_reg is dead after the insn and can be
19856
+ replaced by the target register. */
19859
+fusion_gpr_load_p (rtx *operands, bool peep2_p)
19861
+ rtx addis_reg = operands[0];
19862
+ rtx addis_value = operands[1];
19863
+ rtx target = operands[2];
19864
+ rtx mem = operands[3];
19868
+ /* Validate arguments. */
19869
+ if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
19872
+ if (!base_reg_operand (target, GET_MODE (target)))
19875
+ if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
19878
+ if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
19881
+ /* Allow sign/zero extension. */
19882
+ if (GET_CODE (mem) == ZERO_EXTEND
19883
+ || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
19884
+ mem = XEXP (mem, 0);
19886
+ if (!MEM_P (mem))
19889
+ addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
19890
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
19893
+ /* Validate that the register used to load the high value is either the
19894
+ register being loaded, or we can safely replace its use in a peephole2.
19896
+ If this is a peephole2, we assume that there are 2 instructions in the
19897
+ peephole (addis and load), so we want to check if the target register was
19898
+ not used in the memory address and the register to hold the addis result
19899
+ is dead after the peephole. */
19900
+ if (REGNO (addis_reg) != REGNO (target))
19905
+ if (reg_mentioned_p (target, mem))
19908
+ if (!peep2_reg_dead_p (2, addis_reg))
19912
+ base_reg = XEXP (addr, 0);
19913
+ return REGNO (addis_reg) == REGNO (base_reg);
19916
+/* During the peephole2 pass, adjust and expand the insns for a load fusion
19917
+ sequence. We adjust the addis register to use the target register. If the
19918
+ load sign extends, we adjust the code to do the zero extending load, and an
19919
+ explicit sign extension later since the fusion only covers zero extending
19922
+ The operands are:
19923
+ operands[0] register set with addis (to be replaced with target)
19924
+ operands[1] value set via addis
19925
+ operands[2] target register being loaded
19926
+ operands[3] D-form memory reference using operands[0]. */
19929
+expand_fusion_gpr_load (rtx *operands)
19931
+ rtx addis_value = operands[1];
19932
+ rtx target = operands[2];
19933
+ rtx orig_mem = operands[3];
19934
+ rtx new_addr, new_mem, orig_addr, offset;
19935
+ enum rtx_code plus_or_lo_sum;
19936
+ enum machine_mode target_mode = GET_MODE (target);
19937
+ enum machine_mode extend_mode = target_mode;
19938
+ enum machine_mode ptr_mode = Pmode;
19939
+ enum rtx_code extend = UNKNOWN;
19940
+ rtx addis_reg = ((ptr_mode == target_mode)
19942
+ : simplify_subreg (ptr_mode, target, target_mode, 0));
19944
+ if (GET_CODE (orig_mem) == ZERO_EXTEND
19945
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
19947
+ extend = GET_CODE (orig_mem);
19948
+ orig_mem = XEXP (orig_mem, 0);
19949
+ target_mode = GET_MODE (orig_mem);
19952
+ gcc_assert (MEM_P (orig_mem));
19954
+ orig_addr = XEXP (orig_mem, 0);
19955
+ plus_or_lo_sum = GET_CODE (orig_addr);
19956
+ gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
19958
+ offset = XEXP (orig_addr, 1);
19959
+ new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
19960
+ new_mem = change_address (orig_mem, target_mode, new_addr);
19962
+ if (extend != UNKNOWN)
19963
+ new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
19965
+ emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
19966
+ emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
19968
+ if (extend == SIGN_EXTEND)
19970
+ int sub_off = ((BYTES_BIG_ENDIAN)
19971
+ ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
19974
+ = simplify_subreg (target_mode, target, extend_mode, sub_off);
19976
+ emit_insn (gen_rtx_SET (VOIDmode, target,
19977
+ gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
19983
+/* Return a string to fuse an addis instruction with a gpr load to the same
19984
+ register that we loaded up the addis instruction. The code is complicated,
19985
+ so we call output_asm_insn directly, and just return "".
19987
+ The operands are:
19988
+ operands[0] register set with addis (must be same reg as target).
19989
+ operands[1] value set via addis
19990
+ operands[2] target register being loaded
19991
+ operands[3] D-form memory reference using operands[0]. */
19994
+emit_fusion_gpr_load (rtx *operands)
19996
+ rtx addis_reg = operands[0];
19997
+ rtx addis_value = operands[1];
19998
+ rtx target = operands[2];
19999
+ rtx mem = operands[3];
20000
+ rtx fuse_ops[10];
20003
+ const char *addis_str = NULL;
20004
+ const char *load_str = NULL;
20005
+ const char *extend_insn = NULL;
20006
+ const char *mode_name = NULL;
20007
+ char insn_template[80];
20008
+ enum machine_mode mode;
20009
+ const char *comment_str = ASM_COMMENT_START;
20010
+ bool sign_p = false;
20012
+ gcc_assert (REG_P (addis_reg) && REG_P (target));
20013
+ gcc_assert (REGNO (addis_reg) == REGNO (target));
20015
+ if (*comment_str == ' ')
20018
+ /* Allow sign/zero extension. */
20019
+ if (GET_CODE (mem) == ZERO_EXTEND)
20020
+ mem = XEXP (mem, 0);
20022
+ else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
20025
+ mem = XEXP (mem, 0);
20028
+ gcc_assert (MEM_P (mem));
20029
+ addr = XEXP (mem, 0);
20030
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
20031
+ gcc_unreachable ();
20033
+ load_offset = XEXP (addr, 1);
20035
+ /* Now emit the load instruction to the same register. */
20036
+ mode = GET_MODE (mem);
20040
+ mode_name = "char";
20041
+ load_str = "lbz";
20042
+ extend_insn = "extsb %0,%0";
20046
+ mode_name = "short";
20047
+ load_str = "lhz";
20048
+ extend_insn = "extsh %0,%0";
20052
+ mode_name = "int";
20053
+ load_str = "lwz";
20054
+ extend_insn = "extsw %0,%0";
20058
+ if (TARGET_POWERPC64)
20060
+ mode_name = "long";
20064
+ gcc_unreachable ();
20068
+ gcc_unreachable ();
20071
+ /* Emit the addis instruction. */
20072
+ fuse_ops[0] = target;
20073
+ if (satisfies_constraint_L (addis_value))
20075
+ fuse_ops[1] = addis_value;
20076
+ addis_str = "lis %0,%v1";
20079
+ else if (GET_CODE (addis_value) == PLUS)
20081
+ rtx op0 = XEXP (addis_value, 0);
20082
+ rtx op1 = XEXP (addis_value, 1);
20084
+ if (REG_P (op0) && CONST_INT_P (op1)
20085
+ && satisfies_constraint_L (op1))
20087
+ fuse_ops[1] = op0;
20088
+ fuse_ops[2] = op1;
20089
+ addis_str = "addis %0,%1,%v2";
20093
+ else if (GET_CODE (addis_value) == HIGH)
20095
+ rtx value = XEXP (addis_value, 0);
20096
+ if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
20098
+ fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
20099
+ fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
20101
+ addis_str = "addis %0,%2,%1@toc@ha";
20103
+ else if (TARGET_XCOFF)
20104
+ addis_str = "addis %0,%1@u(%2)";
20107
+ gcc_unreachable ();
20110
+ else if (GET_CODE (value) == PLUS)
20112
+ rtx op0 = XEXP (value, 0);
20113
+ rtx op1 = XEXP (value, 1);
20115
+ if (GET_CODE (op0) == UNSPEC
20116
+ && XINT (op0, 1) == UNSPEC_TOCREL
20117
+ && CONST_INT_P (op1))
20119
+ fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
20120
+ fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
20121
+ fuse_ops[3] = op1;
20123
+ addis_str = "addis %0,%2,%1+%3@toc@ha";
20125
+ else if (TARGET_XCOFF)
20126
+ addis_str = "addis %0,%1+%3@u(%2)";
20129
+ gcc_unreachable ();
20133
+ else if (satisfies_constraint_L (value))
20135
+ fuse_ops[1] = value;
20136
+ addis_str = "lis %0,%v1";
20139
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
20141
+ fuse_ops[1] = value;
20142
+ addis_str = "lis %0,%1@ha";
20147
+ fatal_insn ("Could not generate addis value for fusion", addis_value);
20149
+ sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
20150
+ comment_str, mode_name);
20151
+ output_asm_insn (insn_template, fuse_ops);
20153
+ /* Emit the D-form load instruction. */
20154
+ if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
20156
+ sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
20157
+ fuse_ops[1] = load_offset;
20158
+ output_asm_insn (insn_template, fuse_ops);
20161
+ else if (GET_CODE (load_offset) == UNSPEC
20162
+ && XINT (load_offset, 1) == UNSPEC_TOCREL)
20165
+ sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
20167
+ else if (TARGET_XCOFF)
20168
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
20171
+ gcc_unreachable ();
20173
+ fuse_ops[1] = XVECEXP (load_offset, 0, 0);
20174
+ output_asm_insn (insn_template, fuse_ops);
20177
+ else if (GET_CODE (load_offset) == PLUS
20178
+ && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
20179
+ && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
20180
+ && CONST_INT_P (XEXP (load_offset, 1)))
20182
+ rtx tocrel_unspec = XEXP (load_offset, 0);
20184
+ sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
20186
+ else if (TARGET_XCOFF)
20187
+ sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
20190
+ gcc_unreachable ();
20192
+ fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
20193
+ fuse_ops[2] = XEXP (load_offset, 1);
20194
+ output_asm_insn (insn_template, fuse_ops);
20197
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
20199
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
20201
+ fuse_ops[1] = load_offset;
20202
+ output_asm_insn (insn_template, fuse_ops);
20206
+ fatal_insn ("Unable to generate load offset for fusion", load_offset);
20208
+ /* Handle sign extension. The peephole2 pass generates this as a separate
20209
+ insn, but we handle it just in case it got reattached. */
20212
+ gcc_assert (extend_insn != NULL);
20213
+ output_asm_insn (extend_insn, fuse_ops);
20220
struct gcc_target targetm = TARGET_INITIALIZER;
20222
#include "gt-rs6000.h"
20223
--- a/src/gcc/config/rs6000/vsx.md
20224
+++ b/src/gcc/config/rs6000/vsx.md
20226
;; it to use gprs as well as vsx registers.
20227
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
20229
+(define_mode_iterator VSX_M2 [V16QI
20235
+ (TI "TARGET_VSX_TIMODE")])
20237
;; Map into the appropriate load/store name based on the type
20238
(define_mode_attr VSm [(V16QI "vw4")
20247
;; Map into the appropriate suffix based on the type
20248
(define_mode_attr VSs [(V16QI "sp")
20256
;; Map the register class used
20257
(define_mode_attr VSr [(V16QI "v")
20265
;; Map the register class used for float<->int conversions
20266
(define_mode_attr VSr2 [(V2DF "wd")
20267
@@ -115,7 +123,6 @@
20274
;; Appropriate type for add ops (and other simple FP ops)
20275
@@ -192,6 +199,8 @@
20276
UNSPEC_VSX_CVDPSXWS
20277
UNSPEC_VSX_CVDPUXWS
20279
+ UNSPEC_VSX_CVSPDPN
20280
+ UNSPEC_VSX_CVDPSPN
20284
@@ -207,77 +216,393 @@
20288
-(define_insn "*vsx_mov<mode>"
20289
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
20290
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
20291
- "VECTOR_MEM_VSX_P (<MODE>mode)
20292
- && (register_operand (operands[0], <MODE>mode)
20293
- || register_operand (operands[1], <MODE>mode))"
20295
+;; The patterns for LE permuted loads and stores come before the general
20296
+;; VSX moves so they match first.
20297
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
20298
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
20299
+ (match_operand:VSX_D 1 "memory_operand" "Z"))]
20300
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20302
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20303
+ [(set (match_dup 2)
20304
+ (vec_select:<MODE>
20306
+ (parallel [(const_int 1) (const_int 0)])))
20307
+ (set (match_dup 0)
20308
+ (vec_select:<MODE>
20310
+ (parallel [(const_int 1) (const_int 0)])))]
20313
- switch (which_alternative)
20317
- gcc_assert (MEM_P (operands[0])
20318
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
20319
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
20320
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
20321
- return "stx<VSm>x %x1,%y0";
20322
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
20326
+ [(set_attr "type" "vecload")
20327
+ (set_attr "length" "8")])
20331
- gcc_assert (MEM_P (operands[1])
20332
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
20333
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
20334
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
20335
- return "lx<VSm>x %x0,%y1";
20336
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
20337
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
20338
+ (match_operand:VSX_W 1 "memory_operand" "Z"))]
20339
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20341
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20342
+ [(set (match_dup 2)
20343
+ (vec_select:<MODE>
20345
+ (parallel [(const_int 2) (const_int 3)
20346
+ (const_int 0) (const_int 1)])))
20347
+ (set (match_dup 0)
20348
+ (vec_select:<MODE>
20350
+ (parallel [(const_int 2) (const_int 3)
20351
+ (const_int 0) (const_int 1)])))]
20354
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
20358
+ [(set_attr "type" "vecload")
20359
+ (set_attr "length" "8")])
20363
- return "xxlor %x0,%x1,%x1";
20364
+(define_insn_and_split "*vsx_le_perm_load_v8hi"
20365
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
20366
+ (match_operand:V8HI 1 "memory_operand" "Z"))]
20367
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20369
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20370
+ [(set (match_dup 2)
20373
+ (parallel [(const_int 4) (const_int 5)
20374
+ (const_int 6) (const_int 7)
20375
+ (const_int 0) (const_int 1)
20376
+ (const_int 2) (const_int 3)])))
20377
+ (set (match_dup 0)
20380
+ (parallel [(const_int 4) (const_int 5)
20381
+ (const_int 6) (const_int 7)
20382
+ (const_int 0) (const_int 1)
20383
+ (const_int 2) (const_int 3)])))]
20386
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
20390
+ [(set_attr "type" "vecload")
20391
+ (set_attr "length" "8")])
20398
+(define_insn_and_split "*vsx_le_perm_load_v16qi"
20399
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
20400
+ (match_operand:V16QI 1 "memory_operand" "Z"))]
20401
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20403
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20404
+ [(set (match_dup 2)
20405
+ (vec_select:V16QI
20407
+ (parallel [(const_int 8) (const_int 9)
20408
+ (const_int 10) (const_int 11)
20409
+ (const_int 12) (const_int 13)
20410
+ (const_int 14) (const_int 15)
20411
+ (const_int 0) (const_int 1)
20412
+ (const_int 2) (const_int 3)
20413
+ (const_int 4) (const_int 5)
20414
+ (const_int 6) (const_int 7)])))
20415
+ (set (match_dup 0)
20416
+ (vec_select:V16QI
20418
+ (parallel [(const_int 8) (const_int 9)
20419
+ (const_int 10) (const_int 11)
20420
+ (const_int 12) (const_int 13)
20421
+ (const_int 14) (const_int 15)
20422
+ (const_int 0) (const_int 1)
20423
+ (const_int 2) (const_int 3)
20424
+ (const_int 4) (const_int 5)
20425
+ (const_int 6) (const_int 7)])))]
20428
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
20432
+ [(set_attr "type" "vecload")
20433
+ (set_attr "length" "8")])
20437
- return "xxlxor %x0,%x0,%x0";
20438
+(define_insn "*vsx_le_perm_store_<mode>"
20439
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
20440
+ (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
20441
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20443
+ [(set_attr "type" "vecstore")
20444
+ (set_attr "length" "12")])
20447
- return output_vec_const_move (operands);
20449
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
20450
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
20451
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
20452
+ [(set (match_dup 2)
20453
+ (vec_select:<MODE>
20455
+ (parallel [(const_int 1) (const_int 0)])))
20456
+ (set (match_dup 0)
20457
+ (vec_select:<MODE>
20459
+ (parallel [(const_int 1) (const_int 0)])))]
20461
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
20466
- gcc_assert (MEM_P (operands[0])
20467
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
20468
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
20469
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
20470
- return "stvx %1,%y0";
20471
+;; The post-reload split requires that we re-permute the source
20472
+;; register in case it is still live.
20474
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
20475
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
20476
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
20477
+ [(set (match_dup 1)
20478
+ (vec_select:<MODE>
20480
+ (parallel [(const_int 1) (const_int 0)])))
20481
+ (set (match_dup 0)
20482
+ (vec_select:<MODE>
20484
+ (parallel [(const_int 1) (const_int 0)])))
20485
+ (set (match_dup 1)
20486
+ (vec_select:<MODE>
20488
+ (parallel [(const_int 1) (const_int 0)])))]
20492
- gcc_assert (MEM_P (operands[0])
20493
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
20494
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
20495
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
20496
- return "lvx %0,%y1";
20497
+(define_insn "*vsx_le_perm_store_<mode>"
20498
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
20499
+ (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
20500
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20502
+ [(set_attr "type" "vecstore")
20503
+ (set_attr "length" "12")])
20506
- gcc_unreachable ();
20509
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
20510
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
20511
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
20512
+ [(set (match_dup 2)
20513
+ (vec_select:<MODE>
20515
+ (parallel [(const_int 2) (const_int 3)
20516
+ (const_int 0) (const_int 1)])))
20517
+ (set (match_dup 0)
20518
+ (vec_select:<MODE>
20520
+ (parallel [(const_int 2) (const_int 3)
20521
+ (const_int 0) (const_int 1)])))]
20523
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
20527
+;; The post-reload split requires that we re-permute the source
20528
+;; register in case it is still live.
20530
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
20531
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
20532
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
20533
+ [(set (match_dup 1)
20534
+ (vec_select:<MODE>
20536
+ (parallel [(const_int 2) (const_int 3)
20537
+ (const_int 0) (const_int 1)])))
20538
+ (set (match_dup 0)
20539
+ (vec_select:<MODE>
20541
+ (parallel [(const_int 2) (const_int 3)
20542
+ (const_int 0) (const_int 1)])))
20543
+ (set (match_dup 1)
20544
+ (vec_select:<MODE>
20546
+ (parallel [(const_int 2) (const_int 3)
20547
+ (const_int 0) (const_int 1)])))]
20550
+(define_insn "*vsx_le_perm_store_v8hi"
20551
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
20552
+ (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
20553
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20555
+ [(set_attr "type" "vecstore")
20556
+ (set_attr "length" "12")])
20559
+ [(set (match_operand:V8HI 0 "memory_operand" "")
20560
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
20561
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
20562
+ [(set (match_dup 2)
20565
+ (parallel [(const_int 4) (const_int 5)
20566
+ (const_int 6) (const_int 7)
20567
+ (const_int 0) (const_int 1)
20568
+ (const_int 2) (const_int 3)])))
20569
+ (set (match_dup 0)
20572
+ (parallel [(const_int 4) (const_int 5)
20573
+ (const_int 6) (const_int 7)
20574
+ (const_int 0) (const_int 1)
20575
+ (const_int 2) (const_int 3)])))]
20577
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
20581
+;; The post-reload split requires that we re-permute the source
20582
+;; register in case it is still live.
20584
+ [(set (match_operand:V8HI 0 "memory_operand" "")
20585
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
20586
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
20587
+ [(set (match_dup 1)
20590
+ (parallel [(const_int 4) (const_int 5)
20591
+ (const_int 6) (const_int 7)
20592
+ (const_int 0) (const_int 1)
20593
+ (const_int 2) (const_int 3)])))
20594
+ (set (match_dup 0)
20597
+ (parallel [(const_int 4) (const_int 5)
20598
+ (const_int 6) (const_int 7)
20599
+ (const_int 0) (const_int 1)
20600
+ (const_int 2) (const_int 3)])))
20601
+ (set (match_dup 1)
20604
+ (parallel [(const_int 4) (const_int 5)
20605
+ (const_int 6) (const_int 7)
20606
+ (const_int 0) (const_int 1)
20607
+ (const_int 2) (const_int 3)])))]
20610
+(define_insn "*vsx_le_perm_store_v16qi"
20611
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
20612
+ (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
20613
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
20615
+ [(set_attr "type" "vecstore")
20616
+ (set_attr "length" "12")])
20619
+ [(set (match_operand:V16QI 0 "memory_operand" "")
20620
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
20621
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
20622
+ [(set (match_dup 2)
20623
+ (vec_select:V16QI
20625
+ (parallel [(const_int 8) (const_int 9)
20626
+ (const_int 10) (const_int 11)
20627
+ (const_int 12) (const_int 13)
20628
+ (const_int 14) (const_int 15)
20629
+ (const_int 0) (const_int 1)
20630
+ (const_int 2) (const_int 3)
20631
+ (const_int 4) (const_int 5)
20632
+ (const_int 6) (const_int 7)])))
20633
+ (set (match_dup 0)
20634
+ (vec_select:V16QI
20636
+ (parallel [(const_int 8) (const_int 9)
20637
+ (const_int 10) (const_int 11)
20638
+ (const_int 12) (const_int 13)
20639
+ (const_int 14) (const_int 15)
20640
+ (const_int 0) (const_int 1)
20641
+ (const_int 2) (const_int 3)
20642
+ (const_int 4) (const_int 5)
20643
+ (const_int 6) (const_int 7)])))]
20645
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
20649
+;; The post-reload split requires that we re-permute the source
20650
+;; register in case it is still live.
20652
+ [(set (match_operand:V16QI 0 "memory_operand" "")
20653
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
20654
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
20655
+ [(set (match_dup 1)
20656
+ (vec_select:V16QI
20658
+ (parallel [(const_int 8) (const_int 9)
20659
+ (const_int 10) (const_int 11)
20660
+ (const_int 12) (const_int 13)
20661
+ (const_int 14) (const_int 15)
20662
+ (const_int 0) (const_int 1)
20663
+ (const_int 2) (const_int 3)
20664
+ (const_int 4) (const_int 5)
20665
+ (const_int 6) (const_int 7)])))
20666
+ (set (match_dup 0)
20667
+ (vec_select:V16QI
20669
+ (parallel [(const_int 8) (const_int 9)
20670
+ (const_int 10) (const_int 11)
20671
+ (const_int 12) (const_int 13)
20672
+ (const_int 14) (const_int 15)
20673
+ (const_int 0) (const_int 1)
20674
+ (const_int 2) (const_int 3)
20675
+ (const_int 4) (const_int 5)
20676
+ (const_int 6) (const_int 7)])))
20677
+ (set (match_dup 1)
20678
+ (vec_select:V16QI
20680
+ (parallel [(const_int 8) (const_int 9)
20681
+ (const_int 10) (const_int 11)
20682
+ (const_int 12) (const_int 13)
20683
+ (const_int 14) (const_int 15)
20684
+ (const_int 0) (const_int 1)
20685
+ (const_int 2) (const_int 3)
20686
+ (const_int 4) (const_int 5)
20687
+ (const_int 6) (const_int 7)])))]
20691
+(define_insn "*vsx_mov<mode>"
20692
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
20693
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
20694
+ "VECTOR_MEM_VSX_P (<MODE>mode)
20695
+ && (register_operand (operands[0], <MODE>mode)
20696
+ || register_operand (operands[1], <MODE>mode))"
20698
+ return rs6000_output_move_128bit (operands);
20700
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
20701
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
20702
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
20704
-;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
20705
-;; unions. However for plain data movement, slightly favor the vector loads
20706
-(define_insn "*vsx_movti"
20707
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?Y,?r,?r,wa,v,v,wZ")
20708
- (match_operand:TI 1 "input_operand" "wa,Z,wa,r,Y,r,j,W,wZ,v"))]
20709
- "VECTOR_MEM_VSX_P (TImode)
20710
+;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
20711
+;; use of TImode is for unions. However for plain data movement, slightly
20712
+;; favor the vector loads
20713
+(define_insn "*vsx_movti_64bit"
20714
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
20715
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
20716
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
20717
&& (register_operand (operands[0], TImode)
20718
|| register_operand (operands[1], TImode))"
20720
+ return rs6000_output_move_128bit (operands);
20722
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
20723
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
20725
+(define_insn "*vsx_movti_32bit"
20726
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
20727
+ (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
20728
+ "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
20729
+ && (register_operand (operands[0], TImode)
20730
+ || register_operand (operands[1], TImode))"
20732
switch (which_alternative)
20735
@@ -290,27 +615,45 @@
20736
return "xxlor %x0,%x1,%x1";
20739
+ return "xxlxor %x0,%x0,%x0";
20742
+ return output_vec_const_move (operands);
20746
+ return "stvx %1,%y0";
20749
- return "xxlxor %x0,%x0,%x0";
20750
+ return "lvx %0,%y1";
20753
- return output_vec_const_move (operands);
20754
+ if (TARGET_STRING)
20755
+ return \"stswi %1,%P0,16\";
20758
- return "stvx %1,%y0";
20762
- return "lvx %0,%y1";
20763
+ /* If the address is not used in the output, we can use lsi. Otherwise,
20764
+ fall through to generating four loads. */
20765
+ if (TARGET_STRING
20766
+ && ! reg_overlap_mentioned_p (operands[0], operands[1]))
20767
+ return \"lswi %0,%P1,16\";
20768
+ /* ... fall through ... */
20775
gcc_unreachable ();
20778
- [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
20779
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
20780
+ (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
20781
+ (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
20782
+ (const_string "always")
20783
+ (const_string "conditional")))])
20785
;; Explicit load/store expanders for the builtin functions
20786
(define_expand "vsx_load_<mode>"
20787
@@ -320,46 +663,48 @@
20790
(define_expand "vsx_store_<mode>"
20791
- [(set (match_operand:VEC_M 0 "memory_operand" "")
20792
- (match_operand:VEC_M 1 "vsx_register_operand" ""))]
20793
+ [(set (match_operand:VSX_M 0 "memory_operand" "")
20794
+ (match_operand:VSX_M 1 "vsx_register_operand" ""))]
20795
"VECTOR_MEM_VSX_P (<MODE>mode)"
20799
-;; VSX scalar and vector floating point arithmetic instructions
20800
+;; VSX vector floating point arithmetic instructions. The VSX scalar
20801
+;; instructions are now combined with the insn for the traditional floating
20803
(define_insn "*vsx_add<mode>3"
20804
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20805
- (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
20806
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
20807
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20808
+ (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
20809
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
20810
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20811
- "x<VSv>add<VSs> %x0,%x1,%x2"
20812
+ "xvadd<VSs> %x0,%x1,%x2"
20813
[(set_attr "type" "<VStype_simple>")
20814
(set_attr "fp_type" "<VSfptype_simple>")])
20816
(define_insn "*vsx_sub<mode>3"
20817
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20818
- (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
20819
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
20820
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20821
+ (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
20822
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
20823
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20824
- "x<VSv>sub<VSs> %x0,%x1,%x2"
20825
+ "xvsub<VSs> %x0,%x1,%x2"
20826
[(set_attr "type" "<VStype_simple>")
20827
(set_attr "fp_type" "<VSfptype_simple>")])
20829
(define_insn "*vsx_mul<mode>3"
20830
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20831
- (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
20832
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
20833
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20834
+ (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
20835
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
20836
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20837
- "x<VSv>mul<VSs> %x0,%x1,%x2"
20838
- [(set_attr "type" "<VStype_mul>")
20839
+ "xvmul<VSs> %x0,%x1,%x2"
20840
+ [(set_attr "type" "<VStype_simple>")
20841
(set_attr "fp_type" "<VSfptype_mul>")])
20843
(define_insn "*vsx_div<mode>3"
20844
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20845
- (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
20846
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
20847
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20848
+ (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
20849
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
20850
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20851
- "x<VSv>div<VSs> %x0,%x1,%x2"
20852
+ "xvdiv<VSs> %x0,%x1,%x2"
20853
[(set_attr "type" "<VStype_div>")
20854
(set_attr "fp_type" "<VSfptype_div>")])
20856
@@ -402,94 +747,72 @@
20857
(set_attr "fp_type" "<VSfptype_simple>")])
20859
(define_insn "vsx_fre<mode>2"
20860
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20861
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
20862
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20863
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
20865
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20866
- "x<VSv>re<VSs> %x0,%x1"
20867
+ "xvre<VSs> %x0,%x1"
20868
[(set_attr "type" "<VStype_simple>")
20869
(set_attr "fp_type" "<VSfptype_simple>")])
20871
(define_insn "*vsx_neg<mode>2"
20872
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20873
- (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
20874
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20875
+ (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
20876
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20877
- "x<VSv>neg<VSs> %x0,%x1"
20878
+ "xvneg<VSs> %x0,%x1"
20879
[(set_attr "type" "<VStype_simple>")
20880
(set_attr "fp_type" "<VSfptype_simple>")])
20882
(define_insn "*vsx_abs<mode>2"
20883
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20884
- (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
20885
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20886
+ (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
20887
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20888
- "x<VSv>abs<VSs> %x0,%x1"
20889
+ "xvabs<VSs> %x0,%x1"
20890
[(set_attr "type" "<VStype_simple>")
20891
(set_attr "fp_type" "<VSfptype_simple>")])
20893
(define_insn "vsx_nabs<mode>2"
20894
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20897
- (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
20898
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20901
+ (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))]
20902
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20903
- "x<VSv>nabs<VSs> %x0,%x1"
20904
+ "xvnabs<VSs> %x0,%x1"
20905
[(set_attr "type" "<VStype_simple>")
20906
(set_attr "fp_type" "<VSfptype_simple>")])
20908
(define_insn "vsx_smax<mode>3"
20909
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20910
- (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
20911
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
20912
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20913
+ (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
20914
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
20915
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20916
- "x<VSv>max<VSs> %x0,%x1,%x2"
20917
+ "xvmax<VSs> %x0,%x1,%x2"
20918
[(set_attr "type" "<VStype_simple>")
20919
(set_attr "fp_type" "<VSfptype_simple>")])
20921
(define_insn "*vsx_smin<mode>3"
20922
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20923
- (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
20924
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
20925
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20926
+ (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
20927
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
20928
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20929
- "x<VSv>min<VSs> %x0,%x1,%x2"
20930
+ "xvmin<VSs> %x0,%x1,%x2"
20931
[(set_attr "type" "<VStype_simple>")
20932
(set_attr "fp_type" "<VSfptype_simple>")])
20934
-;; Special VSX version of smin/smax for single precision floating point. Since
20935
-;; both numbers are rounded to single precision, we can just use the DP version
20936
-;; of the instruction.
20938
-(define_insn "*vsx_smaxsf3"
20939
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
20940
- (smax:SF (match_operand:SF 1 "vsx_register_operand" "f")
20941
- (match_operand:SF 2 "vsx_register_operand" "f")))]
20942
- "VECTOR_UNIT_VSX_P (DFmode)"
20943
- "xsmaxdp %x0,%x1,%x2"
20944
- [(set_attr "type" "fp")
20945
- (set_attr "fp_type" "fp_addsub_d")])
20947
-(define_insn "*vsx_sminsf3"
20948
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
20949
- (smin:SF (match_operand:SF 1 "vsx_register_operand" "f")
20950
- (match_operand:SF 2 "vsx_register_operand" "f")))]
20951
- "VECTOR_UNIT_VSX_P (DFmode)"
20952
- "xsmindp %x0,%x1,%x2"
20953
- [(set_attr "type" "fp")
20954
- (set_attr "fp_type" "fp_addsub_d")])
20956
(define_insn "*vsx_sqrt<mode>2"
20957
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20958
- (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
20959
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20960
+ (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
20961
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20962
- "x<VSv>sqrt<VSs> %x0,%x1"
20963
+ "xvsqrt<VSs> %x0,%x1"
20964
[(set_attr "type" "<VStype_sqrt>")
20965
(set_attr "fp_type" "<VSfptype_sqrt>")])
20967
(define_insn "*vsx_rsqrte<mode>2"
20968
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
20969
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
20970
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
20971
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
20973
"VECTOR_UNIT_VSX_P (<MODE>mode)"
20974
- "x<VSv>rsqrte<VSs> %x0,%x1"
20975
+ "xvrsqrte<VSs> %x0,%x1"
20976
[(set_attr "type" "<VStype_simple>")
20977
(set_attr "fp_type" "<VSfptype_simple>")])
20979
@@ -528,27 +851,11 @@
20980
[(set_attr "type" "<VStype_simple>")
20981
(set_attr "fp_type" "<VSfptype_simple>")])
20983
-;; Fused vector multiply/add instructions Support the classical DF versions of
20984
-;; fma, which allows the target to be a separate register from the 3 inputs.
20985
-;; Under VSX, the target must be either the addend or the first multiply.
20986
-;; Where we can, also do the same for the Altivec V4SF fmas.
20987
+;; Fused vector multiply/add instructions. Support the classical Altivec
20988
+;; versions of fma, which allows the target to be a separate register from the
20989
+;; 3 inputs. Under VSX, the target must be either the addend or the first
20992
-(define_insn "*vsx_fmadf4"
20993
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
20995
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
20996
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
20997
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))]
20998
- "VECTOR_UNIT_VSX_P (DFmode)"
21000
- xsmaddadp %x0,%x1,%x2
21001
- xsmaddmdp %x0,%x1,%x3
21002
- xsmaddadp %x0,%x1,%x2
21003
- xsmaddmdp %x0,%x1,%x3
21004
- fmadd %0,%1,%2,%3"
21005
- [(set_attr "type" "fp")
21006
- (set_attr "fp_type" "fp_maddsub_d")])
21008
(define_insn "*vsx_fmav4sf4"
21009
[(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
21011
@@ -578,23 +885,6 @@
21012
xvmaddmdp %x0,%x1,%x3"
21013
[(set_attr "type" "vecdouble")])
21015
-(define_insn "*vsx_fmsdf4"
21016
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
21018
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
21019
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
21021
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
21022
- "VECTOR_UNIT_VSX_P (DFmode)"
21024
- xsmsubadp %x0,%x1,%x2
21025
- xsmsubmdp %x0,%x1,%x3
21026
- xsmsubadp %x0,%x1,%x2
21027
- xsmsubmdp %x0,%x1,%x3
21028
- fmsub %0,%1,%2,%3"
21029
- [(set_attr "type" "fp")
21030
- (set_attr "fp_type" "fp_maddsub_d")])
21032
(define_insn "*vsx_fms<mode>4"
21033
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
21035
@@ -604,29 +894,12 @@
21036
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
21037
"VECTOR_UNIT_VSX_P (<MODE>mode)"
21039
- x<VSv>msuba<VSs> %x0,%x1,%x2
21040
- x<VSv>msubm<VSs> %x0,%x1,%x3
21041
- x<VSv>msuba<VSs> %x0,%x1,%x2
21042
- x<VSv>msubm<VSs> %x0,%x1,%x3"
21043
+ xvmsuba<VSs> %x0,%x1,%x2
21044
+ xvmsubm<VSs> %x0,%x1,%x3
21045
+ xvmsuba<VSs> %x0,%x1,%x2
21046
+ xvmsubm<VSs> %x0,%x1,%x3"
21047
[(set_attr "type" "<VStype_mul>")])
21049
-(define_insn "*vsx_nfmadf4"
21050
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
21053
- (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d")
21054
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
21055
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
21056
- "VECTOR_UNIT_VSX_P (DFmode)"
21058
- xsnmaddadp %x0,%x1,%x2
21059
- xsnmaddmdp %x0,%x1,%x3
21060
- xsnmaddadp %x0,%x1,%x2
21061
- xsnmaddmdp %x0,%x1,%x3
21062
- fnmadd %0,%1,%2,%3"
21063
- [(set_attr "type" "fp")
21064
- (set_attr "fp_type" "fp_maddsub_d")])
21066
(define_insn "*vsx_nfma<mode>4"
21067
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
21069
@@ -636,31 +909,13 @@
21070
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
21071
"VECTOR_UNIT_VSX_P (<MODE>mode)"
21073
- x<VSv>nmadda<VSs> %x0,%x1,%x2
21074
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
21075
- x<VSv>nmadda<VSs> %x0,%x1,%x2
21076
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
21077
+ xvnmadda<VSs> %x0,%x1,%x2
21078
+ xvnmaddm<VSs> %x0,%x1,%x3
21079
+ xvnmadda<VSs> %x0,%x1,%x2
21080
+ xvnmaddm<VSs> %x0,%x1,%x3"
21081
[(set_attr "type" "<VStype_mul>")
21082
(set_attr "fp_type" "<VSfptype_mul>")])
21084
-(define_insn "*vsx_nfmsdf4"
21085
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
21088
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
21089
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
21091
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))]
21092
- "VECTOR_UNIT_VSX_P (DFmode)"
21094
- xsnmsubadp %x0,%x1,%x2
21095
- xsnmsubmdp %x0,%x1,%x3
21096
- xsnmsubadp %x0,%x1,%x2
21097
- xsnmsubmdp %x0,%x1,%x3
21098
- fnmsub %0,%1,%2,%3"
21099
- [(set_attr "type" "fp")
21100
- (set_attr "fp_type" "fp_maddsub_d")])
21102
(define_insn "*vsx_nfmsv4sf4"
21103
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
21105
@@ -722,16 +977,6 @@
21106
[(set_attr "type" "<VStype_simple>")
21107
(set_attr "fp_type" "<VSfptype_simple>")])
21109
-;; Floating point scalar compare
21110
-(define_insn "*vsx_cmpdf_internal1"
21111
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
21112
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
21113
- (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
21114
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
21115
- && VECTOR_UNIT_VSX_P (DFmode)"
21116
- "xscmpudp %0,%x1,%x2"
21117
- [(set_attr "type" "fpcompare")])
21119
;; Compare vectors producing a vector result and a predicate, setting CR6 to
21120
;; indicate a combined status
21121
(define_insn "*vsx_eq_<mode>_p"
21122
@@ -798,13 +1043,13 @@
21125
(define_insn "vsx_copysign<mode>3"
21126
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
21128
- [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
21129
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
21130
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
21132
+ [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
21133
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")]
21135
"VECTOR_UNIT_VSX_P (<MODE>mode)"
21136
- "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
21137
+ "xvcpsgn<VSs> %x0,%x2,%x1"
21138
[(set_attr "type" "<VStype_simple>")
21139
(set_attr "fp_type" "<VSfptype_simple>")])
21141
@@ -865,10 +1110,10 @@
21142
(set_attr "fp_type" "<VSfptype_simple>")])
21144
(define_insn "vsx_btrunc<mode>2"
21145
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
21146
- (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
21147
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
21148
+ (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
21149
"VECTOR_UNIT_VSX_P (<MODE>mode)"
21150
- "x<VSv>r<VSs>iz %x0,%x1"
21151
+ "xvr<VSs>iz %x0,%x1"
21152
[(set_attr "type" "<VStype_simple>")
21153
(set_attr "fp_type" "<VSfptype_simple>")])
21155
@@ -882,20 +1127,20 @@
21156
(set_attr "fp_type" "<VSfptype_simple>")])
21158
(define_insn "vsx_floor<mode>2"
21159
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
21160
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
21161
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
21162
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
21164
"VECTOR_UNIT_VSX_P (<MODE>mode)"
21165
- "x<VSv>r<VSs>im %x0,%x1"
21166
+ "xvr<VSs>im %x0,%x1"
21167
[(set_attr "type" "<VStype_simple>")
21168
(set_attr "fp_type" "<VSfptype_simple>")])
21170
(define_insn "vsx_ceil<mode>2"
21171
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
21172
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
21173
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
21174
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
21176
"VECTOR_UNIT_VSX_P (<MODE>mode)"
21177
- "x<VSv>r<VSs>ip %x0,%x1"
21178
+ "xvr<VSs>ip %x0,%x1"
21179
[(set_attr "type" "<VStype_simple>")
21180
(set_attr "fp_type" "<VSfptype_simple>")])
21182
@@ -942,6 +1187,40 @@
21184
[(set_attr "type" "fp")])
21186
+;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
21187
+(define_insn "vsx_xscvdpspn"
21188
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
21189
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
21190
+ UNSPEC_VSX_CVDPSPN))]
21191
+ "TARGET_XSCVDPSPN"
21192
+ "xscvdpspn %x0,%x1"
21193
+ [(set_attr "type" "fp")])
21195
+(define_insn "vsx_xscvspdpn"
21196
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
21197
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
21198
+ UNSPEC_VSX_CVSPDPN))]
21199
+ "TARGET_XSCVSPDPN"
21200
+ "xscvspdpn %x0,%x1"
21201
+ [(set_attr "type" "fp")])
21203
+(define_insn "vsx_xscvdpspn_scalar"
21204
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
21205
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
21206
+ UNSPEC_VSX_CVDPSPN))]
21207
+ "TARGET_XSCVDPSPN"
21208
+ "xscvdpspn %x0,%x1"
21209
+ [(set_attr "type" "fp")])
21211
+;; Used by direct move to move a SFmode value from GPR to VSX register
21212
+(define_insn "vsx_xscvspdpn_directmove"
21213
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
21214
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
21215
+ UNSPEC_VSX_CVSPDPN))]
21216
+ "TARGET_XSCVSPDPN"
21217
+ "xscvspdpn %x0,%x1"
21218
+ [(set_attr "type" "fp")])
21220
;; Convert from 64-bit to 32-bit types
21221
;; Note, favor the Altivec registers since the usual use of these instructions
21222
;; is in vector converts and we need to use the Altivec vperm instruction.
21223
@@ -1027,73 +1306,21 @@
21224
(set_attr "fp_type" "<VSfptype_simple>")])
21227
-;; Logical and permute operations
21228
-(define_insn "*vsx_and<mode>3"
21229
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
21231
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
21232
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
21233
- "VECTOR_MEM_VSX_P (<MODE>mode)"
21234
- "xxland %x0,%x1,%x2"
21235
- [(set_attr "type" "vecsimple")])
21237
-(define_insn "*vsx_ior<mode>3"
21238
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
21239
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
21240
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
21241
- "VECTOR_MEM_VSX_P (<MODE>mode)"
21242
- "xxlor %x0,%x1,%x2"
21243
- [(set_attr "type" "vecsimple")])
21245
-(define_insn "*vsx_xor<mode>3"
21246
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
21248
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
21249
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
21250
- "VECTOR_MEM_VSX_P (<MODE>mode)"
21251
- "xxlxor %x0,%x1,%x2"
21252
- [(set_attr "type" "vecsimple")])
21254
-(define_insn "*vsx_one_cmpl<mode>2"
21255
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
21257
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
21258
- "VECTOR_MEM_VSX_P (<MODE>mode)"
21259
- "xxlnor %x0,%x1,%x1"
21260
- [(set_attr "type" "vecsimple")])
21262
-(define_insn "*vsx_nor<mode>3"
21263
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
21266
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
21267
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
21268
- "VECTOR_MEM_VSX_P (<MODE>mode)"
21269
- "xxlnor %x0,%x1,%x2"
21270
- [(set_attr "type" "vecsimple")])
21272
-(define_insn "*vsx_andc<mode>3"
21273
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
21276
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
21277
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
21278
- "VECTOR_MEM_VSX_P (<MODE>mode)"
21279
- "xxlandc %x0,%x1,%x2"
21280
- [(set_attr "type" "vecsimple")])
21283
;; Permute operations
21285
;; Build a V2DF/V2DI vector from two scalars
21286
(define_insn "vsx_concat_<mode>"
21287
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
21289
- [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
21290
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")]
21291
- UNSPEC_VSX_CONCAT))]
21292
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
21293
+ (vec_concat:VSX_D
21294
+ (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
21295
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
21296
"VECTOR_MEM_VSX_P (<MODE>mode)"
21297
- "xxpermdi %x0,%x1,%x2,0"
21299
+ if (BYTES_BIG_ENDIAN)
21300
+ return "xxpermdi %x0,%x1,%x2,0";
21302
+ return "xxpermdi %x0,%x2,%x1,0";
21304
[(set_attr "type" "vecperm")])
21306
;; Special purpose concat using xxpermdi to glue two single precision values
21307
@@ -1106,9 +1333,161 @@
21308
(match_operand:SF 2 "vsx_register_operand" "f,f")]
21309
UNSPEC_VSX_CONCAT))]
21310
"VECTOR_MEM_VSX_P (V2DFmode)"
21311
- "xxpermdi %x0,%x1,%x2,0"
21313
+ if (BYTES_BIG_ENDIAN)
21314
+ return "xxpermdi %x0,%x1,%x2,0";
21316
+ return "xxpermdi %x0,%x2,%x1,0";
21318
[(set_attr "type" "vecperm")])
21320
+;; xxpermdi for little endian loads and stores. We need several of
21321
+;; these since the form of the PARALLEL differs by mode.
21322
+(define_insn "*vsx_xxpermdi2_le_<mode>"
21323
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
21324
+ (vec_select:VSX_D
21325
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
21326
+ (parallel [(const_int 1) (const_int 0)])))]
21327
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
21328
+ "xxpermdi %x0,%x1,%x1,2"
21329
+ [(set_attr "type" "vecperm")])
21331
+(define_insn "*vsx_xxpermdi4_le_<mode>"
21332
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
21333
+ (vec_select:VSX_W
21334
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
21335
+ (parallel [(const_int 2) (const_int 3)
21336
+ (const_int 0) (const_int 1)])))]
21337
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
21338
+ "xxpermdi %x0,%x1,%x1,2"
21339
+ [(set_attr "type" "vecperm")])
21341
+(define_insn "*vsx_xxpermdi8_le_V8HI"
21342
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
21344
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
21345
+ (parallel [(const_int 4) (const_int 5)
21346
+ (const_int 6) (const_int 7)
21347
+ (const_int 0) (const_int 1)
21348
+ (const_int 2) (const_int 3)])))]
21349
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
21350
+ "xxpermdi %x0,%x1,%x1,2"
21351
+ [(set_attr "type" "vecperm")])
21353
+(define_insn "*vsx_xxpermdi16_le_V16QI"
21354
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
21355
+ (vec_select:V16QI
21356
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
21357
+ (parallel [(const_int 8) (const_int 9)
21358
+ (const_int 10) (const_int 11)
21359
+ (const_int 12) (const_int 13)
21360
+ (const_int 14) (const_int 15)
21361
+ (const_int 0) (const_int 1)
21362
+ (const_int 2) (const_int 3)
21363
+ (const_int 4) (const_int 5)
21364
+ (const_int 6) (const_int 7)])))]
21365
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
21366
+ "xxpermdi %x0,%x1,%x1,2"
21367
+ [(set_attr "type" "vecperm")])
21369
+;; lxvd2x for little endian loads. We need several of
21370
+;; these since the form of the PARALLEL differs by mode.
21371
+(define_insn "*vsx_lxvd2x2_le_<mode>"
21372
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
21373
+ (vec_select:VSX_D
21374
+ (match_operand:VSX_D 1 "memory_operand" "Z")
21375
+ (parallel [(const_int 1) (const_int 0)])))]
21376
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
21378
+ [(set_attr "type" "vecload")])
21380
+(define_insn "*vsx_lxvd2x4_le_<mode>"
21381
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
21382
+ (vec_select:VSX_W
21383
+ (match_operand:VSX_W 1 "memory_operand" "Z")
21384
+ (parallel [(const_int 2) (const_int 3)
21385
+ (const_int 0) (const_int 1)])))]
21386
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
21388
+ [(set_attr "type" "vecload")])
21390
+(define_insn "*vsx_lxvd2x8_le_V8HI"
21391
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
21393
+ (match_operand:V8HI 1 "memory_operand" "Z")
21394
+ (parallel [(const_int 4) (const_int 5)
21395
+ (const_int 6) (const_int 7)
21396
+ (const_int 0) (const_int 1)
21397
+ (const_int 2) (const_int 3)])))]
21398
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
21400
+ [(set_attr "type" "vecload")])
21402
+(define_insn "*vsx_lxvd2x16_le_V16QI"
21403
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
21404
+ (vec_select:V16QI
21405
+ (match_operand:V16QI 1 "memory_operand" "Z")
21406
+ (parallel [(const_int 8) (const_int 9)
21407
+ (const_int 10) (const_int 11)
21408
+ (const_int 12) (const_int 13)
21409
+ (const_int 14) (const_int 15)
21410
+ (const_int 0) (const_int 1)
21411
+ (const_int 2) (const_int 3)
21412
+ (const_int 4) (const_int 5)
21413
+ (const_int 6) (const_int 7)])))]
21414
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
21416
+ [(set_attr "type" "vecload")])
21418
+;; stxvd2x for little endian stores. We need several of
21419
+;; these since the form of the PARALLEL differs by mode.
21420
+(define_insn "*vsx_stxvd2x2_le_<mode>"
21421
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
21422
+ (vec_select:VSX_D
21423
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
21424
+ (parallel [(const_int 1) (const_int 0)])))]
21425
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
21426
+ "stxvd2x %x1,%y0"
21427
+ [(set_attr "type" "vecstore")])
21429
+(define_insn "*vsx_stxvd2x4_le_<mode>"
21430
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
21431
+ (vec_select:VSX_W
21432
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
21433
+ (parallel [(const_int 2) (const_int 3)
21434
+ (const_int 0) (const_int 1)])))]
21435
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
21436
+ "stxvd2x %x1,%y0"
21437
+ [(set_attr "type" "vecstore")])
21439
+(define_insn "*vsx_stxvd2x8_le_V8HI"
21440
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
21442
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
21443
+ (parallel [(const_int 4) (const_int 5)
21444
+ (const_int 6) (const_int 7)
21445
+ (const_int 0) (const_int 1)
21446
+ (const_int 2) (const_int 3)])))]
21447
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
21448
+ "stxvd2x %x1,%y0"
21449
+ [(set_attr "type" "vecstore")])
21451
+(define_insn "*vsx_stxvd2x16_le_V16QI"
21452
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
21453
+ (vec_select:V16QI
21454
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
21455
+ (parallel [(const_int 8) (const_int 9)
21456
+ (const_int 10) (const_int 11)
21457
+ (const_int 12) (const_int 13)
21458
+ (const_int 14) (const_int 15)
21459
+ (const_int 0) (const_int 1)
21460
+ (const_int 2) (const_int 3)
21461
+ (const_int 4) (const_int 5)
21462
+ (const_int 6) (const_int 7)])))]
21463
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
21464
+ "stxvd2x %x1,%y0"
21465
+ [(set_attr "type" "vecstore")])
21467
;; Set the element of a V2DI/VD2F mode
21468
(define_insn "vsx_set_<mode>"
21469
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
21470
@@ -1118,9 +1497,10 @@
21472
"VECTOR_MEM_VSX_P (<MODE>mode)"
21474
- if (INTVAL (operands[3]) == 0)
21475
+ int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
21476
+ if (INTVAL (operands[3]) == idx_first)
21477
return \"xxpermdi %x0,%x2,%x1,1\";
21478
- else if (INTVAL (operands[3]) == 1)
21479
+ else if (INTVAL (operands[3]) == 1 - idx_first)
21480
return \"xxpermdi %x0,%x1,%x2,0\";
21482
gcc_unreachable ();
21483
@@ -1135,8 +1515,12 @@
21484
[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
21485
"VECTOR_MEM_VSX_P (<MODE>mode)"
21488
gcc_assert (UINTVAL (operands[2]) <= 1);
21489
- operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
21490
+ fldDM = INTVAL (operands[2]) << 1;
21491
+ if (!BYTES_BIG_ENDIAN)
21492
+ fldDM = 3 - fldDM;
21493
+ operands[3] = GEN_INT (fldDM);
21494
return \"xxpermdi %x0,%x1,%x1,%3\";
21496
[(set_attr "type" "vecperm")])
21497
@@ -1149,9 +1533,28 @@
21498
(parallel [(const_int 0)])))]
21499
"VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
21501
- [(set_attr "type" "fpload")
21502
+ [(set (attr "type")
21504
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
21505
+ (const_string "fpload_ux")
21506
+ (const_string "fpload")))
21507
(set_attr "length" "4")])
21509
+;; Optimize extracting element 1 from memory for little endian
21510
+(define_insn "*vsx_extract_<mode>_one_le"
21511
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
21512
+ (vec_select:<VS_scalar>
21513
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
21514
+ (parallel [(const_int 1)])))]
21515
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN"
21516
+ "lxsd%U1x %x0,%y1"
21517
+ [(set (attr "type")
21519
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
21520
+ (const_string "fpload_ux")
21521
+ (const_string "fpload")))
21522
+ (set_attr "length" "4")])
21524
;; Extract a SF element from V4SF
21525
(define_insn_and_split "vsx_extract_v4sf"
21526
[(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
21527
@@ -1172,7 +1575,7 @@
21528
rtx op2 = operands[2];
21529
rtx op3 = operands[3];
21531
- HOST_WIDE_INT ele = INTVAL (op2);
21532
+ HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
21536
@@ -1213,8 +1616,8 @@
21537
if (<MODE>mode != V2DImode)
21539
target = gen_lowpart (V2DImode, target);
21540
- op0 = gen_lowpart (V2DImode, target);
21541
- op1 = gen_lowpart (V2DImode, target);
21542
+ op0 = gen_lowpart (V2DImode, op0);
21543
+ op1 = gen_lowpart (V2DImode, op1);
21546
emit_insn (gen (target, op0, op1, perm0, perm1));
21547
@@ -1483,3 +1886,27 @@
21549
[(set_attr "length" "20")
21550
(set_attr "type" "veccomplex")])
21553
+;; Power8 Vector fusion. The fused ops must be physically adjacent.
21555
+ [(set (match_operand:P 0 "base_reg_operand" "")
21556
+ (match_operand:P 1 "short_cint_operand" ""))
21557
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
21558
+ (mem:VSX_M2 (plus:P (match_dup 0)
21559
+ (match_operand:P 3 "int_reg_operand" ""))))]
21560
+ "TARGET_VSX && TARGET_P8_FUSION"
21561
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
21562
+ [(set_attr "length" "8")
21563
+ (set_attr "type" "vecload")])
21566
+ [(set (match_operand:P 0 "base_reg_operand" "")
21567
+ (match_operand:P 1 "short_cint_operand" ""))
21568
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
21569
+ (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
21570
+ (match_dup 0))))]
21571
+ "TARGET_VSX && TARGET_P8_FUSION"
21572
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
21573
+ [(set_attr "length" "8")
21574
+ (set_attr "type" "vecload")])
21575
--- a/src/gcc/config/rs6000/rs6000.h
21576
+++ b/src/gcc/config/rs6000/rs6000.h
21578
#ifdef HAVE_AS_POWER8
21579
#define ASM_CPU_POWER8_SPEC "-mpower8"
21581
-#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec"
21582
+#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC
21586
@@ -164,6 +164,7 @@
21587
%{mcpu=e6500: -me6500} \
21588
%{maltivec: -maltivec} \
21589
%{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \
21590
+%{mpower8-vector|mcrypto|mdirect-move|mhtm: %{!mcpu*: %(asm_cpu_power8)}} \
21593
#define CPP_DEFAULT_SPEC ""
21594
@@ -277,6 +278,21 @@
21595
#define TARGET_POPCNTD 0
21598
+/* Define the ISA 2.07 flags as 0 if the target assembler does not support the
21599
+ waitasecond instruction. Allow -mpower8-fusion, since it does not add new
21602
+#ifndef HAVE_AS_POWER8
21603
+#undef TARGET_DIRECT_MOVE
21604
+#undef TARGET_CRYPTO
21606
+#undef TARGET_P8_VECTOR
21607
+#define TARGET_DIRECT_MOVE 0
21608
+#define TARGET_CRYPTO 0
21609
+#define TARGET_HTM 0
21610
+#define TARGET_P8_VECTOR 0
21613
/* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If
21614
not, generate the lwsync code as an integer constant. */
21615
#ifdef HAVE_AS_LWSYNC
21616
@@ -386,6 +402,7 @@
21617
#define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET)
21618
#define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN)
21620
+/* Describe the vector unit used for arithmetic operations. */
21621
extern enum rs6000_vector rs6000_vector_unit[];
21623
#define VECTOR_UNIT_NONE_P(MODE) \
21624
@@ -394,12 +411,25 @@
21625
#define VECTOR_UNIT_VSX_P(MODE) \
21626
(rs6000_vector_unit[(MODE)] == VECTOR_VSX)
21628
+#define VECTOR_UNIT_P8_VECTOR_P(MODE) \
21629
+ (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR)
21631
#define VECTOR_UNIT_ALTIVEC_P(MODE) \
21632
(rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC)
21634
+#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \
21635
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
21636
+ (int)VECTOR_VSX, \
21637
+ (int)VECTOR_P8_VECTOR))
21639
+/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either
21640
+ altivec (VMX) or VSX vector instructions. P8 vector support is upwards
21641
+ compatible, so allow it as well, rather than changing all of the uses of the
21643
#define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \
21644
- (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \
21645
- || rs6000_vector_unit[(MODE)] == VECTOR_VSX)
21646
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
21647
+ (int)VECTOR_ALTIVEC, \
21648
+ (int)VECTOR_P8_VECTOR))
21650
/* Describe whether to use VSX loads or Altivec loads. For now, just use the
21651
same unit as the vector unit we are using, but we may want to migrate to
21652
@@ -412,12 +442,21 @@
21653
#define VECTOR_MEM_VSX_P(MODE) \
21654
(rs6000_vector_mem[(MODE)] == VECTOR_VSX)
21656
+#define VECTOR_MEM_P8_VECTOR_P(MODE) \
21657
+ (rs6000_vector_mem[(MODE)] == VECTOR_VSX)
21659
#define VECTOR_MEM_ALTIVEC_P(MODE) \
21660
(rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC)
21662
+#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \
21663
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
21664
+ (int)VECTOR_VSX, \
21665
+ (int)VECTOR_P8_VECTOR))
21667
#define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \
21668
- (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \
21669
- || rs6000_vector_mem[(MODE)] == VECTOR_VSX)
21670
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
21671
+ (int)VECTOR_ALTIVEC, \
21672
+ (int)VECTOR_P8_VECTOR))
21674
/* Return the alignment of a given vector type, which is set based on the
21675
vector unit use. VSX for instance can load 32 or 64 bit aligned words
21676
@@ -479,22 +518,41 @@
21677
#define TARGET_FCTIDUZ TARGET_POPCNTD
21678
#define TARGET_FCTIWUZ TARGET_POPCNTD
21680
+#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
21681
+#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
21683
+/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
21684
+ in power7, so conditionalize them on p8 features. TImode syncs need quad
21685
+ memory support. */
21686
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
21687
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY
21689
+/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
21690
+ to allocate the SDmode stack slot to get the value into the proper location
21691
+ in the register. */
21692
+#define TARGET_NO_SDMODE_STACK (TARGET_LFIWZX && TARGET_STFIWX && TARGET_DFP)
21694
/* In switching from using target_flags to using rs6000_isa_flags, the options
21695
machinery creates OPTION_MASK_<xxx> instead of MASK_<xxx>. For now map
21696
OPTION_MASK_<xxx> back into MASK_<xxx>. */
21697
#define MASK_ALTIVEC OPTION_MASK_ALTIVEC
21698
#define MASK_CMPB OPTION_MASK_CMPB
21699
+#define MASK_CRYPTO OPTION_MASK_CRYPTO
21700
#define MASK_DFP OPTION_MASK_DFP
21701
+#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE
21702
#define MASK_DLMZB OPTION_MASK_DLMZB
21703
#define MASK_EABI OPTION_MASK_EABI
21704
#define MASK_FPRND OPTION_MASK_FPRND
21705
+#define MASK_P8_FUSION OPTION_MASK_P8_FUSION
21706
#define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT
21707
+#define MASK_HTM OPTION_MASK_HTM
21708
#define MASK_ISEL OPTION_MASK_ISEL
21709
#define MASK_MFCRF OPTION_MASK_MFCRF
21710
#define MASK_MFPGPR OPTION_MASK_MFPGPR
21711
#define MASK_MULHW OPTION_MASK_MULHW
21712
#define MASK_MULTIPLE OPTION_MASK_MULTIPLE
21713
#define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE
21714
+#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR
21715
#define MASK_POPCNTB OPTION_MASK_POPCNTB
21716
#define MASK_POPCNTD OPTION_MASK_POPCNTD
21717
#define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT
21718
@@ -505,6 +563,7 @@
21719
#define MASK_STRING OPTION_MASK_STRING
21720
#define MASK_UPDATE OPTION_MASK_UPDATE
21721
#define MASK_VSX OPTION_MASK_VSX
21722
+#define MASK_VSX_TIMODE OPTION_MASK_VSX_TIMODE
21725
#define MASK_POWERPC64 OPTION_MASK_POWERPC64
21726
@@ -558,6 +617,25 @@
21727
|| rs6000_cpu == PROCESSOR_PPC8548)
21730
+/* Whether SF/DF operations are supported on the E500. */
21731
+#define TARGET_SF_SPE (TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT \
21734
+#define TARGET_DF_SPE (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT \
21735
+ && !TARGET_FPRS && TARGET_E500_DOUBLE)
21737
+/* Whether SF/DF operations are supported by by the normal floating point unit
21738
+ (or the vector/scalar unit). */
21739
+#define TARGET_SF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \
21740
+ && TARGET_SINGLE_FLOAT)
21742
+#define TARGET_DF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \
21743
+ && TARGET_DOUBLE_FLOAT)
21745
+/* Whether SF/DF operations are supported by any hardware. */
21746
+#define TARGET_SF_INSN (TARGET_SF_FPR || TARGET_SF_SPE)
21747
+#define TARGET_DF_INSN (TARGET_DF_FPR || TARGET_DF_SPE)
21749
/* Which machine supports the various reciprocal estimate instructions. */
21750
#define TARGET_FRES (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
21751
&& TARGET_FPRS && TARGET_SINGLE_FLOAT)
21752
@@ -595,9 +673,6 @@
21753
#define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \
21754
(rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE)
21756
-#define RS6000_RECIP_HIGH_PRECISION_P(MODE) \
21757
- ((MODE) == SFmode || (MODE) == V4SFmode || TARGET_RECIP_PRECISION)
21759
/* The default CPU for TARGET_OPTION_OVERRIDE. */
21760
#define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT
21762
@@ -842,15 +917,17 @@
21763
in inline functions.
21765
Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame
21766
- pointer, which is eventually eliminated in favor of SP or FP. */
21767
+ pointer, which is eventually eliminated in favor of SP or FP.
21769
-#define FIRST_PSEUDO_REGISTER 114
21770
+ The 3 HTM registers aren't also included in DWARF_FRAME_REGISTERS. */
21772
+#define FIRST_PSEUDO_REGISTER 117
21774
/* This must be included for pre gcc 3.0 glibc compatibility. */
21775
#define PRE_GCC3_DWARF_FRAME_REGISTERS 77
21777
/* Add 32 dwarf columns for synthetic SPE registers. */
21778
-#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 1) + 32)
21779
+#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 4) + 32)
21781
/* The SPE has an additional 32 synthetic registers, with DWARF debug
21782
info numbering for these registers starting at 1200. While eh_frame
21783
@@ -866,7 +943,7 @@
21784
We must map them here to avoid huge unwinder tables mostly consisting
21785
of unused space. */
21786
#define DWARF_REG_TO_UNWIND_COLUMN(r) \
21787
- ((r) > 1200 ? ((r) - 1200 + FIRST_PSEUDO_REGISTER - 1) : (r))
21788
+ ((r) > 1200 ? ((r) - 1200 + (DWARF_FRAME_REGISTERS - 32)) : (r))
21790
/* Use standard DWARF numbering for DWARF debugging information. */
21791
#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
21792
@@ -906,7 +983,7 @@
21793
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
21794
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
21797
+ , 1, 1, 1, 1, 1, 1 \
21800
/* 1 for registers not available across function calls.
21801
@@ -926,7 +1003,7 @@
21802
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
21803
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
21806
+ , 1, 1, 1, 1, 1, 1 \
21809
/* Like `CALL_USED_REGISTERS' except this macro doesn't require that
21810
@@ -945,7 +1022,7 @@
21811
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
21812
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
21815
+ , 0, 0, 0, 0, 0, 0 \
21818
#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
21819
@@ -984,6 +1061,9 @@
21820
vrsave, vscr (fixed)
21821
spe_acc, spefscr (fixed)
21829
@@ -1004,7 +1084,9 @@
21831
#define REG_ALLOC_ORDER \
21833
- 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
21834
+ /* move fr13 (ie 45) later, so if we need TFmode, it does */ \
21835
+ /* not use fr14 which is a saved register. */ \
21836
+ 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \
21838
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
21839
50, 49, 48, 47, 46, \
21840
@@ -1023,7 +1105,7 @@
21841
96, 95, 94, 93, 92, 91, \
21842
108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, \
21845
+ 111, 112, 113, 114, 115, 116 \
21848
/* True if register is floating-point. */
21849
@@ -1064,8 +1146,11 @@
21850
#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
21852
/* Alternate name for any vector register supporting logical operations, no
21853
- matter which instruction set(s) are available. */
21854
-#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N)
21855
+ matter which instruction set(s) are available. Allow GPRs as well as the
21856
+ vector registers. */
21857
+#define VLOGICAL_REGNO_P(N) \
21858
+ (INT_REGNO_P (N) || ALTIVEC_REGNO_P (N) \
21859
+ || (TARGET_VSX && FP_REGNO_P (N))) \
21861
/* Return number of consecutive hard regs needed starting at reg REGNO
21862
to hold something of mode MODE. */
21863
@@ -1125,28 +1210,32 @@
21864
/* Value is 1 if it is a good idea to tie two pseudo registers
21865
when one has mode MODE1 and one has mode MODE2.
21866
If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
21867
- for any hard reg, then this must be 0 for correct output. */
21868
-#define MODES_TIEABLE_P(MODE1, MODE2) \
21869
- (SCALAR_FLOAT_MODE_P (MODE1) \
21870
+ for any hard reg, then this must be 0 for correct output.
21872
+ PTImode cannot tie with other modes because PTImode is restricted to even
21873
+ GPR registers, and TImode can go in any GPR as well as VSX registers (PR
21875
+#define MODES_TIEABLE_P(MODE1, MODE2) \
21876
+ ((MODE1) == PTImode \
21877
+ ? (MODE2) == PTImode \
21878
+ : (MODE2) == PTImode \
21880
+ : SCALAR_FLOAT_MODE_P (MODE1) \
21881
? SCALAR_FLOAT_MODE_P (MODE2) \
21882
: SCALAR_FLOAT_MODE_P (MODE2) \
21883
- ? SCALAR_FLOAT_MODE_P (MODE1) \
21885
: GET_MODE_CLASS (MODE1) == MODE_CC \
21886
? GET_MODE_CLASS (MODE2) == MODE_CC \
21887
: GET_MODE_CLASS (MODE2) == MODE_CC \
21888
- ? GET_MODE_CLASS (MODE1) == MODE_CC \
21890
: SPE_VECTOR_MODE (MODE1) \
21891
? SPE_VECTOR_MODE (MODE2) \
21892
: SPE_VECTOR_MODE (MODE2) \
21893
- ? SPE_VECTOR_MODE (MODE1) \
21894
- : ALTIVEC_VECTOR_MODE (MODE1) \
21895
- ? ALTIVEC_VECTOR_MODE (MODE2) \
21896
- : ALTIVEC_VECTOR_MODE (MODE2) \
21897
- ? ALTIVEC_VECTOR_MODE (MODE1) \
21899
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
21900
? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
21901
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
21902
- ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
21906
/* Post-reload, we can't use any new AltiVec registers, as we already
21907
@@ -1240,6 +1329,7 @@
21915
@@ -1270,6 +1360,7 @@
21920
"NON_SPECIAL_REGS", \
21923
@@ -1299,6 +1390,7 @@
21924
{ 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
21925
{ 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
21926
{ 0x00000000, 0x00000000, 0x00000000, 0x00010000 }, /* SPEFSCR_REGS */ \
21927
+ { 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, /* SPR_REGS */ \
21928
{ 0xffffffff, 0xffffffff, 0x00000008, 0x00020000 }, /* NON_SPECIAL_REGS */ \
21929
{ 0x00000000, 0x00000000, 0x00000002, 0x00000000 }, /* LINK_REGS */ \
21930
{ 0x00000000, 0x00000000, 0x00000004, 0x00000000 }, /* CTR_REGS */ \
21931
@@ -1309,7 +1401,7 @@
21932
{ 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */ \
21933
{ 0xffffffff, 0x00000000, 0x00000ffe, 0x00020000 }, /* NON_FLOAT_REGS */ \
21934
{ 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */ \
21935
- { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0003ffff } /* ALL_REGS */ \
21936
+ { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0007ffff } /* ALL_REGS */ \
21939
/* The same information, inverted:
21940
@@ -1337,7 +1429,18 @@
21941
RS6000_CONSTRAINT_wa, /* Any VSX register */
21942
RS6000_CONSTRAINT_wd, /* VSX register for V2DF */
21943
RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
21944
+ RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */
21945
+ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */
21946
+ RS6000_CONSTRAINT_wm, /* VSX register for direct move */
21947
+ RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */
21948
RS6000_CONSTRAINT_ws, /* VSX register for DF */
21949
+ RS6000_CONSTRAINT_wt, /* VSX register for TImode */
21950
+ RS6000_CONSTRAINT_wu, /* Altivec register for float load/stores. */
21951
+ RS6000_CONSTRAINT_wv, /* Altivec register for double load/stores. */
21952
+ RS6000_CONSTRAINT_ww, /* FP or VSX register for vsx float ops. */
21953
+ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
21954
+ RS6000_CONSTRAINT_wy, /* VSX register for SF */
21955
+ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */
21956
RS6000_CONSTRAINT_MAX
21959
@@ -1425,21 +1528,14 @@
21961
#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 || flag_asan != 0)
21963
-/* Size of the outgoing register save area */
21964
-#define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \
21965
- || DEFAULT_ABI == ABI_DARWIN) \
21966
- ? (TARGET_64BIT ? 64 : 32) \
21969
/* Size of the fixed area on the stack */
21970
#define RS6000_SAVE_AREA \
21971
- (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_DARWIN) ? 24 : 8) \
21972
+ ((DEFAULT_ABI == ABI_V4 ? 8 : DEFAULT_ABI == ABI_ELFv2 ? 16 : 24) \
21973
<< (TARGET_64BIT ? 1 : 0))
21975
-/* MEM representing address to save the TOC register */
21976
-#define RS6000_SAVE_TOC gen_rtx_MEM (Pmode, \
21977
- plus_constant (Pmode, stack_pointer_rtx, \
21978
- (TARGET_32BIT ? 20 : 40)))
21979
+/* Stack offset for toc save slot. */
21980
+#define RS6000_TOC_SAVE_SLOT \
21981
+ ((DEFAULT_ABI == ABI_ELFv2 ? 12 : 20) << (TARGET_64BIT ? 1 : 0))
21983
/* Align an address */
21984
#define RS6000_ALIGN(n,a) (((n) + (a) - 1) & ~((a) - 1))
21985
@@ -1489,7 +1585,7 @@
21986
/* Define this if stack space is still allocated for a parameter passed
21987
in a register. The value is the number of bytes allocated to this
21989
-#define REG_PARM_STACK_SPACE(FNDECL) RS6000_REG_SAVE
21990
+#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL))
21992
/* Define this if the above stack space is to be considered part of the
21993
space allocated by the caller. */
21994
@@ -1522,7 +1618,7 @@
21995
NONLOCAL needs twice Pmode to maintain both backchain and SP. */
21996
#define STACK_SAVEAREA_MODE(LEVEL) \
21997
(LEVEL == SAVE_FUNCTION ? VOIDmode \
21998
- : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : TImode) : Pmode)
21999
+ : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : PTImode) : Pmode)
22001
/* Minimum and maximum general purpose registers used to hold arguments. */
22002
#define GP_ARG_MIN_REG 3
22003
@@ -1533,9 +1629,8 @@
22004
#define FP_ARG_MIN_REG 33
22005
#define FP_ARG_AIX_MAX_REG 45
22006
#define FP_ARG_V4_MAX_REG 40
22007
-#define FP_ARG_MAX_REG ((DEFAULT_ABI == ABI_AIX \
22008
- || DEFAULT_ABI == ABI_DARWIN) \
22009
- ? FP_ARG_AIX_MAX_REG : FP_ARG_V4_MAX_REG)
22010
+#define FP_ARG_MAX_REG (DEFAULT_ABI == ABI_V4 \
22011
+ ? FP_ARG_V4_MAX_REG : FP_ARG_AIX_MAX_REG)
22012
#define FP_ARG_NUM_REG (FP_ARG_MAX_REG - FP_ARG_MIN_REG + 1)
22014
/* Minimum and maximum AltiVec registers used to hold arguments. */
22015
@@ -1543,10 +1638,17 @@
22016
#define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11)
22017
#define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1)
22019
+/* Maximum number of registers per ELFv2 homogeneous aggregate argument. */
22020
+#define AGGR_ARG_NUM_REG 8
22022
/* Return registers */
22023
#define GP_ARG_RETURN GP_ARG_MIN_REG
22024
#define FP_ARG_RETURN FP_ARG_MIN_REG
22025
#define ALTIVEC_ARG_RETURN (FIRST_ALTIVEC_REGNO + 2)
22026
+#define FP_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? FP_ARG_RETURN \
22027
+ : (FP_ARG_RETURN + AGGR_ARG_NUM_REG - 1))
22028
+#define ALTIVEC_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? ALTIVEC_ARG_RETURN \
22029
+ : (ALTIVEC_ARG_RETURN + AGGR_ARG_NUM_REG - 1))
22031
/* Flags for the call/call_value rtl operations set up by function_arg */
22032
#define CALL_NORMAL 0x00000000 /* no special processing */
22033
@@ -1566,8 +1668,10 @@
22034
On RS/6000, this is r3, fp1, and v2 (for AltiVec). */
22035
#define FUNCTION_VALUE_REGNO_P(N) \
22036
((N) == GP_ARG_RETURN \
22037
- || ((N) == FP_ARG_RETURN && TARGET_HARD_FLOAT && TARGET_FPRS) \
22038
- || ((N) == ALTIVEC_ARG_RETURN && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
22039
+ || ((N) >= FP_ARG_RETURN && (N) <= FP_ARG_MAX_RETURN \
22040
+ && TARGET_HARD_FLOAT && TARGET_FPRS) \
22041
+ || ((N) >= ALTIVEC_ARG_RETURN && (N) <= ALTIVEC_ARG_MAX_RETURN \
22042
+ && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
22044
/* 1 if N is a possible register number for function argument passing.
22045
On RS/6000, these are r3-r10 and fp1-fp13.
22046
@@ -1691,11 +1795,8 @@
22047
/* Number of bytes into the frame return addresses can be found. See
22048
rs6000_stack_info in rs6000.c for more information on how the different
22049
abi's store the return address. */
22050
-#define RETURN_ADDRESS_OFFSET \
22051
- ((DEFAULT_ABI == ABI_AIX \
22052
- || DEFAULT_ABI == ABI_DARWIN) ? (TARGET_32BIT ? 8 : 16) : \
22053
- (DEFAULT_ABI == ABI_V4) ? 4 : \
22054
- (internal_error ("RETURN_ADDRESS_OFFSET not supported"), 0))
22055
+#define RETURN_ADDRESS_OFFSET \
22056
+ ((DEFAULT_ABI == ABI_V4 ? 4 : 8) << (TARGET_64BIT ? 1 : 0))
22058
/* The current return address is in link register (65). The return address
22059
of anything farther back is accessed normally at an offset of 8 from the
22060
@@ -2215,6 +2316,9 @@
22061
&rs6000_reg_names[111][0], /* spe_acc */ \
22062
&rs6000_reg_names[112][0], /* spefscr */ \
22063
&rs6000_reg_names[113][0], /* sfp */ \
22064
+ &rs6000_reg_names[114][0], /* tfhar */ \
22065
+ &rs6000_reg_names[115][0], /* tfiar */ \
22066
+ &rs6000_reg_names[116][0], /* texasr */ \
22069
/* Table of additional register names to use in user input. */
22070
@@ -2268,7 +2372,9 @@
22071
{"vs48", 93}, {"vs49", 94}, {"vs50", 95}, {"vs51", 96}, \
22072
{"vs52", 97}, {"vs53", 98}, {"vs54", 99}, {"vs55", 100}, \
22073
{"vs56", 101},{"vs57", 102},{"vs58", 103},{"vs59", 104}, \
22074
- {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108} }
22075
+ {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108}, \
22076
+ /* Transactional Memory Facility (HTM) Registers. */ \
22077
+ {"tfhar", 114}, {"tfiar", 115}, {"texasr", 116} }
22079
/* This is how to output an element of a case-vector that is relative. */
22081
@@ -2357,7 +2463,12 @@
22082
#define RS6000_BTC_ATTR_MASK 0x00000700 /* Mask of the attributes. */
22084
/* Miscellaneous information. */
22085
-#define RS6000_BTC_OVERLOADED 0x4000000 /* function is overloaded. */
22086
+#define RS6000_BTC_SPR 0x01000000 /* function references SPRs. */
22087
+#define RS6000_BTC_VOID 0x02000000 /* function has no return value. */
22088
+#define RS6000_BTC_OVERLOADED 0x04000000 /* function is overloaded. */
22089
+#define RS6000_BTC_32BIT 0x08000000 /* function references SPRs. */
22090
+#define RS6000_BTC_64BIT 0x10000000 /* function references SPRs. */
22091
+#define RS6000_BTC_MISC_MASK 0x1f000000 /* Mask of the misc info. */
22093
/* Convenience macros to document the instruction type. */
22094
#define RS6000_BTC_MEM RS6000_BTC_MISC /* load/store touches mem. */
22095
@@ -2369,6 +2480,9 @@
22096
#define RS6000_BTM_ALWAYS 0 /* Always enabled. */
22097
#define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */
22098
#define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */
22099
+#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */
22100
+#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */
22101
+#define RS6000_BTM_HTM MASK_HTM /* hardware TM funcs. */
22102
#define RS6000_BTM_SPE MASK_STRING /* E500 */
22103
#define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */
22104
#define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */
22105
@@ -2380,10 +2494,13 @@
22107
#define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \
22109
+ | RS6000_BTM_P8_VECTOR \
22110
+ | RS6000_BTM_CRYPTO \
22112
| RS6000_BTM_FRES \
22113
| RS6000_BTM_FRSQRTE \
22114
| RS6000_BTM_FRSQRTES \
22115
+ | RS6000_BTM_HTM \
22116
| RS6000_BTM_POPCNTD \
22119
@@ -2395,6 +2512,7 @@
22120
#undef RS6000_BUILTIN_A
22121
#undef RS6000_BUILTIN_D
22122
#undef RS6000_BUILTIN_E
22123
+#undef RS6000_BUILTIN_H
22124
#undef RS6000_BUILTIN_P
22125
#undef RS6000_BUILTIN_Q
22126
#undef RS6000_BUILTIN_S
22127
@@ -2406,6 +2524,7 @@
22128
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22129
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22130
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22131
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22132
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22133
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22134
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
22135
@@ -2424,6 +2543,7 @@
22136
#undef RS6000_BUILTIN_A
22137
#undef RS6000_BUILTIN_D
22138
#undef RS6000_BUILTIN_E
22139
+#undef RS6000_BUILTIN_H
22140
#undef RS6000_BUILTIN_P
22141
#undef RS6000_BUILTIN_Q
22142
#undef RS6000_BUILTIN_S
22143
--- a/src/gcc/config/rs6000/altivec.md
22144
+++ b/src/gcc/config/rs6000/altivec.md
22145
@@ -41,15 +41,11 @@
22158
+ UNSPEC_VPACK_SIGN_SIGN_SAT
22159
+ UNSPEC_VPACK_SIGN_UNS_SAT
22160
+ UNSPEC_VPACK_UNS_UNS_SAT
22161
+ UNSPEC_VPACK_UNS_UNS_MOD
22165
@@ -71,12 +67,10 @@
22170
+ UNSPEC_VUNPACK_HI_SIGN
22171
+ UNSPEC_VUNPACK_LO_SIGN
22180
@@ -134,6 +128,7 @@
22187
(define_c_enum "unspecv"
22188
@@ -146,6 +141,8 @@
22191
(define_mode_iterator VI [V4SI V8HI V16QI])
22192
+;; Like VI, but add ISA 2.07 integer vector ops
22193
+(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
22194
;; Short vec in modes
22195
(define_mode_iterator VIshort [V8HI V16QI])
22197
@@ -159,9 +156,19 @@
22198
;; Like VM, except don't do TImode
22199
(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI])
22201
-(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")])
22202
-(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")])
22203
+(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
22204
+(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
22205
+(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
22206
+ (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
22207
+ (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
22208
+ (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")])
22210
+;; Vector pack/unpack
22211
+(define_mode_iterator VP [V2DI V4SI V8HI])
22212
+(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")])
22213
+(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")])
22214
+(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")])
22216
;; Vector move instructions.
22217
(define_insn "*altivec_mov<mode>"
22218
[(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v")
22219
@@ -378,10 +385,10 @@
22222
(define_insn "add<mode>3"
22223
- [(set (match_operand:VI 0 "register_operand" "=v")
22224
- (plus:VI (match_operand:VI 1 "register_operand" "v")
22225
- (match_operand:VI 2 "register_operand" "v")))]
22227
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22228
+ (plus:VI2 (match_operand:VI2 1 "register_operand" "v")
22229
+ (match_operand:VI2 2 "register_operand" "v")))]
22231
"vaddu<VI_char>m %0,%1,%2"
22232
[(set_attr "type" "vecsimple")])
22234
@@ -398,17 +405,17 @@
22235
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
22236
(match_operand:V4SI 2 "register_operand" "v")]
22239
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
22241
[(set_attr "type" "vecsimple")])
22243
(define_insn "altivec_vaddu<VI_char>s"
22244
[(set (match_operand:VI 0 "register_operand" "=v")
22245
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
22246
- (match_operand:VI 2 "register_operand" "v")]
22247
+ (match_operand:VI 2 "register_operand" "v")]
22249
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22252
"vaddu<VI_char>s %0,%1,%2"
22253
[(set_attr "type" "vecsimple")])
22255
@@ -418,16 +425,16 @@
22256
(match_operand:VI 2 "register_operand" "v")]
22258
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22260
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
22261
"vadds<VI_char>s %0,%1,%2"
22262
[(set_attr "type" "vecsimple")])
22265
(define_insn "sub<mode>3"
22266
- [(set (match_operand:VI 0 "register_operand" "=v")
22267
- (minus:VI (match_operand:VI 1 "register_operand" "v")
22268
- (match_operand:VI 2 "register_operand" "v")))]
22270
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22271
+ (minus:VI2 (match_operand:VI2 1 "register_operand" "v")
22272
+ (match_operand:VI2 2 "register_operand" "v")))]
22274
"vsubu<VI_char>m %0,%1,%2"
22275
[(set_attr "type" "vecsimple")])
22277
@@ -444,7 +451,7 @@
22278
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
22279
(match_operand:V4SI 2 "register_operand" "v")]
22282
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
22284
[(set_attr "type" "vecsimple")])
22286
@@ -454,7 +461,7 @@
22287
(match_operand:VI 2 "register_operand" "v")]
22289
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22291
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
22292
"vsubu<VI_char>s %0,%1,%2"
22293
[(set_attr "type" "vecsimple")])
22295
@@ -464,7 +471,7 @@
22296
(match_operand:VI 2 "register_operand" "v")]
22298
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22300
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
22301
"vsubs<VI_char>s %0,%1,%2"
22302
[(set_attr "type" "vecsimple")])
22304
@@ -483,7 +490,7 @@
22305
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
22306
(match_operand:VI 2 "register_operand" "v")]
22309
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
22310
"vavgs<VI_char> %0,%1,%2"
22311
[(set_attr "type" "vecsimple")])
22313
@@ -492,31 +499,31 @@
22314
(unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
22315
(match_operand:V4SF 2 "register_operand" "v")]
22318
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
22320
[(set_attr "type" "veccmp")])
22322
(define_insn "*altivec_eq<mode>"
22323
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
22324
- (eq:VI (match_operand:VI 1 "altivec_register_operand" "v")
22325
- (match_operand:VI 2 "altivec_register_operand" "v")))]
22327
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
22328
+ (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
22329
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
22331
"vcmpequ<VI_char> %0,%1,%2"
22332
[(set_attr "type" "veccmp")])
22334
(define_insn "*altivec_gt<mode>"
22335
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
22336
- (gt:VI (match_operand:VI 1 "altivec_register_operand" "v")
22337
- (match_operand:VI 2 "altivec_register_operand" "v")))]
22339
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
22340
+ (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
22341
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
22343
"vcmpgts<VI_char> %0,%1,%2"
22344
[(set_attr "type" "veccmp")])
22346
(define_insn "*altivec_gtu<mode>"
22347
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
22348
- (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v")
22349
- (match_operand:VI 2 "altivec_register_operand" "v")))]
22351
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
22352
+ (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
22353
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
22355
"vcmpgtu<VI_char> %0,%1,%2"
22356
[(set_attr "type" "veccmp")])
22358
@@ -642,7 +649,7 @@
22359
convert_move (small_swap, swap, 0);
22361
low_product = gen_reg_rtx (V4SImode);
22362
- emit_insn (gen_vec_widen_umult_odd_v8hi (low_product, one, two));
22363
+ emit_insn (gen_altivec_vmulouh (low_product, one, two));
22365
high_product = gen_reg_rtx (V4SImode);
22366
emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
22367
@@ -669,11 +676,19 @@
22368
emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2]));
22369
emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2]));
22371
- emit_insn (gen_altivec_vmrghw (high, even, odd));
22372
- emit_insn (gen_altivec_vmrglw (low, even, odd));
22373
+ if (BYTES_BIG_ENDIAN)
22375
+ emit_insn (gen_altivec_vmrghw (high, even, odd));
22376
+ emit_insn (gen_altivec_vmrglw (low, even, odd));
22377
+ emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
22381
+ emit_insn (gen_altivec_vmrghw (high, odd, even));
22382
+ emit_insn (gen_altivec_vmrglw (low, odd, even));
22383
+ emit_insn (gen_altivec_vpkuwum (operands[0], low, high));
22386
- emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
22391
@@ -744,18 +759,18 @@
22394
(define_insn "umax<mode>3"
22395
- [(set (match_operand:VI 0 "register_operand" "=v")
22396
- (umax:VI (match_operand:VI 1 "register_operand" "v")
22397
- (match_operand:VI 2 "register_operand" "v")))]
22399
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22400
+ (umax:VI2 (match_operand:VI2 1 "register_operand" "v")
22401
+ (match_operand:VI2 2 "register_operand" "v")))]
22403
"vmaxu<VI_char> %0,%1,%2"
22404
[(set_attr "type" "vecsimple")])
22406
(define_insn "smax<mode>3"
22407
- [(set (match_operand:VI 0 "register_operand" "=v")
22408
- (smax:VI (match_operand:VI 1 "register_operand" "v")
22409
- (match_operand:VI 2 "register_operand" "v")))]
22411
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22412
+ (smax:VI2 (match_operand:VI2 1 "register_operand" "v")
22413
+ (match_operand:VI2 2 "register_operand" "v")))]
22415
"vmaxs<VI_char> %0,%1,%2"
22416
[(set_attr "type" "vecsimple")])
22418
@@ -768,18 +783,18 @@
22419
[(set_attr "type" "veccmp")])
22421
(define_insn "umin<mode>3"
22422
- [(set (match_operand:VI 0 "register_operand" "=v")
22423
- (umin:VI (match_operand:VI 1 "register_operand" "v")
22424
- (match_operand:VI 2 "register_operand" "v")))]
22426
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22427
+ (umin:VI2 (match_operand:VI2 1 "register_operand" "v")
22428
+ (match_operand:VI2 2 "register_operand" "v")))]
22430
"vminu<VI_char> %0,%1,%2"
22431
[(set_attr "type" "vecsimple")])
22433
(define_insn "smin<mode>3"
22434
- [(set (match_operand:VI 0 "register_operand" "=v")
22435
- (smin:VI (match_operand:VI 1 "register_operand" "v")
22436
- (match_operand:VI 2 "register_operand" "v")))]
22438
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22439
+ (smin:VI2 (match_operand:VI2 1 "register_operand" "v")
22440
+ (match_operand:VI2 2 "register_operand" "v")))]
22442
"vmins<VI_char> %0,%1,%2"
22443
[(set_attr "type" "vecsimple")])
22445
@@ -935,7 +950,136 @@
22447
[(set_attr "type" "vecperm")])
22449
-(define_insn "vec_widen_umult_even_v16qi"
22450
+;; Power8 vector merge even/odd
22451
+(define_insn "p8_vmrgew"
22452
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
22455
+ (match_operand:V4SI 1 "register_operand" "v")
22456
+ (match_operand:V4SI 2 "register_operand" "v"))
22457
+ (parallel [(const_int 0) (const_int 4)
22458
+ (const_int 2) (const_int 6)])))]
22459
+ "TARGET_P8_VECTOR"
22460
+ "vmrgew %0,%1,%2"
22461
+ [(set_attr "type" "vecperm")])
22463
+(define_insn "p8_vmrgow"
22464
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
22467
+ (match_operand:V4SI 1 "register_operand" "v")
22468
+ (match_operand:V4SI 2 "register_operand" "v"))
22469
+ (parallel [(const_int 1) (const_int 5)
22470
+ (const_int 3) (const_int 7)])))]
22471
+ "TARGET_P8_VECTOR"
22472
+ "vmrgow %0,%1,%2"
22473
+ [(set_attr "type" "vecperm")])
22475
+(define_expand "vec_widen_umult_even_v16qi"
22476
+ [(use (match_operand:V8HI 0 "register_operand" ""))
22477
+ (use (match_operand:V16QI 1 "register_operand" ""))
22478
+ (use (match_operand:V16QI 2 "register_operand" ""))]
22481
+ if (BYTES_BIG_ENDIAN)
22482
+ emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
22484
+ emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
22488
+(define_expand "vec_widen_smult_even_v16qi"
22489
+ [(use (match_operand:V8HI 0 "register_operand" ""))
22490
+ (use (match_operand:V16QI 1 "register_operand" ""))
22491
+ (use (match_operand:V16QI 2 "register_operand" ""))]
22494
+ if (BYTES_BIG_ENDIAN)
22495
+ emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
22497
+ emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
22501
+(define_expand "vec_widen_umult_even_v8hi"
22502
+ [(use (match_operand:V4SI 0 "register_operand" ""))
22503
+ (use (match_operand:V8HI 1 "register_operand" ""))
22504
+ (use (match_operand:V8HI 2 "register_operand" ""))]
22507
+ if (BYTES_BIG_ENDIAN)
22508
+ emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
22510
+ emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
22514
+(define_expand "vec_widen_smult_even_v8hi"
22515
+ [(use (match_operand:V4SI 0 "register_operand" ""))
22516
+ (use (match_operand:V8HI 1 "register_operand" ""))
22517
+ (use (match_operand:V8HI 2 "register_operand" ""))]
22520
+ if (BYTES_BIG_ENDIAN)
22521
+ emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
22523
+ emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
22527
+(define_expand "vec_widen_umult_odd_v16qi"
22528
+ [(use (match_operand:V8HI 0 "register_operand" ""))
22529
+ (use (match_operand:V16QI 1 "register_operand" ""))
22530
+ (use (match_operand:V16QI 2 "register_operand" ""))]
22533
+ if (BYTES_BIG_ENDIAN)
22534
+ emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
22536
+ emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
22540
+(define_expand "vec_widen_smult_odd_v16qi"
22541
+ [(use (match_operand:V8HI 0 "register_operand" ""))
22542
+ (use (match_operand:V16QI 1 "register_operand" ""))
22543
+ (use (match_operand:V16QI 2 "register_operand" ""))]
22546
+ if (BYTES_BIG_ENDIAN)
22547
+ emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
22549
+ emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
22553
+(define_expand "vec_widen_umult_odd_v8hi"
22554
+ [(use (match_operand:V4SI 0 "register_operand" ""))
22555
+ (use (match_operand:V8HI 1 "register_operand" ""))
22556
+ (use (match_operand:V8HI 2 "register_operand" ""))]
22559
+ if (BYTES_BIG_ENDIAN)
22560
+ emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
22562
+ emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
22566
+(define_expand "vec_widen_smult_odd_v8hi"
22567
+ [(use (match_operand:V4SI 0 "register_operand" ""))
22568
+ (use (match_operand:V8HI 1 "register_operand" ""))
22569
+ (use (match_operand:V8HI 2 "register_operand" ""))]
22572
+ if (BYTES_BIG_ENDIAN)
22573
+ emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
22575
+ emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
22579
+(define_insn "altivec_vmuleub"
22580
[(set (match_operand:V8HI 0 "register_operand" "=v")
22581
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
22582
(match_operand:V16QI 2 "register_operand" "v")]
22583
@@ -944,43 +1088,25 @@
22585
[(set_attr "type" "veccomplex")])
22587
-(define_insn "vec_widen_smult_even_v16qi"
22588
+(define_insn "altivec_vmuloub"
22589
[(set (match_operand:V8HI 0 "register_operand" "=v")
22590
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
22591
(match_operand:V16QI 2 "register_operand" "v")]
22592
- UNSPEC_VMULESB))]
22593
+ UNSPEC_VMULOUB))]
22595
- "vmulesb %0,%1,%2"
22596
+ "vmuloub %0,%1,%2"
22597
[(set_attr "type" "veccomplex")])
22599
-(define_insn "vec_widen_umult_even_v8hi"
22600
- [(set (match_operand:V4SI 0 "register_operand" "=v")
22601
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
22602
- (match_operand:V8HI 2 "register_operand" "v")]
22603
- UNSPEC_VMULEUH))]
22605
- "vmuleuh %0,%1,%2"
22606
- [(set_attr "type" "veccomplex")])
22608
-(define_insn "vec_widen_smult_even_v8hi"
22609
- [(set (match_operand:V4SI 0 "register_operand" "=v")
22610
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
22611
- (match_operand:V8HI 2 "register_operand" "v")]
22612
- UNSPEC_VMULESH))]
22614
- "vmulesh %0,%1,%2"
22615
- [(set_attr "type" "veccomplex")])
22617
-(define_insn "vec_widen_umult_odd_v16qi"
22618
+(define_insn "altivec_vmulesb"
22619
[(set (match_operand:V8HI 0 "register_operand" "=v")
22620
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
22621
(match_operand:V16QI 2 "register_operand" "v")]
22622
- UNSPEC_VMULOUB))]
22623
+ UNSPEC_VMULESB))]
22625
- "vmuloub %0,%1,%2"
22626
+ "vmulesb %0,%1,%2"
22627
[(set_attr "type" "veccomplex")])
22629
-(define_insn "vec_widen_smult_odd_v16qi"
22630
+(define_insn "altivec_vmulosb"
22631
[(set (match_operand:V8HI 0 "register_operand" "=v")
22632
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
22633
(match_operand:V16QI 2 "register_operand" "v")]
22634
@@ -989,167 +1115,124 @@
22636
[(set_attr "type" "veccomplex")])
22638
-(define_insn "vec_widen_umult_odd_v8hi"
22639
+(define_insn "altivec_vmuleuh"
22640
[(set (match_operand:V4SI 0 "register_operand" "=v")
22641
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
22642
(match_operand:V8HI 2 "register_operand" "v")]
22643
+ UNSPEC_VMULEUH))]
22645
+ "vmuleuh %0,%1,%2"
22646
+ [(set_attr "type" "veccomplex")])
22648
+(define_insn "altivec_vmulouh"
22649
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
22650
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
22651
+ (match_operand:V8HI 2 "register_operand" "v")]
22655
[(set_attr "type" "veccomplex")])
22657
-(define_insn "vec_widen_smult_odd_v8hi"
22658
+(define_insn "altivec_vmulesh"
22659
[(set (match_operand:V4SI 0 "register_operand" "=v")
22660
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
22661
(match_operand:V8HI 2 "register_operand" "v")]
22662
+ UNSPEC_VMULESH))]
22664
+ "vmulesh %0,%1,%2"
22665
+ [(set_attr "type" "veccomplex")])
22667
+(define_insn "altivec_vmulosh"
22668
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
22669
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
22670
+ (match_operand:V8HI 2 "register_operand" "v")]
22674
[(set_attr "type" "veccomplex")])
22677
-;; logical ops. Have the logical ops follow the memory ops in
22678
-;; terms of whether to prefer VSX or Altivec
22680
-(define_insn "*altivec_and<mode>3"
22681
- [(set (match_operand:VM 0 "register_operand" "=v")
22682
- (and:VM (match_operand:VM 1 "register_operand" "v")
22683
- (match_operand:VM 2 "register_operand" "v")))]
22684
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
22686
- [(set_attr "type" "vecsimple")])
22688
-(define_insn "*altivec_ior<mode>3"
22689
- [(set (match_operand:VM 0 "register_operand" "=v")
22690
- (ior:VM (match_operand:VM 1 "register_operand" "v")
22691
- (match_operand:VM 2 "register_operand" "v")))]
22692
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
22694
- [(set_attr "type" "vecsimple")])
22696
-(define_insn "*altivec_xor<mode>3"
22697
- [(set (match_operand:VM 0 "register_operand" "=v")
22698
- (xor:VM (match_operand:VM 1 "register_operand" "v")
22699
- (match_operand:VM 2 "register_operand" "v")))]
22700
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
22702
- [(set_attr "type" "vecsimple")])
22704
-(define_insn "*altivec_one_cmpl<mode>2"
22705
- [(set (match_operand:VM 0 "register_operand" "=v")
22706
- (not:VM (match_operand:VM 1 "register_operand" "v")))]
22707
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
22709
- [(set_attr "type" "vecsimple")])
22711
-(define_insn "*altivec_nor<mode>3"
22712
- [(set (match_operand:VM 0 "register_operand" "=v")
22713
- (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
22714
- (match_operand:VM 2 "register_operand" "v"))))]
22715
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
22717
- [(set_attr "type" "vecsimple")])
22719
-(define_insn "*altivec_andc<mode>3"
22720
- [(set (match_operand:VM 0 "register_operand" "=v")
22721
- (and:VM (not:VM (match_operand:VM 2 "register_operand" "v"))
22722
- (match_operand:VM 1 "register_operand" "v")))]
22723
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
22725
- [(set_attr "type" "vecsimple")])
22727
-(define_insn "altivec_vpkuhum"
22728
- [(set (match_operand:V16QI 0 "register_operand" "=v")
22729
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
22730
- (match_operand:V8HI 2 "register_operand" "v")]
22731
- UNSPEC_VPKUHUM))]
22733
- "vpkuhum %0,%1,%2"
22734
- [(set_attr "type" "vecperm")])
22736
-(define_insn "altivec_vpkuwum"
22737
- [(set (match_operand:V8HI 0 "register_operand" "=v")
22738
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
22739
- (match_operand:V4SI 2 "register_operand" "v")]
22740
- UNSPEC_VPKUWUM))]
22742
- "vpkuwum %0,%1,%2"
22743
- [(set_attr "type" "vecperm")])
22745
+;; Vector pack/unpack
22746
(define_insn "altivec_vpkpx"
22747
[(set (match_operand:V8HI 0 "register_operand" "=v")
22748
(unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
22749
(match_operand:V4SI 2 "register_operand" "v")]
22755
+ if (BYTES_BIG_ENDIAN)
22756
+ return \"vpkpx %0,%1,%2\";
22758
+ return \"vpkpx %0,%2,%1\";
22760
[(set_attr "type" "vecperm")])
22762
-(define_insn "altivec_vpkshss"
22763
- [(set (match_operand:V16QI 0 "register_operand" "=v")
22764
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
22765
- (match_operand:V8HI 2 "register_operand" "v")]
22767
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22769
- "vpkshss %0,%1,%2"
22770
+(define_insn "altivec_vpks<VI_char>ss"
22771
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
22772
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
22773
+ (match_operand:VP 2 "register_operand" "v")]
22774
+ UNSPEC_VPACK_SIGN_SIGN_SAT))]
22778
+ if (BYTES_BIG_ENDIAN)
22779
+ return \"vpks<VI_char>ss %0,%1,%2\";
22781
+ return \"vpks<VI_char>ss %0,%2,%1\";
22783
[(set_attr "type" "vecperm")])
22785
-(define_insn "altivec_vpkswss"
22786
- [(set (match_operand:V8HI 0 "register_operand" "=v")
22787
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
22788
- (match_operand:V4SI 2 "register_operand" "v")]
22790
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22792
- "vpkswss %0,%1,%2"
22793
+(define_insn "altivec_vpks<VI_char>us"
22794
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
22795
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
22796
+ (match_operand:VP 2 "register_operand" "v")]
22797
+ UNSPEC_VPACK_SIGN_UNS_SAT))]
22801
+ if (BYTES_BIG_ENDIAN)
22802
+ return \"vpks<VI_char>us %0,%1,%2\";
22804
+ return \"vpks<VI_char>us %0,%2,%1\";
22806
[(set_attr "type" "vecperm")])
22808
-(define_insn "altivec_vpkuhus"
22809
- [(set (match_operand:V16QI 0 "register_operand" "=v")
22810
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
22811
- (match_operand:V8HI 2 "register_operand" "v")]
22813
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22815
- "vpkuhus %0,%1,%2"
22816
+(define_insn "altivec_vpku<VI_char>us"
22817
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
22818
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
22819
+ (match_operand:VP 2 "register_operand" "v")]
22820
+ UNSPEC_VPACK_UNS_UNS_SAT))]
22824
+ if (BYTES_BIG_ENDIAN)
22825
+ return \"vpku<VI_char>us %0,%1,%2\";
22827
+ return \"vpku<VI_char>us %0,%2,%1\";
22829
[(set_attr "type" "vecperm")])
22831
-(define_insn "altivec_vpkshus"
22832
- [(set (match_operand:V16QI 0 "register_operand" "=v")
22833
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
22834
- (match_operand:V8HI 2 "register_operand" "v")]
22836
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22838
- "vpkshus %0,%1,%2"
22839
+(define_insn "altivec_vpku<VI_char>um"
22840
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
22841
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
22842
+ (match_operand:VP 2 "register_operand" "v")]
22843
+ UNSPEC_VPACK_UNS_UNS_MOD))]
22847
+ if (BYTES_BIG_ENDIAN)
22848
+ return \"vpku<VI_char>um %0,%1,%2\";
22850
+ return \"vpku<VI_char>um %0,%2,%1\";
22852
[(set_attr "type" "vecperm")])
22854
-(define_insn "altivec_vpkuwus"
22855
- [(set (match_operand:V8HI 0 "register_operand" "=v")
22856
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
22857
- (match_operand:V4SI 2 "register_operand" "v")]
22859
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22861
- "vpkuwus %0,%1,%2"
22862
- [(set_attr "type" "vecperm")])
22864
-(define_insn "altivec_vpkswus"
22865
- [(set (match_operand:V8HI 0 "register_operand" "=v")
22866
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
22867
- (match_operand:V4SI 2 "register_operand" "v")]
22869
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
22871
- "vpkswus %0,%1,%2"
22872
- [(set_attr "type" "vecperm")])
22874
(define_insn "*altivec_vrl<VI_char>"
22875
- [(set (match_operand:VI 0 "register_operand" "=v")
22876
- (rotate:VI (match_operand:VI 1 "register_operand" "v")
22877
- (match_operand:VI 2 "register_operand" "v")))]
22879
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22880
+ (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
22881
+ (match_operand:VI2 2 "register_operand" "v")))]
22883
"vrl<VI_char> %0,%1,%2"
22884
[(set_attr "type" "vecsimple")])
22886
@@ -1172,26 +1255,26 @@
22887
[(set_attr "type" "vecperm")])
22889
(define_insn "*altivec_vsl<VI_char>"
22890
- [(set (match_operand:VI 0 "register_operand" "=v")
22891
- (ashift:VI (match_operand:VI 1 "register_operand" "v")
22892
- (match_operand:VI 2 "register_operand" "v")))]
22894
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22895
+ (ashift:VI2 (match_operand:VI2 1 "register_operand" "v")
22896
+ (match_operand:VI2 2 "register_operand" "v")))]
22898
"vsl<VI_char> %0,%1,%2"
22899
[(set_attr "type" "vecsimple")])
22901
(define_insn "*altivec_vsr<VI_char>"
22902
- [(set (match_operand:VI 0 "register_operand" "=v")
22903
- (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
22904
- (match_operand:VI 2 "register_operand" "v")))]
22906
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22907
+ (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
22908
+ (match_operand:VI2 2 "register_operand" "v")))]
22910
"vsr<VI_char> %0,%1,%2"
22911
[(set_attr "type" "vecsimple")])
22913
(define_insn "*altivec_vsra<VI_char>"
22914
- [(set (match_operand:VI 0 "register_operand" "=v")
22915
- (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
22916
- (match_operand:VI 2 "register_operand" "v")))]
22918
+ [(set (match_operand:VI2 0 "register_operand" "=v")
22919
+ (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
22920
+ (match_operand:VI2 2 "register_operand" "v")))]
22922
"vsra<VI_char> %0,%1,%2"
22923
[(set_attr "type" "vecsimple")])
22925
@@ -1335,7 +1418,12 @@
22926
(match_operand:V16QI 3 "register_operand" "")]
22931
+ if (!BYTES_BIG_ENDIAN) {
22932
+ altivec_expand_vec_perm_le (operands);
22937
(define_expand "vec_perm_constv16qi"
22938
[(match_operand:V16QI 0 "register_operand" "")
22939
@@ -1476,14 +1564,22 @@
22940
"vsldoi %0,%1,%2,%3"
22941
[(set_attr "type" "vecperm")])
22943
-(define_insn "altivec_vupkhsb"
22944
- [(set (match_operand:V8HI 0 "register_operand" "=v")
22945
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
22946
- UNSPEC_VUPKHSB))]
22949
+(define_insn "altivec_vupkhs<VU_char>"
22950
+ [(set (match_operand:VP 0 "register_operand" "=v")
22951
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
22952
+ UNSPEC_VUNPACK_HI_SIGN))]
22954
+ "vupkhs<VU_char> %0,%1"
22955
[(set_attr "type" "vecperm")])
22957
+(define_insn "altivec_vupkls<VU_char>"
22958
+ [(set (match_operand:VP 0 "register_operand" "=v")
22959
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
22960
+ UNSPEC_VUNPACK_LO_SIGN))]
22962
+ "vupkls<VU_char> %0,%1"
22963
+ [(set_attr "type" "vecperm")])
22965
(define_insn "altivec_vupkhpx"
22966
[(set (match_operand:V4SI 0 "register_operand" "=v")
22967
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
22968
@@ -1492,22 +1588,6 @@
22970
[(set_attr "type" "vecperm")])
22972
-(define_insn "altivec_vupkhsh"
22973
- [(set (match_operand:V4SI 0 "register_operand" "=v")
22974
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
22975
- UNSPEC_VUPKHSH))]
22978
- [(set_attr "type" "vecperm")])
22980
-(define_insn "altivec_vupklsb"
22981
- [(set (match_operand:V8HI 0 "register_operand" "=v")
22982
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
22983
- UNSPEC_VUPKLSB))]
22986
- [(set_attr "type" "vecperm")])
22988
(define_insn "altivec_vupklpx"
22989
[(set (match_operand:V4SI 0 "register_operand" "=v")
22990
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
22991
@@ -1516,49 +1596,41 @@
22993
[(set_attr "type" "vecperm")])
22995
-(define_insn "altivec_vupklsh"
22996
- [(set (match_operand:V4SI 0 "register_operand" "=v")
22997
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
22998
- UNSPEC_VUPKLSH))]
23001
- [(set_attr "type" "vecperm")])
23003
;; Compare vectors producing a vector result and a predicate, setting CR6 to
23004
;; indicate a combined status
23005
(define_insn "*altivec_vcmpequ<VI_char>_p"
23007
- (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v")
23008
- (match_operand:VI 2 "register_operand" "v"))]
23009
+ (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
23010
+ (match_operand:VI2 2 "register_operand" "v"))]
23012
- (set (match_operand:VI 0 "register_operand" "=v")
23013
- (eq:VI (match_dup 1)
23015
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
23016
+ (set (match_operand:VI2 0 "register_operand" "=v")
23017
+ (eq:VI2 (match_dup 1)
23020
"vcmpequ<VI_char>. %0,%1,%2"
23021
[(set_attr "type" "veccmp")])
23023
(define_insn "*altivec_vcmpgts<VI_char>_p"
23025
- (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v")
23026
- (match_operand:VI 2 "register_operand" "v"))]
23027
+ (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v")
23028
+ (match_operand:VI2 2 "register_operand" "v"))]
23030
- (set (match_operand:VI 0 "register_operand" "=v")
23031
- (gt:VI (match_dup 1)
23033
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
23034
+ (set (match_operand:VI2 0 "register_operand" "=v")
23035
+ (gt:VI2 (match_dup 1)
23038
"vcmpgts<VI_char>. %0,%1,%2"
23039
[(set_attr "type" "veccmp")])
23041
(define_insn "*altivec_vcmpgtu<VI_char>_p"
23043
- (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v")
23044
- (match_operand:VI 2 "register_operand" "v"))]
23045
+ (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v")
23046
+ (match_operand:VI2 2 "register_operand" "v"))]
23048
- (set (match_operand:VI 0 "register_operand" "=v")
23049
- (gtu:VI (match_dup 1)
23051
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
23052
+ (set (match_operand:VI2 0 "register_operand" "=v")
23053
+ (gtu:VI2 (match_dup 1)
23056
"vcmpgtu<VI_char>. %0,%1,%2"
23057
[(set_attr "type" "veccmp")])
23059
@@ -1779,20 +1851,28 @@
23060
[(set_attr "type" "vecstore")])
23063
-;; vspltis? SCRATCH0,0
23064
+;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
23065
;; vsubu?m SCRATCH2,SCRATCH1,%1
23066
;; vmaxs? %0,%1,SCRATCH2"
23067
(define_expand "abs<mode>2"
23068
- [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
23069
- (set (match_dup 3)
23070
- (minus:VI (match_dup 2)
23071
- (match_operand:VI 1 "register_operand" "v")))
23072
- (set (match_operand:VI 0 "register_operand" "=v")
23073
- (smax:VI (match_dup 1) (match_dup 3)))]
23075
+ [(set (match_dup 2) (match_dup 3))
23076
+ (set (match_dup 4)
23077
+ (minus:VI2 (match_dup 2)
23078
+ (match_operand:VI2 1 "register_operand" "v")))
23079
+ (set (match_operand:VI2 0 "register_operand" "=v")
23080
+ (smax:VI2 (match_dup 1) (match_dup 4)))]
23083
- operands[2] = gen_reg_rtx (GET_MODE (operands[0]));
23084
- operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
23085
+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
23086
+ rtvec v = rtvec_alloc (n_elt);
23088
+ /* Create an all 0 constant. */
23089
+ for (i = 0; i < n_elt; ++i)
23090
+ RTVEC_ELT (v, i) = const0_rtx;
23092
+ operands[2] = gen_reg_rtx (<MODE>mode);
23093
+ operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
23094
+ operands[4] = gen_reg_rtx (<MODE>mode);
23098
@@ -1950,50 +2030,20 @@
23102
-(define_expand "vec_unpacks_hi_v16qi"
23103
- [(set (match_operand:V8HI 0 "register_operand" "=v")
23104
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
23105
- UNSPEC_VUPKHSB))]
23109
- emit_insn (gen_altivec_vupkhsb (operands[0], operands[1]));
23112
+(define_expand "vec_unpacks_hi_<VP_small_lc>"
23113
+ [(set (match_operand:VP 0 "register_operand" "=v")
23114
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
23115
+ UNSPEC_VUNPACK_HI_SIGN))]
23119
-(define_expand "vec_unpacks_hi_v8hi"
23120
- [(set (match_operand:V4SI 0 "register_operand" "=v")
23121
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
23122
- UNSPEC_VUPKHSH))]
23126
- emit_insn (gen_altivec_vupkhsh (operands[0], operands[1]));
23129
+(define_expand "vec_unpacks_lo_<VP_small_lc>"
23130
+ [(set (match_operand:VP 0 "register_operand" "=v")
23131
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
23132
+ UNSPEC_VUNPACK_LO_SIGN))]
23136
-(define_expand "vec_unpacks_lo_v16qi"
23137
- [(set (match_operand:V8HI 0 "register_operand" "=v")
23138
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
23139
- UNSPEC_VUPKLSB))]
23143
- emit_insn (gen_altivec_vupklsb (operands[0], operands[1]));
23147
-(define_expand "vec_unpacks_lo_v8hi"
23148
- [(set (match_operand:V4SI 0 "register_operand" "=v")
23149
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
23150
- UNSPEC_VUPKLSH))]
23154
- emit_insn (gen_altivec_vupklsh (operands[0], operands[1]));
23158
(define_insn "vperm_v8hiv4si"
23159
[(set (match_operand:V4SI 0 "register_operand" "=v")
23160
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
23161
@@ -2025,25 +2075,26 @@
23162
rtx vzero = gen_reg_rtx (V8HImode);
23163
rtx mask = gen_reg_rtx (V16QImode);
23164
rtvec v = rtvec_alloc (16);
23165
+ bool be = BYTES_BIG_ENDIAN;
23167
emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
23169
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
23170
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 0);
23171
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
23172
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
23173
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
23174
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 2);
23175
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
23176
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
23177
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
23178
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 4);
23179
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
23180
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
23181
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
23182
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 6);
23183
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
23184
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
23185
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
23186
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16);
23187
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6);
23188
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
23189
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
23190
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16);
23191
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 4);
23192
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
23193
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
23194
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16);
23195
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2);
23196
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
23197
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
23198
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16);
23199
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0);
23200
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
23202
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
23203
emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
23204
@@ -2060,25 +2111,26 @@
23205
rtx vzero = gen_reg_rtx (V4SImode);
23206
rtx mask = gen_reg_rtx (V16QImode);
23207
rtvec v = rtvec_alloc (16);
23208
+ bool be = BYTES_BIG_ENDIAN;
23210
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
23212
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
23213
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
23214
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 0);
23215
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
23216
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
23217
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
23218
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 2);
23219
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
23220
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
23221
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
23222
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 4);
23223
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
23224
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
23225
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
23226
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 6);
23227
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
23228
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
23229
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6);
23230
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17);
23231
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
23232
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
23233
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4);
23234
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17);
23235
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
23236
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
23237
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2);
23238
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 4 : 17);
23239
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
23240
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
23241
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0);
23242
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17);
23243
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
23245
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
23246
emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
23247
@@ -2095,25 +2147,26 @@
23248
rtx vzero = gen_reg_rtx (V8HImode);
23249
rtx mask = gen_reg_rtx (V16QImode);
23250
rtvec v = rtvec_alloc (16);
23251
+ bool be = BYTES_BIG_ENDIAN;
23253
emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
23255
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
23256
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 8);
23257
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
23258
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
23259
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
23260
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10);
23261
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
23262
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
23263
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
23264
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 12);
23265
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
23266
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
23267
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
23268
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 14);
23269
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
23270
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
23271
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
23272
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16);
23273
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14);
23274
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
23275
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
23276
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16);
23277
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12);
23278
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
23279
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
23280
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16);
23281
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10);
23282
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
23283
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
23284
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16);
23285
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8);
23286
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
23288
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
23289
emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
23290
@@ -2130,25 +2183,26 @@
23291
rtx vzero = gen_reg_rtx (V4SImode);
23292
rtx mask = gen_reg_rtx (V16QImode);
23293
rtvec v = rtvec_alloc (16);
23294
+ bool be = BYTES_BIG_ENDIAN;
23296
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
23298
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
23299
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
23300
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 8);
23301
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
23302
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
23303
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
23304
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10);
23305
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
23306
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
23307
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
23308
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 12);
23309
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
23310
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
23311
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
23312
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 14);
23313
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
23314
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
23315
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14);
23316
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17);
23317
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
23318
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
23319
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12);
23320
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17);
23321
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
23322
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
23323
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10);
23324
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17);
23325
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
23326
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
23327
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8);
23328
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
23329
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
23331
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
23332
emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
23333
@@ -2168,7 +2222,10 @@
23335
emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
23336
emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
23337
- emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
23338
+ if (BYTES_BIG_ENDIAN)
23339
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
23341
+ emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
23345
@@ -2185,7 +2242,10 @@
23347
emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
23348
emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
23349
- emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
23350
+ if (BYTES_BIG_ENDIAN)
23351
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
23353
+ emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
23357
@@ -2202,7 +2262,10 @@
23359
emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
23360
emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
23361
- emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
23362
+ if (BYTES_BIG_ENDIAN)
23363
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
23365
+ emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
23369
@@ -2219,7 +2282,10 @@
23371
emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
23372
emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
23373
- emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
23374
+ if (BYTES_BIG_ENDIAN)
23375
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
23377
+ emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
23381
@@ -2236,7 +2302,10 @@
23383
emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
23384
emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
23385
- emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
23386
+ if (BYTES_BIG_ENDIAN)
23387
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
23389
+ emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
23393
@@ -2253,7 +2322,10 @@
23395
emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
23396
emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
23397
- emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
23398
+ if (BYTES_BIG_ENDIAN)
23399
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
23401
+ emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
23405
@@ -2270,7 +2342,10 @@
23407
emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
23408
emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
23409
- emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
23410
+ if (BYTES_BIG_ENDIAN)
23411
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
23413
+ emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
23417
@@ -2287,33 +2362,20 @@
23419
emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
23420
emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
23421
- emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
23422
+ if (BYTES_BIG_ENDIAN)
23423
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
23425
+ emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
23429
-(define_expand "vec_pack_trunc_v8hi"
23430
- [(set (match_operand:V16QI 0 "register_operand" "=v")
23431
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
23432
- (match_operand:V8HI 2 "register_operand" "v")]
23433
- UNSPEC_VPKUHUM))]
23437
- emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2]));
23441
-(define_expand "vec_pack_trunc_v4si"
23442
- [(set (match_operand:V8HI 0 "register_operand" "=v")
23443
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
23444
- (match_operand:V4SI 2 "register_operand" "v")]
23445
- UNSPEC_VPKUWUM))]
23449
- emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2]));
23452
+(define_expand "vec_pack_trunc_<mode>"
23453
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
23454
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
23455
+ (match_operand:VP 2 "register_operand" "v")]
23456
+ UNSPEC_VPACK_UNS_UNS_MOD))]
23460
(define_expand "altivec_negv4sf2"
23461
[(use (match_operand:V4SF 0 "register_operand" ""))
23462
@@ -2460,3 +2522,34 @@
23463
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
23468
+;; Power8 vector instructions encoded as Altivec instructions
23470
+;; Vector count leading zeros
23471
+(define_insn "*p8v_clz<mode>2"
23472
+ [(set (match_operand:VI2 0 "register_operand" "=v")
23473
+ (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
23474
+ "TARGET_P8_VECTOR"
23476
+ [(set_attr "length" "4")
23477
+ (set_attr "type" "vecsimple")])
23479
+;; Vector population count
23480
+(define_insn "*p8v_popcount<mode>2"
23481
+ [(set (match_operand:VI2 0 "register_operand" "=v")
23482
+ (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
23483
+ "TARGET_P8_VECTOR"
23484
+ "vpopcnt<wd> %0,%1"
23485
+ [(set_attr "length" "4")
23486
+ (set_attr "type" "vecsimple")])
23488
+;; Vector Gather Bits by Bytes by Doubleword
23489
+(define_insn "p8v_vgbbd"
23490
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
23491
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
23493
+ "TARGET_P8_VECTOR"
23495
+ [(set_attr "length" "4")
23496
+ (set_attr "type" "vecsimple")])
23497
--- a/src/gcc/config/rs6000/sysv4le.h
23498
+++ b/src/gcc/config/rs6000/sysv4le.h
23501
#undef MULTILIB_DEFAULTS
23502
#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
23504
+/* Little-endian PowerPC64 Linux uses the ELF v2 ABI by default. */
23505
+#define LINUX64_DEFAULT_ABI_ELFv2
23507
--- a/src/gcc/config/rs6000/dfp.md
23508
+++ b/src/gcc/config/rs6000/dfp.md
23513
-(define_expand "movsd"
23514
- [(set (match_operand:SD 0 "nonimmediate_operand" "")
23515
- (match_operand:SD 1 "any_operand" ""))]
23516
- "TARGET_HARD_FLOAT && TARGET_FPRS"
23517
- "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }")
23520
- [(set (match_operand:SD 0 "gpc_reg_operand" "")
23521
- (match_operand:SD 1 "const_double_operand" ""))]
23522
- "reload_completed
23523
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
23524
- || (GET_CODE (operands[0]) == SUBREG
23525
- && GET_CODE (SUBREG_REG (operands[0])) == REG
23526
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
23527
- [(set (match_dup 2) (match_dup 3))]
23531
- REAL_VALUE_TYPE rv;
23533
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
23534
- REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
23536
- if (! TARGET_POWERPC64)
23537
- operands[2] = operand_subword (operands[0], 0, 0, SDmode);
23539
- operands[2] = gen_lowpart (SImode, operands[0]);
23541
- operands[3] = gen_int_mode (l, SImode);
23544
-(define_insn "movsd_hardfloat"
23545
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r")
23546
- (match_operand:SD 1 "input_operand" "r,m,r,f,r,h,0,G,Fn"))]
23547
- "(gpc_reg_operand (operands[0], SDmode)
23548
- || gpc_reg_operand (operands[1], SDmode))
23549
- && (TARGET_HARD_FLOAT && TARGET_FPRS)"
23560
- [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*")
23561
- (set_attr "length" "4,4,4,4,4,4,4,4,8")])
23563
-(define_insn "movsd_softfloat"
23564
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h")
23565
- (match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))]
23566
- "(gpc_reg_operand (operands[0], SDmode)
23567
- || gpc_reg_operand (operands[1], SDmode))
23568
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
23581
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*")
23582
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")])
23584
(define_insn "movsd_store"
23585
[(set (match_operand:DD 0 "nonimmediate_operand" "=m")
23586
(unspec:DD [(match_operand:SD 1 "input_operand" "d")]
23587
@@ -108,7 +37,14 @@
23588
|| gpc_reg_operand (operands[1], SDmode))
23589
&& TARGET_HARD_FLOAT && TARGET_FPRS"
23591
- [(set_attr "type" "fpstore")
23592
+ [(set (attr "type")
23594
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
23595
+ (const_string "fpstore_ux")
23597
+ (match_test "update_address_mem (operands[0], VOIDmode)")
23598
+ (const_string "fpstore_u")
23599
+ (const_string "fpstore"))))
23600
(set_attr "length" "4")])
23602
(define_insn "movsd_load"
23603
@@ -119,7 +55,14 @@
23604
|| gpc_reg_operand (operands[1], DDmode))
23605
&& TARGET_HARD_FLOAT && TARGET_FPRS"
23607
- [(set_attr "type" "fpload")
23608
+ [(set (attr "type")
23610
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
23611
+ (const_string "fpload_ux")
23613
+ (match_test "update_address_mem (operands[1], VOIDmode)")
23614
+ (const_string "fpload_u")
23615
+ (const_string "fpload"))))
23616
(set_attr "length" "4")])
23618
;; Hardware support for decimal floating point operations.
23619
@@ -182,211 +125,6 @@
23621
[(set_attr "type" "fp")])
23623
-(define_expand "movdd"
23624
- [(set (match_operand:DD 0 "nonimmediate_operand" "")
23625
- (match_operand:DD 1 "any_operand" ""))]
23627
- "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }")
23630
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
23631
- (match_operand:DD 1 "const_int_operand" ""))]
23632
- "! TARGET_POWERPC64 && reload_completed
23633
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
23634
- || (GET_CODE (operands[0]) == SUBREG
23635
- && GET_CODE (SUBREG_REG (operands[0])) == REG
23636
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
23637
- [(set (match_dup 2) (match_dup 4))
23638
- (set (match_dup 3) (match_dup 1))]
23641
- int endian = (WORDS_BIG_ENDIAN == 0);
23642
- HOST_WIDE_INT value = INTVAL (operands[1]);
23644
- operands[2] = operand_subword (operands[0], endian, 0, DDmode);
23645
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
23646
-#if HOST_BITS_PER_WIDE_INT == 32
23647
- operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
23649
- operands[4] = GEN_INT (value >> 32);
23650
- operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
23655
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
23656
- (match_operand:DD 1 "const_double_operand" ""))]
23657
- "! TARGET_POWERPC64 && reload_completed
23658
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
23659
- || (GET_CODE (operands[0]) == SUBREG
23660
- && GET_CODE (SUBREG_REG (operands[0])) == REG
23661
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
23662
- [(set (match_dup 2) (match_dup 4))
23663
- (set (match_dup 3) (match_dup 5))]
23666
- int endian = (WORDS_BIG_ENDIAN == 0);
23668
- REAL_VALUE_TYPE rv;
23670
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
23671
- REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
23673
- operands[2] = operand_subword (operands[0], endian, 0, DDmode);
23674
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
23675
- operands[4] = gen_int_mode (l[endian], SImode);
23676
- operands[5] = gen_int_mode (l[1 - endian], SImode);
23680
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
23681
- (match_operand:DD 1 "const_double_operand" ""))]
23682
- "TARGET_POWERPC64 && reload_completed
23683
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
23684
- || (GET_CODE (operands[0]) == SUBREG
23685
- && GET_CODE (SUBREG_REG (operands[0])) == REG
23686
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
23687
- [(set (match_dup 2) (match_dup 3))]
23690
- int endian = (WORDS_BIG_ENDIAN == 0);
23692
- REAL_VALUE_TYPE rv;
23693
-#if HOST_BITS_PER_WIDE_INT >= 64
23694
- HOST_WIDE_INT val;
23697
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
23698
- REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
23700
- operands[2] = gen_lowpart (DImode, operands[0]);
23701
- /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
23702
-#if HOST_BITS_PER_WIDE_INT >= 64
23703
- val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32
23704
- | ((HOST_WIDE_INT)(unsigned long)l[1 - endian]));
23706
- operands[3] = gen_int_mode (val, DImode);
23708
- operands[3] = immed_double_const (l[1 - endian], l[endian], DImode);
23712
-;; Don't have reload use general registers to load a constant. First,
23713
-;; it might not work if the output operand is the equivalent of
23714
-;; a non-offsettable memref, but also it is less efficient than loading
23715
-;; the constant into an FP register, since it will probably be used there.
23716
-;; The "??" is a kludge until we can figure out a more reasonable way
23717
-;; of handling these non-offsettable values.
23718
-(define_insn "*movdd_hardfloat32"
23719
- [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r")
23720
- (match_operand:DD 1 "input_operand" "r,m,r,d,m,d,G,H,F"))]
23721
- "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS
23722
- && (gpc_reg_operand (operands[0], DDmode)
23723
- || gpc_reg_operand (operands[1], DDmode))"
23726
- switch (which_alternative)
23729
- gcc_unreachable ();
23735
- return \"fmr %0,%1\";
23737
- return \"lfd%U1%X1 %0,%1\";
23739
- return \"stfd%U0%X0 %1,%0\";
23746
- [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*")
23747
- (set_attr "length" "8,16,16,4,4,4,8,12,16")])
23749
-(define_insn "*movdd_softfloat32"
23750
- [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r")
23751
- (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))]
23752
- "! TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
23753
- && (gpc_reg_operand (operands[0], DDmode)
23754
- || gpc_reg_operand (operands[1], DDmode))"
23756
- [(set_attr "type" "two,load,store,*,*,*")
23757
- (set_attr "length" "8,8,8,8,12,16")])
23759
-; ld/std require word-aligned displacements -> 'Y' constraint.
23760
-; List Y->r and r->Y before r->r for reload.
23761
-(define_insn "*movdd_hardfloat64_mfpgpr"
23762
- [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d")
23763
- (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))]
23764
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
23765
- && (gpc_reg_operand (operands[0], DDmode)
23766
- || gpc_reg_operand (operands[1], DDmode))"
23782
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
23783
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
23785
-; ld/std require word-aligned displacements -> 'Y' constraint.
23786
-; List Y->r and r->Y before r->r for reload.
23787
-(define_insn "*movdd_hardfloat64"
23788
- [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r")
23789
- (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))]
23790
- "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
23791
- && (gpc_reg_operand (operands[0], DDmode)
23792
- || gpc_reg_operand (operands[1], DDmode))"
23806
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*")
23807
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")])
23809
-(define_insn "*movdd_softfloat64"
23810
- [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
23811
- (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
23812
- "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
23813
- && (gpc_reg_operand (operands[0], DDmode)
23814
- || gpc_reg_operand (operands[1], DDmode))"
23825
- [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
23826
- (set_attr "length" "4,4,4,4,4,8,12,16,4")])
23828
(define_expand "negtd2"
23829
[(set (match_operand:TD 0 "gpc_reg_operand" "")
23830
(neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))]
23831
@@ -410,40 +148,25 @@
23834
(define_insn "*abstd2_fpr"
23835
- [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
23836
- (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
23837
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d")
23838
+ (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))]
23839
"TARGET_HARD_FLOAT && TARGET_FPRS"
23841
- [(set_attr "type" "fp")])
23844
+ fabs %0,%1\;fmr %L0,%L1"
23845
+ [(set_attr "type" "fp")
23846
+ (set_attr "length" "4,8")])
23848
(define_insn "*nabstd2_fpr"
23849
- [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
23850
- (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d"))))]
23851
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d")
23852
+ (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d"))))]
23853
"TARGET_HARD_FLOAT && TARGET_FPRS"
23855
- [(set_attr "type" "fp")])
23858
+ fnabs %0,%1\;fmr %L0,%L1"
23859
+ [(set_attr "type" "fp")
23860
+ (set_attr "length" "4,8")])
23862
-(define_expand "movtd"
23863
- [(set (match_operand:TD 0 "general_operand" "")
23864
- (match_operand:TD 1 "any_operand" ""))]
23865
- "TARGET_HARD_FLOAT && TARGET_FPRS"
23866
- "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }")
23868
-; It's important to list the Y->r and r->Y moves before r->r because
23869
-; otherwise reload, given m->r, will try to pick r->r and reload it,
23870
-; which doesn't make progress.
23871
-(define_insn_and_split "*movtd_internal"
23872
- [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
23873
- (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
23874
- "TARGET_HARD_FLOAT && TARGET_FPRS
23875
- && (gpc_reg_operand (operands[0], TDmode)
23876
- || gpc_reg_operand (operands[1], TDmode))"
23878
- "&& reload_completed"
23880
-{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
23881
- [(set_attr "length" "8,8,8,20,20,16")])
23883
;; Hardware support for decimal floating point operations.
23885
(define_insn "extendddtd2"
23886
--- a/src/gcc/config/rs6000/sync.md
23887
+++ b/src/gcc/config/rs6000/sync.md
23888
@@ -18,14 +18,23 @@
23889
;; along with GCC; see the file COPYING3. If not see
23890
;; <http://www.gnu.org/licenses/>.
23892
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
23893
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
23894
+(define_mode_attr larx [(QI "lbarx")
23900
+(define_mode_attr stcx [(QI "stbcx.")
23906
(define_code_iterator FETCHOP [plus minus ior xor and])
23907
(define_code_attr fetchop_name
23908
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
23909
(define_code_attr fetchop_pred
23910
- [(plus "add_operand") (minus "gpc_reg_operand")
23911
+ [(plus "add_operand") (minus "int_reg_operand")
23912
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
23914
(define_expand "mem_thread_fence"
23915
@@ -129,16 +138,7 @@
23916
case MEMMODEL_CONSUME:
23917
case MEMMODEL_ACQUIRE:
23918
case MEMMODEL_SEQ_CST:
23919
- if (GET_MODE (operands[0]) == QImode)
23920
- emit_insn (gen_loadsync_qi (operands[0]));
23921
- else if (GET_MODE (operands[0]) == HImode)
23922
- emit_insn (gen_loadsync_hi (operands[0]));
23923
- else if (GET_MODE (operands[0]) == SImode)
23924
- emit_insn (gen_loadsync_si (operands[0]));
23925
- else if (GET_MODE (operands[0]) == DImode)
23926
- emit_insn (gen_loadsync_di (operands[0]));
23928
- gcc_unreachable ();
23929
+ emit_insn (gen_loadsync_<mode> (operands[0]));
23932
gcc_unreachable ();
23933
@@ -170,35 +170,109 @@
23937
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
23938
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
23939
-;; but let's prepare the macros anyway.
23940
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instrucitons
23941
+;; other than the quad memory operations, which have special restrictions.
23942
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
23943
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
23944
+;; special handling due to even/odd register requirements.
23945
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
23946
+ (HI "TARGET_SYNC_HI_QI")
23948
+ (DI "TARGET_POWERPC64")])
23950
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
23951
+;; Types that we should provide atomic instructions for.
23953
+(define_mode_iterator AINT [QI
23956
+ (DI "TARGET_POWERPC64")
23957
+ (TI "TARGET_SYNC_TI")])
23959
(define_insn "load_locked<mode>"
23960
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
23961
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
23962
(unspec_volatile:ATOMIC
23963
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
23966
[(set_attr "type" "load_l")])
23968
+(define_insn "load_locked<QHI:mode>_si"
23969
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
23970
+ (unspec_volatile:SI
23971
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
23972
+ "TARGET_SYNC_HI_QI"
23973
+ "<QHI:larx> %0,%y1"
23974
+ [(set_attr "type" "load_l")])
23976
+;; Use PTImode to get even/odd register pairs
23977
+(define_expand "load_lockedti"
23978
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
23979
+ (use (match_operand:TI 1 "memory_operand" ""))]
23982
+ /* Use a temporary register to force getting an even register for the
23983
+ lqarx/stqcrx. instructions. Normal optimizations will eliminate this
23985
+ rtx pti = gen_reg_rtx (PTImode);
23986
+ emit_insn (gen_load_lockedpti (pti, operands[1]));
23987
+ emit_move_insn (operands[0], gen_lowpart (TImode, pti));
23991
+(define_insn "load_lockedpti"
23992
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
23993
+ (unspec_volatile:PTI
23994
+ [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
23996
+ && !reg_mentioned_p (operands[0], operands[1])
23997
+ && quad_int_reg_operand (operands[0], PTImode)"
23999
+ [(set_attr "type" "load_l")])
24001
(define_insn "store_conditional<mode>"
24002
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
24003
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
24004
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
24005
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
24006
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
24009
[(set_attr "type" "store_c")])
24011
+(define_expand "store_conditionalti"
24012
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
24013
+ (use (match_operand:TI 1 "memory_operand" ""))
24014
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
24017
+ rtx op0 = operands[0];
24018
+ rtx op1 = operands[1];
24019
+ rtx op2 = operands[2];
24020
+ rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
24021
+ rtx pti_op2 = gen_reg_rtx (PTImode);
24023
+ /* Use a temporary register to force getting an even register for the
24024
+ lqarx/stqcrx. instructions. Normal optimizations will eliminate this
24026
+ emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
24027
+ emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
24031
+(define_insn "store_conditionalpti"
24032
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
24033
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
24034
+ (set (match_operand:PTI 1 "memory_operand" "=Z")
24035
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
24036
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
24038
+ [(set_attr "type" "store_c")])
24040
(define_expand "atomic_compare_and_swap<mode>"
24041
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
24042
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
24043
- (match_operand:INT1 2 "memory_operand" "") ;; memory
24044
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
24045
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
24046
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
24047
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
24048
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
24049
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
24050
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
24051
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
24052
(match_operand:SI 6 "const_int_operand" "") ;; model succ
24053
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
24054
@@ -209,9 +283,9 @@
24057
(define_expand "atomic_exchange<mode>"
24058
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
24059
- (match_operand:INT1 1 "memory_operand" "") ;; memory
24060
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
24061
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
24062
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
24063
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
24064
(match_operand:SI 3 "const_int_operand" "")] ;; model
24067
@@ -220,9 +294,9 @@
24070
(define_expand "atomic_<fetchop_name><mode>"
24071
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
24072
- (FETCHOP:INT1 (match_dup 0)
24073
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
24074
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
24075
+ (FETCHOP:AINT (match_dup 0)
24076
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
24077
(match_operand:SI 2 "const_int_operand" "")] ;; model
24080
@@ -232,8 +306,8 @@
24083
(define_expand "atomic_nand<mode>"
24084
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
24085
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
24086
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
24087
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
24088
(match_operand:SI 2 "const_int_operand" "")] ;; model
24091
@@ -243,10 +317,10 @@
24094
(define_expand "atomic_fetch_<fetchop_name><mode>"
24095
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
24096
- (match_operand:INT1 1 "memory_operand" "") ;; memory
24097
- (FETCHOP:INT1 (match_dup 1)
24098
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
24099
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
24100
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
24101
+ (FETCHOP:AINT (match_dup 1)
24102
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
24103
(match_operand:SI 3 "const_int_operand" "")] ;; model
24106
@@ -256,9 +330,9 @@
24109
(define_expand "atomic_fetch_nand<mode>"
24110
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
24111
- (match_operand:INT1 1 "memory_operand" "") ;; memory
24112
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
24113
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
24114
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
24115
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
24116
(match_operand:SI 3 "const_int_operand" "")] ;; model
24119
@@ -268,10 +342,10 @@
24122
(define_expand "atomic_<fetchop_name>_fetch<mode>"
24123
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
24124
- (match_operand:INT1 1 "memory_operand" "") ;; memory
24125
- (FETCHOP:INT1 (match_dup 1)
24126
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
24127
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
24128
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
24129
+ (FETCHOP:AINT (match_dup 1)
24130
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
24131
(match_operand:SI 3 "const_int_operand" "")] ;; model
24134
@@ -281,9 +355,9 @@
24137
(define_expand "atomic_nand_fetch<mode>"
24138
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
24139
- (match_operand:INT1 1 "memory_operand" "") ;; memory
24140
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
24141
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
24142
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
24143
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
24144
(match_operand:SI 3 "const_int_operand" "")] ;; model
24147
--- a/src/gcc/config/rs6000/crypto.md
24148
+++ b/src/gcc/config/rs6000/crypto.md
24150
+;; Cryptographic instructions added in ISA 2.07
24151
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
24152
+;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
24154
+;; This file is part of GCC.
24156
+;; GCC is free software; you can redistribute it and/or modify it
24157
+;; under the terms of the GNU General Public License as published
24158
+;; by the Free Software Foundation; either version 3, or (at your
24159
+;; option) any later version.
24161
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
24162
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24163
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
24164
+;; License for more details.
24166
+;; You should have received a copy of the GNU General Public License
24167
+;; along with GCC; see the file COPYING3. If not see
24168
+;; <http://www.gnu.org/licenses/>.
24170
+(define_c_enum "unspec"
24173
+ UNSPEC_VCIPHERLAST
24174
+ UNSPEC_VNCIPHERLAST
24180
+;; Iterator for VPMSUM/VPERMXOR
24181
+(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI])
24183
+(define_mode_attr CR_char [(V16QI "b")
24188
+;; Iterator for VSHASIGMAD/VSHASIGMAW
24189
+(define_mode_iterator CR_hash [V4SI V2DI])
24191
+;; Iterator for the other crypto functions
24192
+(define_int_iterator CR_code [UNSPEC_VCIPHER
24194
+ UNSPEC_VCIPHERLAST
24195
+ UNSPEC_VNCIPHERLAST])
24197
+(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher")
24198
+ (UNSPEC_VNCIPHER "vncipher")
24199
+ (UNSPEC_VCIPHERLAST "vcipherlast")
24200
+ (UNSPEC_VNCIPHERLAST "vncipherlast")])
24202
+;; 2 operand crypto instructions
24203
+(define_insn "crypto_<CR_insn>"
24204
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
24205
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
24206
+ (match_operand:V2DI 2 "register_operand" "v")]
24209
+ "<CR_insn> %0,%1,%2"
24210
+ [(set_attr "type" "crypto")])
24212
+(define_insn "crypto_vpmsum<CR_char>"
24213
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
24214
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
24215
+ (match_operand:CR_mode 2 "register_operand" "v")]
24218
+ "vpmsum<CR_char> %0,%1,%2"
24219
+ [(set_attr "type" "crypto")])
24221
+;; 3 operand crypto instructions
24222
+(define_insn "crypto_vpermxor_<mode>"
24223
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
24224
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
24225
+ (match_operand:CR_mode 2 "register_operand" "v")
24226
+ (match_operand:CR_mode 3 "register_operand" "v")]
24227
+ UNSPEC_VPERMXOR))]
24229
+ "vpermxor %0,%1,%2,%3"
24230
+ [(set_attr "type" "crypto")])
24232
+;; 1 operand crypto instruction
24233
+(define_insn "crypto_vsbox"
24234
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
24235
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")]
24239
+ [(set_attr "type" "crypto")])
24241
+;; Hash crypto instructions
24242
+(define_insn "crypto_vshasigma<CR_char>"
24243
+ [(set (match_operand:CR_hash 0 "register_operand" "=v")
24244
+ (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v")
24245
+ (match_operand:SI 2 "const_0_to_1_operand" "n")
24246
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
24247
+ UNSPEC_VSHASIGMA))]
24249
+ "vshasigma<CR_char> %0,%1,%2,%3"
24250
+ [(set_attr "type" "crypto")])
24251
--- a/src/gcc/config/rs6000/rs6000.md
24252
+++ b/src/gcc/config/rs6000/rs6000.md
24253
@@ -25,10 +25,14 @@
24257
- [(STACK_POINTER_REGNUM 1)
24258
+ [(FIRST_GPR_REGNO 0)
24259
+ (STACK_POINTER_REGNUM 1)
24261
(STATIC_CHAIN_REGNUM 11)
24262
(HARD_FRAME_POINTER_REGNUM 31)
24263
+ (LAST_GPR_REGNO 31)
24264
+ (FIRST_FPR_REGNO 32)
24265
+ (LAST_FPR_REGNO 63)
24268
(ARG_POINTER_REGNUM 67)
24270
(SPE_ACC_REGNO 111)
24271
(SPEFSCR_REGNO 112)
24272
(FRAME_POINTER_REGNUM 113)
24274
- ; ABI defined stack offsets for storing the TOC pointer with AIX calls.
24275
- (TOC_SAVE_OFFSET_32BIT 20)
24276
- (TOC_SAVE_OFFSET_64BIT 40)
24278
- ; Function TOC offset in the AIX function descriptor.
24279
- (AIX_FUNC_DESC_TOC_32BIT 4)
24280
- (AIX_FUNC_DESC_TOC_64BIT 8)
24282
- ; Static chain offset in the AIX function descriptor.
24283
- (AIX_FUNC_DESC_SC_32BIT 8)
24284
- (AIX_FUNC_DESC_SC_64BIT 16)
24285
+ (TFHAR_REGNO 114)
24286
+ (TFIAR_REGNO 115)
24287
+ (TEXASR_REGNO 116)
24291
@@ -123,6 +118,12 @@
24295
+ UNSPEC_P8V_FMRGOW
24296
+ UNSPEC_P8V_MTVSRWZ
24297
+ UNSPEC_P8V_RELOAD_FROM_GPR
24298
+ UNSPEC_P8V_MTVSRD
24299
+ UNSPEC_P8V_XXPERMDI
24300
+ UNSPEC_P8V_RELOAD_FROM_VSX
24304
@@ -142,7 +143,7 @@
24306
;; Define an insn type attribute. This is used in function unit delay
24308
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt"
24309
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto,htm"
24310
(const_string "integer"))
24312
;; Define floating point instruction sub-types for use with Xfpu.md
24313
@@ -164,7 +165,7 @@
24314
;; Processor type -- this attribute must exactly match the processor_type
24315
;; enumeration in rs6000.h.
24317
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan"
24318
+(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8"
24319
(const (symbol_ref "rs6000_cpu_attr")))
24322
@@ -197,6 +198,7 @@
24323
(include "power5.md")
24324
(include "power6.md")
24325
(include "power7.md")
24326
+(include "power8.md")
24327
(include "cell.md")
24328
(include "xfpu.md")
24330
@@ -215,7 +217,7 @@
24331
(define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")])
24333
; Any supported integer mode.
24334
-(define_mode_iterator INT [QI HI SI DI TI])
24335
+(define_mode_iterator INT [QI HI SI DI TI PTI])
24337
; Any supported integer mode that fits in one register.
24338
(define_mode_iterator INT1 [QI HI SI (DI "TARGET_POWERPC64")])
24339
@@ -223,6 +225,12 @@
24340
; extend modes for DImode
24341
(define_mode_iterator QHSI [QI HI SI])
24343
+; QImode or HImode for small atomic ops
24344
+(define_mode_iterator QHI [QI HI])
24346
+; HImode or SImode for sign extended fusion ops
24347
+(define_mode_iterator HSI [HI SI])
24349
; SImode or DImode, even if DImode doesn't fit in GPRs.
24350
(define_mode_iterator SDI [SI DI])
24352
@@ -230,6 +238,10 @@
24353
; (one with a '.') will compare; and the size used for arithmetic carries.
24354
(define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
24356
+; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
24357
+; PTImode is GPR only)
24358
+(define_mode_iterator TI2 [TI PTI])
24360
; Any hardware-supported floating-point mode
24361
(define_mode_iterator FP [
24362
(SF "TARGET_HARD_FLOAT
24363
@@ -253,6 +265,49 @@
24364
(V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
24367
+; Floating point move iterators to combine binary and decimal moves
24368
+(define_mode_iterator FMOVE32 [SF SD])
24369
+(define_mode_iterator FMOVE64 [DF DD])
24370
+(define_mode_iterator FMOVE64X [DI DF DD])
24371
+(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
24372
+ (TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
24374
+; Iterators for 128 bit types for direct move
24375
+(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
24383
+; Whether a floating point move is ok, don't allow SD without hardware FP
24384
+(define_mode_attr fmove_ok [(SF "")
24386
+ (SD "TARGET_HARD_FLOAT && TARGET_FPRS")
24389
+; Convert REAL_VALUE to the appropriate bits
24390
+(define_mode_attr real_value_to_target [(SF "REAL_VALUE_TO_TARGET_SINGLE")
24391
+ (DF "REAL_VALUE_TO_TARGET_DOUBLE")
24392
+ (SD "REAL_VALUE_TO_TARGET_DECIMAL32")
24393
+ (DD "REAL_VALUE_TO_TARGET_DECIMAL64")])
24395
+; Definitions for load to 32-bit fpr register
24396
+(define_mode_attr f32_lr [(SF "f") (SD "wz")])
24397
+(define_mode_attr f32_lm [(SF "m") (SD "Z")])
24398
+(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
24399
+(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1") (SD "lxsiwzx %x0,%y1")])
24401
+; Definitions for store from 32-bit fpr register
24402
+(define_mode_attr f32_sr [(SF "f") (SD "wx")])
24403
+(define_mode_attr f32_sm [(SF "m") (SD "Z")])
24404
+(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
24405
+(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")])
24407
+; Definitions for 32-bit fpr direct move
24408
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
24410
; These modes do not fit in integer registers in 32-bit mode.
24411
; but on e500v2, the gpr are 64 bit registers
24412
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
24413
@@ -263,6 +318,25 @@
24414
; Iterator for just SF/DF
24415
(define_mode_iterator SFDF [SF DF])
24417
+; SF/DF suffix for traditional floating instructions
24418
+(define_mode_attr Ftrad [(SF "s") (DF "")])
24420
+; SF/DF suffix for VSX instructions
24421
+(define_mode_attr Fvsx [(SF "sp") (DF "dp")])
24423
+; SF/DF constraint for arithmetic on traditional floating point registers
24424
+(define_mode_attr Ff [(SF "f") (DF "d")])
24426
+; SF/DF constraint for arithmetic on VSX registers
24427
+(define_mode_attr Fv [(SF "wy") (DF "ws")])
24429
+; s/d suffix for things like fp_addsub_s/fp_addsub_d
24430
+(define_mode_attr Fs [(SF "s") (DF "d")])
24432
+; FRE/FRES support
24433
+(define_mode_attr Ffre [(SF "fres") (DF "fre")])
24434
+(define_mode_attr FFRE [(SF "FRES") (DF "FRE")])
24436
; Conditional returns.
24437
(define_code_iterator any_return [return simple_return])
24438
(define_code_attr return_pred [(return "direct_return ()")
24439
@@ -271,7 +345,14 @@
24441
; Various instructions that come in SI and DI forms.
24442
; A generic w/d attribute, for things like cmpw/cmpd.
24443
-(define_mode_attr wd [(QI "b") (HI "h") (SI "w") (DI "d")])
24444
+(define_mode_attr wd [(QI "b")
24454
(define_mode_attr dbits [(QI "56") (HI "48") (SI "32")])
24455
@@ -311,6 +392,77 @@
24457
(define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT")
24458
(DF "TARGET_DOUBLE_FLOAT")])
24460
+;; Mode iterator for logical operations on 128-bit types
24461
+(define_mode_iterator BOOL_128 [TI
24463
+ (V16QI "TARGET_ALTIVEC")
24464
+ (V8HI "TARGET_ALTIVEC")
24465
+ (V4SI "TARGET_ALTIVEC")
24466
+ (V4SF "TARGET_ALTIVEC")
24467
+ (V2DI "TARGET_ALTIVEC")
24468
+ (V2DF "TARGET_ALTIVEC")])
24470
+;; For the GPRs we use 3 constraints for register outputs, two that are the
24471
+;; same as the output register, and a third where the output register is an
24472
+;; early clobber, so we don't have to deal with register overlaps. For the
24473
+;; vector types, we prefer to use the vector registers. For TI mode, allow
24476
+;; Mode attribute for boolean operation register constraints for output
24477
+(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wa,v")
24479
+ (V16QI "wa,v,&?r,?r,?r")
24480
+ (V8HI "wa,v,&?r,?r,?r")
24481
+ (V4SI "wa,v,&?r,?r,?r")
24482
+ (V4SF "wa,v,&?r,?r,?r")
24483
+ (V2DI "wa,v,&?r,?r,?r")
24484
+ (V2DF "wa,v,&?r,?r,?r")])
24486
+;; Mode attribute for boolean operation register constraints for operand1
24487
+(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v")
24489
+ (V16QI "wa,v,r,0,r")
24490
+ (V8HI "wa,v,r,0,r")
24491
+ (V4SI "wa,v,r,0,r")
24492
+ (V4SF "wa,v,r,0,r")
24493
+ (V2DI "wa,v,r,0,r")
24494
+ (V2DF "wa,v,r,0,r")])
24496
+;; Mode attribute for boolean operation register constraints for operand2
24497
+(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v")
24499
+ (V16QI "wa,v,r,r,0")
24500
+ (V8HI "wa,v,r,r,0")
24501
+ (V4SI "wa,v,r,r,0")
24502
+ (V4SF "wa,v,r,r,0")
24503
+ (V2DI "wa,v,r,r,0")
24504
+ (V2DF "wa,v,r,r,0")])
24506
+;; Mode attribute for boolean operation register constraints for operand1
24507
+;; for one_cmpl. To simplify things, we repeat the constraint where 0
24508
+;; is used for operand1 or operand2
24509
+(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v")
24511
+ (V16QI "wa,v,r,0,0")
24512
+ (V8HI "wa,v,r,0,0")
24513
+ (V4SI "wa,v,r,0,0")
24514
+ (V4SF "wa,v,r,0,0")
24515
+ (V2DI "wa,v,r,0,0")
24516
+ (V2DF "wa,v,r,0,0")])
24518
+;; Mode attribute for the clobber of CC0 for AND expansion.
24519
+;; For the 128-bit types, we never do AND immediate, but we need to
24520
+;; get the correct number of X's for the number of operands.
24521
+(define_mode_attr BOOL_REGS_AND_CR0 [(TI "X,X,X,X,X")
24523
+ (V16QI "X,X,X,X,X")
24524
+ (V8HI "X,X,X,X,X")
24525
+ (V4SI "X,X,X,X,X")
24526
+ (V4SF "X,X,X,X,X")
24527
+ (V2DI "X,X,X,X,X")
24528
+ (V2DF "X,X,X,X,X")])
24531
;; Start with fixed-point load and store insns. Here we put only the more
24532
;; complex forms. Basic data transfer is done later.
24533
@@ -324,11 +476,19 @@
24534
(define_insn "*zero_extend<mode>di2_internal1"
24535
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
24536
(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
24537
- "TARGET_POWERPC64"
24538
+ "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
24541
rldicl %0,%1,0,<dbits>"
24542
- [(set_attr "type" "load,*")])
24543
+ [(set_attr_alternative "type"
24545
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24546
+ (const_string "load_ux")
24548
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24549
+ (const_string "load_u")
24550
+ (const_string "load")))
24551
+ (const_string "*")])])
24553
(define_insn "*zero_extend<mode>di2_internal2"
24554
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
24555
@@ -382,6 +542,29 @@
24559
+(define_insn "*zero_extendsidi2_lfiwzx"
24560
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wu")
24561
+ (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
24562
+ "TARGET_POWERPC64 && TARGET_LFIWZX"
24565
+ rldicl %0,%1,0,32
24569
+ [(set_attr_alternative "type"
24571
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24572
+ (const_string "load_ux")
24574
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24575
+ (const_string "load_u")
24576
+ (const_string "load")))
24577
+ (const_string "*")
24578
+ (const_string "mffgpr")
24579
+ (const_string "fpload")
24580
+ (const_string "fpload")])])
24582
(define_insn "extendqidi2"
24583
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
24584
(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
24585
@@ -454,7 +637,15 @@
24589
- [(set_attr "type" "load_ext,exts")])
24590
+ [(set_attr_alternative "type"
24592
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24593
+ (const_string "load_ext_ux")
24595
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24596
+ (const_string "load_ext_u")
24597
+ (const_string "load_ext")))
24598
+ (const_string "exts")])])
24601
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
24602
@@ -521,16 +712,47 @@
24607
+(define_insn "*extendsidi2_lfiwax"
24608
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu")
24609
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
24610
+ "TARGET_POWERPC64 && TARGET_LFIWAX"
24617
+ [(set_attr_alternative "type"
24619
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24620
+ (const_string "load_ext_ux")
24622
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24623
+ (const_string "load_ext_u")
24624
+ (const_string "load_ext")))
24625
+ (const_string "exts")
24626
+ (const_string "mffgpr")
24627
+ (const_string "fpload")
24628
+ (const_string "fpload")])])
24630
+(define_insn "*extendsidi2_nocell"
24631
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
24632
(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
24633
- "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
24634
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
24638
- [(set_attr "type" "load_ext,exts")])
24639
+ [(set_attr_alternative "type"
24641
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24642
+ (const_string "load_ext_ux")
24644
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24645
+ (const_string "load_ext_u")
24646
+ (const_string "load_ext")))
24647
+ (const_string "exts")])])
24650
+(define_insn "*extendsidi2_nocell"
24651
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
24652
(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
24653
"TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
24654
@@ -602,7 +824,15 @@
24657
rlwinm %0,%1,0,0xff"
24658
- [(set_attr "type" "load,*")])
24659
+ [(set_attr_alternative "type"
24661
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24662
+ (const_string "load_ux")
24664
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24665
+ (const_string "load_u")
24666
+ (const_string "load")))
24667
+ (const_string "*")])])
24670
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
24671
@@ -722,7 +952,15 @@
24674
rlwinm %0,%1,0,0xff"
24675
- [(set_attr "type" "load,*")])
24676
+ [(set_attr_alternative "type"
24678
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24679
+ (const_string "load_ux")
24681
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24682
+ (const_string "load_u")
24683
+ (const_string "load")))
24684
+ (const_string "*")])])
24687
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
24688
@@ -848,7 +1086,15 @@
24691
rlwinm %0,%1,0,0xffff"
24692
- [(set_attr "type" "load,*")])
24693
+ [(set_attr_alternative "type"
24695
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24696
+ (const_string "load_ux")
24698
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24699
+ (const_string "load_u")
24700
+ (const_string "load")))
24701
+ (const_string "*")])])
24704
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
24705
@@ -915,7 +1161,15 @@
24709
- [(set_attr "type" "load_ext,exts")])
24710
+ [(set_attr_alternative "type"
24712
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
24713
+ (const_string "load_ext_ux")
24715
+ (match_test "update_address_mem (operands[1], VOIDmode)")
24716
+ (const_string "load_ext_u")
24717
+ (const_string "load_ext")))
24718
+ (const_string "exts")])])
24721
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
24722
@@ -1658,7 +1912,19 @@
24726
-(define_insn "one_cmpl<mode>2"
24727
+(define_expand "one_cmpl<mode>2"
24728
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "")
24729
+ (not:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))]
24732
+ if (<MODE>mode == DImode && !TARGET_POWERPC64)
24734
+ rs6000_split_logical (operands, NOT, false, false, false, NULL_RTX);
24739
+(define_insn "*one_cmpl<mode>2"
24740
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
24741
(not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
24743
@@ -1935,7 +2201,9 @@
24744
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
24745
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
24746
"TARGET_CMPB && TARGET_POPCNTB"
24747
- "prty<wd> %0,%1")
24749
+ [(set_attr "length" "4")
24750
+ (set_attr "type" "popcnt")])
24752
(define_expand "parity<mode>2"
24753
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
24754
@@ -4054,7 +4322,7 @@
24758
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
24759
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
24760
(set_attr "length" "4,4,4,8,8,8")])
24763
@@ -4086,7 +4354,7 @@
24767
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
24768
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
24769
(set_attr "length" "4,4,4,8,8,8")])
24772
@@ -4455,224 +4723,226 @@
24776
-;; Floating-point insns, excluding normal data motion.
24778
-;; PowerPC has a full set of single-precision floating point instructions.
24780
-;; For the POWER architecture, we pretend that we have both SFmode and
24781
-;; DFmode insns, while, in fact, all fp insns are actually done in double.
24782
-;; The only conversions we will do will be when storing to memory. In that
24783
-;; case, we will use the "frsp" instruction before storing.
24785
-;; Note that when we store into a single-precision memory location, we need to
24786
-;; use the frsp insn first. If the register being stored isn't dead, we
24787
-;; need a scratch register for the frsp. But this is difficult when the store
24788
-;; is done by reload. It is not incorrect to do the frsp on the register in
24789
-;; this case, we just lose precision that we would have otherwise gotten but
24790
-;; is not guaranteed. Perhaps this should be tightened up at some point.
24792
+;; Floating-point insns, excluding normal data motion. We combine the SF/DF
24793
+;; modes here, and also add in conditional vsx/power8-vector support to access
24794
+;; values in the traditional Altivec registers if the appropriate
24795
+;; -mupper-regs-{df,sf} option is enabled.
24797
-(define_expand "extendsfdf2"
24798
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
24799
- (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
24800
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
24801
+(define_expand "abs<mode>2"
24802
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
24803
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
24804
+ "TARGET_<MODE>_INSN"
24807
-(define_insn_and_split "*extendsfdf2_fpr"
24808
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d")
24809
- (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m")))]
24810
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
24811
+(define_insn "*abs<mode>2_fpr"
24812
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24813
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
24814
+ "TARGET_<MODE>_FPR"
24819
- "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
24822
- emit_note (NOTE_INSN_DELETED);
24825
- [(set_attr "type" "fp,fp,fpload")])
24828
+ [(set_attr "type" "fp")
24829
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
24831
-(define_expand "truncdfsf2"
24832
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
24833
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
24834
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
24836
+(define_insn "*nabs<mode>2_fpr"
24837
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24840
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))))]
24841
+ "TARGET_<MODE>_FPR"
24844
+ xsnabsdp %x0,%x1"
24845
+ [(set_attr "type" "fp")
24846
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
24848
-(define_insn "*truncdfsf2_fpr"
24849
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24850
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
24851
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
24853
- [(set_attr "type" "fp")])
24855
-(define_expand "negsf2"
24856
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
24857
- (neg:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
24858
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
24859
+(define_expand "neg<mode>2"
24860
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
24861
+ (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
24862
+ "TARGET_<MODE>_INSN"
24865
-(define_insn "*negsf2"
24866
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24867
- (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
24868
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
24870
- [(set_attr "type" "fp")])
24871
+(define_insn "*neg<mode>2_fpr"
24872
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24873
+ (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
24874
+ "TARGET_<MODE>_FPR"
24878
+ [(set_attr "type" "fp")
24879
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
24881
-(define_expand "abssf2"
24882
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
24883
- (abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
24884
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
24885
+(define_expand "add<mode>3"
24886
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
24887
+ (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
24888
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
24889
+ "TARGET_<MODE>_INSN"
24892
-(define_insn "*abssf2"
24893
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24894
- (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
24895
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
24897
- [(set_attr "type" "fp")])
24898
+(define_insn "*add<mode>3_fpr"
24899
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24900
+ (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
24901
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
24902
+ "TARGET_<MODE>_FPR"
24904
+ fadd<Ftrad> %0,%1,%2
24905
+ xsadd<Fvsx> %x0,%x1,%x2"
24906
+ [(set_attr "type" "fp")
24907
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
24910
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24911
- (neg:SF (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f"))))]
24912
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
24914
- [(set_attr "type" "fp")])
24916
-(define_expand "addsf3"
24917
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
24918
- (plus:SF (match_operand:SF 1 "gpc_reg_operand" "")
24919
- (match_operand:SF 2 "gpc_reg_operand" "")))]
24920
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
24921
+(define_expand "sub<mode>3"
24922
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
24923
+ (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
24924
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
24925
+ "TARGET_<MODE>_INSN"
24929
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24930
- (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
24931
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
24932
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
24934
+(define_insn "*sub<mode>3_fpr"
24935
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24936
+ (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
24937
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
24938
+ "TARGET_<MODE>_FPR"
24940
+ fsub<Ftrad> %0,%1,%2
24941
+ xssub<Fvsx> %x0,%x1,%x2"
24942
[(set_attr "type" "fp")
24943
- (set_attr "fp_type" "fp_addsub_s")])
24944
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
24946
-(define_expand "subsf3"
24947
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
24948
- (minus:SF (match_operand:SF 1 "gpc_reg_operand" "")
24949
- (match_operand:SF 2 "gpc_reg_operand" "")))]
24950
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
24951
+(define_expand "mul<mode>3"
24952
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
24953
+ (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
24954
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
24955
+ "TARGET_<MODE>_INSN"
24959
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24960
- (minus:SF (match_operand:SF 1 "gpc_reg_operand" "f")
24961
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
24962
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
24964
- [(set_attr "type" "fp")
24965
- (set_attr "fp_type" "fp_addsub_s")])
24966
+(define_insn "*mul<mode>3_fpr"
24967
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24968
+ (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
24969
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
24970
+ "TARGET_<MODE>_FPR"
24972
+ fmul<Ftrad> %0,%1,%2
24973
+ xsmul<Fvsx> %x0,%x1,%x2"
24974
+ [(set_attr "type" "dmul")
24975
+ (set_attr "fp_type" "fp_mul_<Fs>")])
24977
-(define_expand "mulsf3"
24978
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
24979
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "")
24980
- (match_operand:SF 2 "gpc_reg_operand" "")))]
24981
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
24982
+(define_expand "div<mode>3"
24983
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
24984
+ (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
24985
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
24986
+ "TARGET_<MODE>_INSN && !TARGET_SIMPLE_FPU"
24990
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
24991
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
24992
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
24993
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
24995
- [(set_attr "type" "fp")
24996
- (set_attr "fp_type" "fp_mul_s")])
24997
+(define_insn "*div<mode>3_fpr"
24998
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
24999
+ (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
25000
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
25001
+ "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU"
25003
+ fdiv<Ftrad> %0,%1,%2
25004
+ xsdiv<Fvsx> %x0,%x1,%x2"
25005
+ [(set_attr "type" "<Fs>div")
25006
+ (set_attr "fp_type" "fp_div_<Fs>")])
25008
-(define_expand "divsf3"
25009
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
25010
- (div:SF (match_operand:SF 1 "gpc_reg_operand" "")
25011
- (match_operand:SF 2 "gpc_reg_operand" "")))]
25012
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
25014
+(define_insn "sqrt<mode>2"
25015
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25016
+ (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
25017
+ "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU
25018
+ && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))"
25020
+ fsqrt<Ftrad> %0,%1
25021
+ xssqrt<Fvsx> %x0,%x1"
25022
+ [(set_attr "type" "<Fs>sqrt")
25023
+ (set_attr "fp_type" "fp_sqrt_<Fs>")])
25026
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25027
- (div:SF (match_operand:SF 1 "gpc_reg_operand" "f")
25028
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
25029
- "TARGET_HARD_FLOAT && TARGET_FPRS
25030
- && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
25032
- [(set_attr "type" "sdiv")])
25033
+;; Floating point reciprocal approximation
25034
+(define_insn "fre<Fs>"
25035
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25036
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
25041
+ xsre<Fvsx> %x0,%x1"
25042
+ [(set_attr "type" "fp")])
25044
-(define_insn "fres"
25045
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25046
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
25049
+(define_insn "*rsqrt<mode>2"
25050
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25051
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
25053
+ "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)"
25055
+ frsqrte<Ftrad> %0,%1
25056
+ xsrsqrte<Fvsx> %x0,%x1"
25057
[(set_attr "type" "fp")])
25059
-; builtin fmaf support
25060
-(define_insn "*fmasf4_fpr"
25061
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25062
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
25063
- (match_operand:SF 2 "gpc_reg_operand" "f")
25064
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
25065
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
25066
- "fmadds %0,%1,%2,%3"
25067
- [(set_attr "type" "fp")
25068
- (set_attr "fp_type" "fp_maddsub_s")])
25069
+;; Floating point comparisons
25070
+(define_insn "*cmp<mode>_fpr"
25071
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y")
25072
+ (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
25073
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
25074
+ "TARGET_<MODE>_FPR"
25077
+ xscmpudp %0,%x1,%x2"
25078
+ [(set_attr "type" "fpcompare")])
25080
-(define_insn "*fmssf4_fpr"
25081
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25082
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
25083
- (match_operand:SF 2 "gpc_reg_operand" "f")
25084
- (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
25085
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
25086
- "fmsubs %0,%1,%2,%3"
25087
- [(set_attr "type" "fp")
25088
- (set_attr "fp_type" "fp_maddsub_s")])
25089
+;; Floating point conversions
25090
+(define_expand "extendsfdf2"
25091
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
25092
+ (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
25093
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25096
-(define_insn "*nfmasf4_fpr"
25097
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25098
- (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
25099
- (match_operand:SF 2 "gpc_reg_operand" "f")
25100
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
25101
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
25102
- "fnmadds %0,%1,%2,%3"
25103
- [(set_attr "type" "fp")
25104
- (set_attr "fp_type" "fp_maddsub_s")])
25105
+(define_insn_and_split "*extendsfdf2_fpr"
25106
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wv")
25107
+ (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))]
25108
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
25114
+ xxlor %x0,%x1,%x1
25116
+ "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
25119
+ emit_note (NOTE_INSN_DELETED);
25122
+ [(set_attr_alternative "type"
25123
+ [(const_string "fp")
25124
+ (const_string "fp")
25126
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
25127
+ (const_string "fpload_ux")
25129
+ (match_test "update_address_mem (operands[1], VOIDmode)")
25130
+ (const_string "fpload_u")
25131
+ (const_string "fpload")))
25132
+ (const_string "fp")
25133
+ (const_string "vecsimple")
25135
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
25136
+ (const_string "fpload_ux")
25138
+ (match_test "update_address_mem (operands[1], VOIDmode)")
25139
+ (const_string "fpload_u")
25140
+ (const_string "fpload")))])])
25142
-(define_insn "*nfmssf4_fpr"
25143
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25144
- (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
25145
- (match_operand:SF 2 "gpc_reg_operand" "f")
25146
- (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
25147
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
25148
- "fnmsubs %0,%1,%2,%3"
25149
- [(set_attr "type" "fp")
25150
- (set_attr "fp_type" "fp_maddsub_s")])
25152
-(define_expand "sqrtsf2"
25153
+(define_expand "truncdfsf2"
25154
[(set (match_operand:SF 0 "gpc_reg_operand" "")
25155
- (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
25156
- "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU)
25157
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
25158
- && !TARGET_SIMPLE_FPU"
25159
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
25160
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25164
+(define_insn "*truncdfsf2_fpr"
25165
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25166
- (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
25167
- "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT
25168
- && TARGET_FPRS && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
25170
- [(set_attr "type" "ssqrt")])
25172
-(define_insn "*rsqrtsf_internal1"
25173
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
25174
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
25176
- "TARGET_FRSQRTES"
25178
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
25179
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
25181
[(set_attr "type" "fp")])
25183
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
25184
@@ -4742,39 +5012,84 @@
25185
;; Use an unspec rather providing an if-then-else in RTL, to prevent the
25186
;; compiler from optimizing -0.0
25187
(define_insn "copysign<mode>3_fcpsgn"
25188
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
25189
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")
25190
- (match_operand:SFDF 2 "gpc_reg_operand" "<rreg2>")]
25191
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25192
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
25193
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")]
25195
- "TARGET_CMPB && !VECTOR_UNIT_VSX_P (<MODE>mode)"
25196
- "fcpsgn %0,%2,%1"
25197
+ "TARGET_<MODE>_FPR && TARGET_CMPB"
25200
+ xscpsgn<Fvsx> %x0,%x2,%x1"
25201
[(set_attr "type" "fp")])
25203
;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a
25204
;; fsel instruction and some auxiliary computations. Then we just have a
25205
;; single DEFINE_INSN for fsel and the define_splits to make them if made by
25207
-(define_expand "smaxsf3"
25208
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
25209
- (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
25210
- (match_operand:SF 2 "gpc_reg_operand" ""))
25213
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
25214
- && TARGET_SINGLE_FLOAT && !flag_trapping_math"
25215
- "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
25216
+;; For MIN, MAX on non-VSX machines, and conditional move all of the time, we
25217
+;; use DEFINE_EXPAND's that involve a fsel instruction and some auxiliary
25218
+;; computations. Then we just have a single DEFINE_INSN for fsel and the
25219
+;; define_splits to make them if made by combine. On VSX machines we have the
25220
+;; min/max instructions.
25222
+;; On VSX, we only check for TARGET_VSX instead of checking for a vsx/p8 vector
25223
+;; to allow either DF/SF to use only traditional registers.
25225
-(define_expand "sminsf3"
25226
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
25227
- (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
25228
- (match_operand:SF 2 "gpc_reg_operand" ""))
25231
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
25232
- && TARGET_SINGLE_FLOAT && !flag_trapping_math"
25233
- "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
25234
+(define_expand "smax<mode>3"
25235
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
25236
+ (if_then_else:SFDF (ge (match_operand:SFDF 1 "gpc_reg_operand" "")
25237
+ (match_operand:SFDF 2 "gpc_reg_operand" ""))
25240
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math"
25242
+ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]);
25246
+(define_insn "*smax<mode>3_vsx"
25247
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25248
+ (smax:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
25249
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
25250
+ "TARGET_<MODE>_FPR && TARGET_VSX"
25251
+ "xsmaxdp %x0,%x1,%x2"
25252
+ [(set_attr "type" "fp")])
25254
+(define_expand "smin<mode>3"
25255
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
25256
+ (if_then_else:SFDF (ge (match_operand:SFDF 1 "gpc_reg_operand" "")
25257
+ (match_operand:SFDF 2 "gpc_reg_operand" ""))
25260
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math"
25262
+ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]);
25266
+(define_insn "*smin<mode>3_vsx"
25267
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25268
+ (smin:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
25269
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
25270
+ "TARGET_<MODE>_FPR && TARGET_VSX"
25271
+ "xsmindp %x0,%x1,%x2"
25272
+ [(set_attr "type" "fp")])
25275
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
25276
+ (match_operator:SFDF 3 "min_max_operator"
25277
+ [(match_operand:SFDF 1 "gpc_reg_operand" "")
25278
+ (match_operand:SFDF 2 "gpc_reg_operand" "")]))]
25279
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math
25283
+ rs6000_emit_minmax (operands[0], GET_CODE (operands[3]), operands[1],
25289
[(set (match_operand:SF 0 "gpc_reg_operand" "")
25290
(match_operator:SF 3 "min_max_operator"
25291
[(match_operand:SF 1 "gpc_reg_operand" "")
25292
@@ -4904,208 +5219,9 @@
25294
[(set_attr "type" "fp")])
25296
-(define_expand "negdf2"
25297
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25298
- (neg:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
25299
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25302
-(define_insn "*negdf2_fpr"
25303
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25304
- (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
25305
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25306
- && !VECTOR_UNIT_VSX_P (DFmode)"
25308
- [(set_attr "type" "fp")])
25310
-(define_expand "absdf2"
25311
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25312
- (abs:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
25313
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25316
-(define_insn "*absdf2_fpr"
25317
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25318
- (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
25319
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25320
- && !VECTOR_UNIT_VSX_P (DFmode)"
25322
- [(set_attr "type" "fp")])
25324
-(define_insn "*nabsdf2_fpr"
25325
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25326
- (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d"))))]
25327
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25328
- && !VECTOR_UNIT_VSX_P (DFmode)"
25330
- [(set_attr "type" "fp")])
25332
-(define_expand "adddf3"
25333
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25334
- (plus:DF (match_operand:DF 1 "gpc_reg_operand" "")
25335
- (match_operand:DF 2 "gpc_reg_operand" "")))]
25336
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25339
-(define_insn "*adddf3_fpr"
25340
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25341
- (plus:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
25342
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
25343
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25344
- && !VECTOR_UNIT_VSX_P (DFmode)"
25346
- [(set_attr "type" "fp")
25347
- (set_attr "fp_type" "fp_addsub_d")])
25349
-(define_expand "subdf3"
25350
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25351
- (minus:DF (match_operand:DF 1 "gpc_reg_operand" "")
25352
- (match_operand:DF 2 "gpc_reg_operand" "")))]
25353
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25356
-(define_insn "*subdf3_fpr"
25357
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25358
- (minus:DF (match_operand:DF 1 "gpc_reg_operand" "d")
25359
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
25360
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25361
- && !VECTOR_UNIT_VSX_P (DFmode)"
25363
- [(set_attr "type" "fp")
25364
- (set_attr "fp_type" "fp_addsub_d")])
25366
-(define_expand "muldf3"
25367
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25368
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "")
25369
- (match_operand:DF 2 "gpc_reg_operand" "")))]
25370
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
25373
-(define_insn "*muldf3_fpr"
25374
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25375
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
25376
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
25377
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25378
- && !VECTOR_UNIT_VSX_P (DFmode)"
25380
- [(set_attr "type" "dmul")
25381
- (set_attr "fp_type" "fp_mul_d")])
25383
-(define_expand "divdf3"
25384
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25385
- (div:DF (match_operand:DF 1 "gpc_reg_operand" "")
25386
- (match_operand:DF 2 "gpc_reg_operand" "")))]
25387
- "TARGET_HARD_FLOAT
25388
- && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)
25389
- && !TARGET_SIMPLE_FPU"
25392
-(define_insn "*divdf3_fpr"
25393
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25394
- (div:DF (match_operand:DF 1 "gpc_reg_operand" "d")
25395
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
25396
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU
25397
- && !VECTOR_UNIT_VSX_P (DFmode)"
25399
- [(set_attr "type" "ddiv")])
25401
-(define_insn "*fred_fpr"
25402
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
25403
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
25404
- "TARGET_FRE && !VECTOR_UNIT_VSX_P (DFmode)"
25406
- [(set_attr "type" "fp")])
25408
-(define_insn "*rsqrtdf_internal1"
25409
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25410
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
25412
- "TARGET_FRSQRTE && !VECTOR_UNIT_VSX_P (DFmode)"
25414
- [(set_attr "type" "fp")])
25416
-; builtin fma support
25417
-(define_insn "*fmadf4_fpr"
25418
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
25419
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
25420
- (match_operand:DF 2 "gpc_reg_operand" "f")
25421
- (match_operand:DF 3 "gpc_reg_operand" "f")))]
25422
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25423
- && VECTOR_UNIT_NONE_P (DFmode)"
25424
- "fmadd %0,%1,%2,%3"
25425
- [(set_attr "type" "fp")
25426
- (set_attr "fp_type" "fp_maddsub_d")])
25428
-(define_insn "*fmsdf4_fpr"
25429
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
25430
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
25431
- (match_operand:DF 2 "gpc_reg_operand" "f")
25432
- (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
25433
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25434
- && VECTOR_UNIT_NONE_P (DFmode)"
25435
- "fmsub %0,%1,%2,%3"
25436
- [(set_attr "type" "fp")
25437
- (set_attr "fp_type" "fp_maddsub_d")])
25439
-(define_insn "*nfmadf4_fpr"
25440
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
25441
- (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
25442
- (match_operand:DF 2 "gpc_reg_operand" "f")
25443
- (match_operand:DF 3 "gpc_reg_operand" "f"))))]
25444
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25445
- && VECTOR_UNIT_NONE_P (DFmode)"
25446
- "fnmadd %0,%1,%2,%3"
25447
- [(set_attr "type" "fp")
25448
- (set_attr "fp_type" "fp_maddsub_d")])
25450
-(define_insn "*nfmsdf4_fpr"
25451
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
25452
- (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
25453
- (match_operand:DF 2 "gpc_reg_operand" "f")
25454
- (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
25455
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25456
- && VECTOR_UNIT_NONE_P (DFmode)"
25457
- "fnmsub %0,%1,%2,%3"
25458
- [(set_attr "type" "fp")
25459
- (set_attr "fp_type" "fp_maddsub_d")])
25461
-(define_expand "sqrtdf2"
25462
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25463
- (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
25464
- "TARGET_PPC_GPOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
25467
-(define_insn "*sqrtdf2_fpr"
25468
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
25469
- (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
25470
- "TARGET_PPC_GPOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25471
- && !VECTOR_UNIT_VSX_P (DFmode)"
25473
- [(set_attr "type" "dsqrt")])
25475
;; The conditional move instructions allow us to perform max and min
25476
;; operations even when
25478
-(define_expand "smaxdf3"
25479
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25480
- (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
25481
- (match_operand:DF 2 "gpc_reg_operand" ""))
25484
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25485
- && !flag_trapping_math"
25486
- "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
25488
-(define_expand "smindf3"
25489
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
25490
- (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
25491
- (match_operand:DF 2 "gpc_reg_operand" ""))
25494
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
25495
- && !flag_trapping_math"
25496
- "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
25499
[(set (match_operand:DF 0 "gpc_reg_operand" "")
25500
(match_operator:DF 3 "min_max_operator"
25501
@@ -5159,12 +5275,15 @@
25502
; We don't define lfiwax/lfiwzx with the normal definition, because we
25503
; don't want to support putting SImode in FPR registers.
25504
(define_insn "lfiwax"
25505
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
25506
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
25507
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
25508
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
25510
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
25512
- [(set_attr "type" "fpload")])
25517
+ [(set_attr "type" "fpload,fpload,mffgpr")])
25519
; This split must be run before register allocation because it allocates the
25520
; memory slot that is needed to move values to/from the FPR. We don't allocate
25521
@@ -5186,7 +5305,8 @@
25522
rtx src = operands[1];
25525
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
25526
+ if (!MEM_P (src) && TARGET_POWERPC64
25527
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
25528
tmp = convert_to_mode (DImode, src, false);
25531
@@ -5235,12 +5355,15 @@
25532
(set_attr "type" "fpload")])
25534
(define_insn "lfiwzx"
25535
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
25536
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
25537
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
25538
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
25540
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
25542
- [(set_attr "type" "fpload")])
25547
+ [(set_attr "type" "fpload,fpload,mftgpr")])
25549
(define_insn_and_split "floatunssi<mode>2_lfiwzx"
25550
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
25551
@@ -5257,7 +5380,8 @@
25552
rtx src = operands[1];
25555
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
25556
+ if (!MEM_P (src) && TARGET_POWERPC64
25557
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
25558
tmp = convert_to_mode (DImode, src, true);
25561
@@ -5548,7 +5672,7 @@
25562
emit_insn (gen_stfiwx (dest, tmp));
25565
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
25566
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
25568
dest = gen_lowpart (DImode, dest);
25569
emit_move_insn (dest, tmp);
25570
@@ -5642,7 +5766,7 @@
25571
emit_insn (gen_stfiwx (dest, tmp));
25574
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
25575
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
25577
dest = gen_lowpart (DImode, dest);
25578
emit_move_insn (dest, tmp);
25579
@@ -5781,66 +5905,52 @@
25580
[(set (match_operand:DI 0 "gpc_reg_operand" "=d")
25581
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
25583
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
25584
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
25586
[(set_attr "type" "fp")])
25588
-(define_expand "btrunc<mode>2"
25589
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
25590
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
25591
+(define_insn "btrunc<mode>2"
25592
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25593
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
25595
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
25597
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
25601
+ [(set_attr "type" "fp")
25602
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
25604
-(define_insn "*btrunc<mode>2_fpr"
25605
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
25606
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
25608
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
25609
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
25611
- [(set_attr "type" "fp")])
25613
-(define_expand "ceil<mode>2"
25614
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
25615
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
25616
+(define_insn "ceil<mode>2"
25617
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25618
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
25620
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
25622
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
25626
+ [(set_attr "type" "fp")
25627
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
25629
-(define_insn "*ceil<mode>2_fpr"
25630
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
25631
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
25633
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
25634
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
25636
- [(set_attr "type" "fp")])
25638
-(define_expand "floor<mode>2"
25639
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
25640
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
25641
+(define_insn "floor<mode>2"
25642
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
25643
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
25645
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
25647
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
25651
+ [(set_attr "type" "fp")
25652
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
25654
-(define_insn "*floor<mode>2_fpr"
25655
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
25656
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
25658
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
25659
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
25661
- [(set_attr "type" "fp")])
25663
;; No VSX equivalent to frin
25664
(define_insn "round<mode>2"
25665
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
25666
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
25668
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
25669
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
25671
- [(set_attr "type" "fp")])
25672
+ [(set_attr "type" "fp")
25673
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
25675
; An UNSPEC is used so we don't have to support SImode in FP registers.
25676
(define_insn "stfiwx"
25677
@@ -7195,10 +7305,19 @@
25679
[(set (match_operand:DI 0 "gpc_reg_operand" "")
25680
(and:DI (match_operand:DI 1 "gpc_reg_operand" "")
25681
- (match_operand:DI 2 "and64_2_operand" "")))
25682
+ (match_operand:DI 2 "reg_or_cint_operand" "")))
25683
(clobber (match_scratch:CC 3 ""))])]
25684
- "TARGET_POWERPC64"
25688
+ if (!TARGET_POWERPC64)
25690
+ rtx cc = gen_rtx_SCRATCH (CCmode);
25691
+ rs6000_split_logical (operands, AND, false, false, false, cc);
25694
+ else if (!and64_2_operand (operands[2], DImode))
25695
+ operands[2] = force_reg (DImode, operands[2]);
25698
(define_insn "anddi3_mc"
25699
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r,r,r")
25700
@@ -7379,12 +7498,18 @@
25701
(define_expand "iordi3"
25702
[(set (match_operand:DI 0 "gpc_reg_operand" "")
25703
(ior:DI (match_operand:DI 1 "gpc_reg_operand" "")
25704
- (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
25705
- "TARGET_POWERPC64"
25707
+ (match_operand:DI 2 "reg_or_cint_operand" "")))]
25710
- if (non_logical_cint_operand (operands[2], DImode))
25711
+ if (!TARGET_POWERPC64)
25713
+ rs6000_split_logical (operands, IOR, false, false, false, NULL_RTX);
25716
+ else if (!reg_or_logical_cint_operand (operands[2], DImode))
25717
+ operands[2] = force_reg (DImode, operands[2]);
25718
+ else if (non_logical_cint_operand (operands[2], DImode))
25720
HOST_WIDE_INT value;
25721
rtx tmp = ((!can_create_pseudo_p ()
25722
|| rtx_equal_p (operands[0], operands[1]))
25723
@@ -7408,15 +7533,21 @@
25724
emit_insn (gen_iordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
25730
(define_expand "xordi3"
25731
[(set (match_operand:DI 0 "gpc_reg_operand" "")
25732
(xor:DI (match_operand:DI 1 "gpc_reg_operand" "")
25733
- (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
25734
- "TARGET_POWERPC64"
25736
+ (match_operand:DI 2 "reg_or_cint_operand" "")))]
25739
+ if (!TARGET_POWERPC64)
25741
+ rs6000_split_logical (operands, XOR, false, false, false, NULL_RTX);
25744
+ else if (!reg_or_logical_cint_operand (operands[2], DImode))
25745
+ operands[2] = force_reg (DImode, operands[2]);
25746
if (non_logical_cint_operand (operands[2], DImode))
25748
HOST_WIDE_INT value;
25749
@@ -7442,7 +7573,7 @@
25750
emit_insn (gen_xordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
25756
(define_insn "*booldi3_internal1"
25757
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r")
25758
@@ -7678,7 +7809,385 @@
25759
(compare:CC (match_dup 0)
25764
+(define_insn "*eqv<mode>3"
25765
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
25767
+ (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
25768
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
25771
+ [(set_attr "type" "integer")
25772
+ (set_attr "length" "4")])
25775
+;; 128-bit logical operations expanders
25777
+(define_expand "and<mode>3"
25778
+ [(parallel [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25780
+ (match_operand:BOOL_128 1 "vlogical_operand" "")
25781
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))
25782
+ (clobber (match_scratch:CC 3 ""))])]
25786
+(define_expand "ior<mode>3"
25787
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25788
+ (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
25789
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))]
25793
+(define_expand "xor<mode>3"
25794
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25795
+ (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
25796
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))]
25800
+(define_expand "one_cmpl<mode>2"
25801
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25802
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")))]
25806
+(define_expand "nor<mode>3"
25807
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25809
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" ""))
25810
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
25814
+(define_expand "andc<mode>3"
25815
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25817
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))
25818
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
25822
+;; Power8 vector logical instructions.
25823
+(define_expand "eqv<mode>3"
25824
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25826
+ (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
25827
+ (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
25828
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
25831
+;; Rewrite nand into canonical form
25832
+(define_expand "nand<mode>3"
25833
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25835
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" ""))
25836
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
25837
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
25840
+;; The canonical form is to have the negated element first, so we need to
25841
+;; reverse arguments.
25842
+(define_expand "orc<mode>3"
25843
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
25845
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))
25846
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
25847
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
25850
+;; 128-bit logical operations insns and split operations
25851
+(define_insn_and_split "*and<mode>3_internal"
25852
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
25854
+ (match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>")
25855
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")))
25856
+ (clobber (match_scratch:CC 3 "<BOOL_REGS_AND_CR0>"))]
25859
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
25860
+ return "xxland %x0,%x1,%x2";
25862
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
25863
+ return "vand %0,%1,%2";
25867
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
25870
+ rs6000_split_logical (operands, AND, false, false, false, operands[3]);
25873
+ [(set (attr "type")
25875
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
25876
+ (const_string "vecsimple")
25877
+ (const_string "integer")))
25878
+ (set (attr "length")
25880
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
25881
+ (const_string "4")
25883
+ (match_test "TARGET_POWERPC64")
25884
+ (const_string "8")
25885
+ (const_string "16"))))])
25887
+;; 128-bit IOR/XOR
25888
+(define_insn_and_split "*bool<mode>3_internal"
25889
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
25890
+ (match_operator:BOOL_128 3 "boolean_or_operator"
25891
+ [(match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>")
25892
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
25895
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
25896
+ return "xxl%q3 %x0,%x1,%x2";
25898
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
25899
+ return "v%q3 %0,%1,%2";
25903
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
25906
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, false,
25910
+ [(set (attr "type")
25912
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
25913
+ (const_string "vecsimple")
25914
+ (const_string "integer")))
25915
+ (set (attr "length")
25917
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
25918
+ (const_string "4")
25920
+ (match_test "TARGET_POWERPC64")
25921
+ (const_string "8")
25922
+ (const_string "16"))))])
25924
+;; 128-bit ANDC/ORC
25925
+(define_insn_and_split "*boolc<mode>3_internal1"
25926
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
25927
+ (match_operator:BOOL_128 3 "boolean_operator"
25929
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP1>"))
25930
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
25931
+ "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
25933
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
25934
+ return "xxl%q3 %x0,%x1,%x2";
25936
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
25937
+ return "v%q3 %0,%1,%2";
25941
+ "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND))
25942
+ && reload_completed && int_reg_operand (operands[0], <MODE>mode)"
25945
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
25949
+ [(set (attr "type")
25951
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
25952
+ (const_string "vecsimple")
25953
+ (const_string "integer")))
25954
+ (set (attr "length")
25956
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
25957
+ (const_string "4")
25959
+ (match_test "TARGET_POWERPC64")
25960
+ (const_string "8")
25961
+ (const_string "16"))))])
25963
+(define_insn_and_split "*boolc<mode>3_internal2"
25964
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
25965
+ (match_operator:TI2 3 "boolean_operator"
25967
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
25968
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0")]))]
25969
+ "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
25971
+ "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
25974
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
25978
+ [(set_attr "type" "integer")
25979
+ (set (attr "length")
25981
+ (match_test "TARGET_POWERPC64")
25982
+ (const_string "8")
25983
+ (const_string "16")))])
25985
+;; 128-bit NAND/NOR
25986
+(define_insn_and_split "*boolcc<mode>3_internal1"
25987
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
25988
+ (match_operator:BOOL_128 3 "boolean_operator"
25990
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>"))
25992
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))]))]
25993
+ "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
25995
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
25996
+ return "xxl%q3 %x0,%x1,%x2";
25998
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
25999
+ return "v%q3 %0,%1,%2";
26003
+ "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND))
26004
+ && reload_completed && int_reg_operand (operands[0], <MODE>mode)"
26007
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true,
26011
+ [(set (attr "type")
26013
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
26014
+ (const_string "vecsimple")
26015
+ (const_string "integer")))
26016
+ (set (attr "length")
26018
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
26019
+ (const_string "4")
26021
+ (match_test "TARGET_POWERPC64")
26022
+ (const_string "8")
26023
+ (const_string "16"))))])
26025
+(define_insn_and_split "*boolcc<mode>3_internal2"
26026
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
26027
+ (match_operator:TI2 3 "boolean_operator"
26029
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
26031
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0"))]))]
26032
+ "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
26034
+ "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
26037
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true,
26041
+ [(set_attr "type" "integer")
26042
+ (set (attr "length")
26044
+ (match_test "TARGET_POWERPC64")
26045
+ (const_string "8")
26046
+ (const_string "16")))])
26050
+(define_insn_and_split "*eqv<mode>3_internal1"
26051
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
26054
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>")
26055
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))))]
26056
+ "TARGET_P8_VECTOR"
26058
+ if (vsx_register_operand (operands[0], <MODE>mode))
26059
+ return "xxleqv %x0,%x1,%x2";
26063
+ "TARGET_P8_VECTOR && reload_completed
26064
+ && int_reg_operand (operands[0], <MODE>mode)"
26067
+ rs6000_split_logical (operands, XOR, true, false, false, NULL_RTX);
26070
+ [(set (attr "type")
26072
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
26073
+ (const_string "vecsimple")
26074
+ (const_string "integer")))
26075
+ (set (attr "length")
26077
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
26078
+ (const_string "4")
26080
+ (match_test "TARGET_POWERPC64")
26081
+ (const_string "8")
26082
+ (const_string "16"))))])
26084
+(define_insn_and_split "*eqv<mode>3_internal2"
26085
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
26088
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r")
26089
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0"))))]
26090
+ "!TARGET_P8_VECTOR"
26092
+ "reload_completed && !TARGET_P8_VECTOR"
26095
+ rs6000_split_logical (operands, XOR, true, false, false, NULL_RTX);
26098
+ [(set_attr "type" "integer")
26099
+ (set (attr "length")
26101
+ (match_test "TARGET_POWERPC64")
26102
+ (const_string "8")
26103
+ (const_string "16")))])
26105
+;; 128-bit one's complement
26106
+(define_insn_and_split "*one_cmpl<mode>3_internal"
26107
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
26109
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))]
26112
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
26113
+ return "xxlnor %x0,%x1,%x1";
26115
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
26116
+ return "vnor %0,%1,%1";
26120
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
26123
+ rs6000_split_logical (operands, NOT, false, false, false, NULL_RTX);
26126
+ [(set (attr "type")
26128
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
26129
+ (const_string "vecsimple")
26130
+ (const_string "integer")))
26131
+ (set (attr "length")
26133
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
26134
+ (const_string "4")
26136
+ (match_test "TARGET_POWERPC64")
26137
+ (const_string "8")
26138
+ (const_string "16"))))])
26141
;; Now define ways of moving data around.
26143
;; Set up a register with a value from the GOT table
26144
@@ -7765,7 +8274,31 @@
26148
- [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*")
26149
+ [(set_attr_alternative "type"
26150
+ [(const_string "*")
26151
+ (const_string "*")
26153
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26154
+ (const_string "load_ux")
26156
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26157
+ (const_string "load_u")
26158
+ (const_string "load")))
26160
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26161
+ (const_string "store_ux")
26163
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26164
+ (const_string "store_u")
26165
+ (const_string "store")))
26166
+ (const_string "*")
26167
+ (const_string "*")
26168
+ (const_string "*")
26169
+ (const_string "mfjmpr")
26170
+ (const_string "mtjmpr")
26171
+ (const_string "*")
26172
+ (const_string "*")])
26174
(set_attr "length" "4,4,4,4,4,4,8,4,4,4,4")])
26176
(define_insn "*movsi_internal1_single"
26177
@@ -7787,7 +8320,44 @@
26181
- [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*,*,*")
26182
+ [(set_attr_alternative "type"
26183
+ [(const_string "*")
26184
+ (const_string "*")
26186
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26187
+ (const_string "load_ux")
26189
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26190
+ (const_string "load_u")
26191
+ (const_string "load")))
26193
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26194
+ (const_string "store_ux")
26196
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26197
+ (const_string "store_u")
26198
+ (const_string "store")))
26199
+ (const_string "*")
26200
+ (const_string "*")
26201
+ (const_string "*")
26202
+ (const_string "mfjmpr")
26203
+ (const_string "mtjmpr")
26204
+ (const_string "*")
26205
+ (const_string "*")
26207
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26208
+ (const_string "fpstore_ux")
26210
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26211
+ (const_string "fpstore_u")
26212
+ (const_string "fpstore")))
26214
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26215
+ (const_string "fpload_ux")
26217
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26218
+ (const_string "fpload_u")
26219
+ (const_string "fpload")))])
26220
(set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4")])
26222
;; Split a load of a large constant into the appropriate two-insn
26223
@@ -7822,7 +8392,7 @@
26227
- [(set_attr "type" "cmp,compare,cmp")
26228
+ [(set_attr "type" "cmp,fast_compare,cmp")
26229
(set_attr "length" "4,4,8")])
26232
@@ -7850,7 +8420,26 @@
26236
- [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")])
26237
+ [(set_attr_alternative "type"
26238
+ [(const_string "*")
26240
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26241
+ (const_string "load_ux")
26243
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26244
+ (const_string "load_u")
26245
+ (const_string "load")))
26247
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26248
+ (const_string "store_ux")
26250
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26251
+ (const_string "store_u")
26252
+ (const_string "store")))
26253
+ (const_string "*")
26254
+ (const_string "mfjmpr")
26255
+ (const_string "mtjmpr")
26256
+ (const_string "*")])])
26258
(define_expand "mov<mode>"
26259
[(set (match_operand:INT 0 "general_operand" "")
26260
@@ -7871,7 +8460,26 @@
26264
- [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")])
26265
+ [(set_attr_alternative "type"
26266
+ [(const_string "*")
26268
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26269
+ (const_string "load_ux")
26271
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26272
+ (const_string "load_u")
26273
+ (const_string "load")))
26275
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26276
+ (const_string "store_ux")
26278
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26279
+ (const_string "store_u")
26280
+ (const_string "store")))
26281
+ (const_string "*")
26282
+ (const_string "mfjmpr")
26283
+ (const_string "mtjmpr")
26284
+ (const_string "*")])])
26286
;; Here is how to move condition codes around. When we store CC data in
26287
;; an integer register or memory, we store just the high-order 4 bits.
26288
@@ -7899,7 +8507,7 @@
26294
[(set (attr "type")
26295
(cond [(eq_attr "alternative" "0,3")
26296
(const_string "cr_logical")
26297
@@ -7912,9 +8520,23 @@
26298
(eq_attr "alternative" "9")
26299
(const_string "mtjmpr")
26300
(eq_attr "alternative" "10")
26301
- (const_string "load")
26303
+ (match_test "update_indexed_address_mem (operands[1],
26305
+ (const_string "load_ux")
26307
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26308
+ (const_string "load_u")
26309
+ (const_string "load")))
26310
(eq_attr "alternative" "11")
26311
- (const_string "store")
26313
+ (match_test "update_indexed_address_mem (operands[0],
26315
+ (const_string "store_ux")
26317
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26318
+ (const_string "store_u")
26319
+ (const_string "store")))
26320
(match_test "TARGET_MFCRF")
26321
(const_string "mfcrf")
26323
@@ -7926,15 +8548,17 @@
26324
;; can produce floating-point values in fixed-point registers. Unless the
26325
;; value is a simple constant or already in memory, we deal with this by
26326
;; allocating memory and copying the value explicitly via that memory location.
26327
-(define_expand "movsf"
26328
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
26329
- (match_operand:SF 1 "any_operand" ""))]
26331
- "{ rs6000_emit_move (operands[0], operands[1], SFmode); DONE; }")
26333
+;; Move 32-bit binary/decimal floating point
26334
+(define_expand "mov<mode>"
26335
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "")
26336
+ (match_operand:FMOVE32 1 "any_operand" ""))]
26338
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
26341
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
26342
- (match_operand:SF 1 "const_double_operand" ""))]
26343
+ [(set (match_operand:FMOVE32 0 "gpc_reg_operand" "")
26344
+ (match_operand:FMOVE32 1 "const_double_operand" ""))]
26346
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
26347
|| (GET_CODE (operands[0]) == SUBREG
26348
@@ -7947,42 +8571,89 @@
26349
REAL_VALUE_TYPE rv;
26351
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
26352
- REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26353
+ <real_value_to_target> (rv, l);
26355
if (! TARGET_POWERPC64)
26356
- operands[2] = operand_subword (operands[0], 0, 0, SFmode);
26357
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
26359
operands[2] = gen_lowpart (SImode, operands[0]);
26361
operands[3] = gen_int_mode (l, SImode);
26364
-(define_insn "*movsf_hardfloat"
26365
- [(set (match_operand:SF 0 "nonimmediate_operand" "=!r,!r,m,f,f,m,*c*l,!r,*h,!r,!r")
26366
- (match_operand:SF 1 "input_operand" "r,m,r,f,m,f,r,h,0,G,Fn"))]
26367
- "(gpc_reg_operand (operands[0], SFmode)
26368
- || gpc_reg_operand (operands[1], SFmode))
26369
+(define_insn "mov<mode>_hardfloat"
26370
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wu,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
26371
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wu,r,<f32_dm>,r,h,0,G,Fn"))]
26372
+ "(gpc_reg_operand (operands[0], <MODE>mode)
26373
+ || gpc_reg_operand (operands[1], <MODE>mode))
26374
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
26382
+ xxlor %x0,%x1,%x1
26383
+ xxlxor %x0,%x0,%x0
26395
- [(set_attr "type" "*,load,store,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
26396
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8")])
26397
+ [(set_attr_alternative "type"
26398
+ [(const_string "*")
26400
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26401
+ (const_string "load_ux")
26403
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26404
+ (const_string "load_u")
26405
+ (const_string "load")))
26407
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26408
+ (const_string "store_ux")
26410
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26411
+ (const_string "store_u")
26412
+ (const_string "store")))
26413
+ (const_string "fp")
26414
+ (const_string "vecsimple")
26415
+ (const_string "vecsimple")
26417
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26418
+ (const_string "fpload_ux")
26420
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26421
+ (const_string "fpload_u")
26422
+ (const_string "fpload")))
26424
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26425
+ (const_string "fpstore_ux")
26427
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26428
+ (const_string "fpstore_u")
26429
+ (const_string "fpstore")))
26430
+ (const_string "fpload")
26431
+ (const_string "fpstore")
26432
+ (const_string "mftgpr")
26433
+ (const_string "mffgpr")
26434
+ (const_string "mtjmpr")
26435
+ (const_string "mfjmpr")
26436
+ (const_string "*")
26437
+ (const_string "*")
26438
+ (const_string "*")])
26439
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
26441
-(define_insn "*movsf_softfloat"
26442
- [(set (match_operand:SF 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
26443
- (match_operand:SF 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
26444
- "(gpc_reg_operand (operands[0], SFmode)
26445
- || gpc_reg_operand (operands[1], SFmode))
26446
+(define_insn "*mov<mode>_softfloat"
26447
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
26448
+ (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
26449
+ "(gpc_reg_operand (operands[0], <MODE>mode)
26450
+ || gpc_reg_operand (operands[1], <MODE>mode))
26451
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
26454
@@ -7995,19 +8666,42 @@
26458
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*")
26459
+ [(set_attr_alternative "type"
26460
+ [(const_string "*")
26461
+ (const_string "mtjmpr")
26462
+ (const_string "mfjmpr")
26464
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26465
+ (const_string "load_ux")
26467
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26468
+ (const_string "load_u")
26469
+ (const_string "load")))
26471
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26472
+ (const_string "store_ux")
26474
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26475
+ (const_string "store_u")
26476
+ (const_string "store")))
26477
+ (const_string "*")
26478
+ (const_string "*")
26479
+ (const_string "*")
26480
+ (const_string "*")
26481
+ (const_string "*")])
26482
(set_attr "length" "4,4,4,4,4,4,4,4,8,4")])
26485
-(define_expand "movdf"
26486
- [(set (match_operand:DF 0 "nonimmediate_operand" "")
26487
- (match_operand:DF 1 "any_operand" ""))]
26488
+;; Move 64-bit binary/decimal floating point
26489
+(define_expand "mov<mode>"
26490
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "")
26491
+ (match_operand:FMOVE64 1 "any_operand" ""))]
26493
- "{ rs6000_emit_move (operands[0], operands[1], DFmode); DONE; }")
26494
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
26497
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
26498
- (match_operand:DF 1 "const_int_operand" ""))]
26499
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
26500
+ (match_operand:FMOVE64 1 "const_int_operand" ""))]
26501
"! TARGET_POWERPC64 && reload_completed
26502
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
26503
|| (GET_CODE (operands[0]) == SUBREG
26504
@@ -8020,8 +8714,8 @@
26505
int endian = (WORDS_BIG_ENDIAN == 0);
26506
HOST_WIDE_INT value = INTVAL (operands[1]);
26508
- operands[2] = operand_subword (operands[0], endian, 0, DFmode);
26509
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
26510
+ operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode);
26511
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode);
26512
#if HOST_BITS_PER_WIDE_INT == 32
26513
operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
26515
@@ -8031,8 +8725,8 @@
26519
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
26520
- (match_operand:DF 1 "const_double_operand" ""))]
26521
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
26522
+ (match_operand:FMOVE64 1 "const_double_operand" ""))]
26523
"! TARGET_POWERPC64 && reload_completed
26524
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
26525
|| (GET_CODE (operands[0]) == SUBREG
26526
@@ -8047,17 +8741,17 @@
26527
REAL_VALUE_TYPE rv;
26529
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
26530
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
26531
+ <real_value_to_target> (rv, l);
26533
- operands[2] = operand_subword (operands[0], endian, 0, DFmode);
26534
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
26535
+ operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode);
26536
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode);
26537
operands[4] = gen_int_mode (l[endian], SImode);
26538
operands[5] = gen_int_mode (l[1 - endian], SImode);
26542
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
26543
- (match_operand:DF 1 "const_double_operand" ""))]
26544
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
26545
+ (match_operand:FMOVE64 1 "const_double_operand" ""))]
26546
"TARGET_POWERPC64 && reload_completed
26547
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
26548
|| (GET_CODE (operands[0]) == SUBREG
26549
@@ -8074,7 +8768,7 @@
26552
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
26553
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
26554
+ <real_value_to_target> (rv, l);
26556
operands[2] = gen_lowpart (DImode, operands[0]);
26557
/* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
26558
@@ -8099,22 +8793,19 @@
26559
;; since the D-form version of the memory instructions does not need a GPR for
26562
-(define_insn "*movdf_hardfloat32"
26563
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r")
26564
- (match_operand:DF 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))]
26565
+(define_insn "*mov<mode>_hardfloat32"
26566
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r")
26567
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))]
26568
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
26569
- && (gpc_reg_operand (operands[0], DFmode)
26570
- || gpc_reg_operand (operands[1], DFmode))"
26571
+ && (gpc_reg_operand (operands[0], <MODE>mode)
26572
+ || gpc_reg_operand (operands[1], <MODE>mode))"
26580
- stxsd%U0x %x1,%y0
26582
- xxlor %x0,%x1,%x1
26586
@@ -8122,115 +8813,140 @@
26590
- [(set_attr "type" "fpstore,fpload,fp,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,store,load,two,fp,fp,*")
26591
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")])
26592
+ [(set_attr_alternative "type"
26594
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26595
+ (const_string "fpstore_ux")
26597
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26598
+ (const_string "fpstore_u")
26599
+ (const_string "fpstore")))
26601
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26602
+ (const_string "fpload_ux")
26604
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26605
+ (const_string "fpload_u")
26606
+ (const_string "fpload")))
26607
+ (const_string "fp")
26609
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26610
+ (const_string "fpload_ux")
26611
+ (const_string "fpload"))
26613
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26614
+ (const_string "fpstore_ux")
26615
+ (const_string "fpstore"))
26616
+ (const_string "vecsimple")
26617
+ (const_string "vecsimple")
26618
+ (const_string "store")
26619
+ (const_string "load")
26620
+ (const_string "two")
26621
+ (const_string "fp")
26622
+ (const_string "fp")
26623
+ (const_string "*")])
26624
+ (set_attr "length" "4,4,4,4,4,4,4,8,8,8,8,12,16")])
26626
-(define_insn "*movdf_softfloat32"
26627
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
26628
- (match_operand:DF 1 "input_operand" "r,Y,r,G,H,F"))]
26629
+(define_insn "*mov<mode>_softfloat32"
26630
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
26631
+ (match_operand:FMOVE64 1 "input_operand" "r,Y,r,G,H,F"))]
26632
"! TARGET_POWERPC64
26633
&& ((TARGET_FPRS && TARGET_SINGLE_FLOAT)
26634
|| TARGET_SOFT_FLOAT || TARGET_E500_SINGLE)
26635
- && (gpc_reg_operand (operands[0], DFmode)
26636
- || gpc_reg_operand (operands[1], DFmode))"
26637
+ && (gpc_reg_operand (operands[0], <MODE>mode)
26638
+ || gpc_reg_operand (operands[1], <MODE>mode))"
26640
[(set_attr "type" "store,load,two,*,*,*")
26641
(set_attr "length" "8,8,8,8,12,16")])
26643
-;; Reload patterns to support gpr load/store with misaligned mem.
26644
-;; and multiple gpr load/store at offset >= 0xfffc
26645
-(define_expand "reload_<mode>_store"
26646
- [(parallel [(match_operand 0 "memory_operand" "=m")
26647
- (match_operand 1 "gpc_reg_operand" "r")
26648
- (match_operand:GPR 2 "register_operand" "=&b")])]
26651
- rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
26655
-(define_expand "reload_<mode>_load"
26656
- [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
26657
- (match_operand 1 "memory_operand" "m")
26658
- (match_operand:GPR 2 "register_operand" "=b")])]
26661
- rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
26665
; ld/std require word-aligned displacements -> 'Y' constraint.
26666
; List Y->r and r->Y before r->r for reload.
26667
-(define_insn "*movdf_hardfloat64_mfpgpr"
26668
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r,r,d")
26669
- (match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))]
26670
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
26671
- && TARGET_DOUBLE_FLOAT
26672
- && (gpc_reg_operand (operands[0], DFmode)
26673
- || gpc_reg_operand (operands[1], DFmode))"
26674
+(define_insn "*mov<mode>_hardfloat64"
26675
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
26676
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
26677
+ "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
26678
+ && (gpc_reg_operand (operands[0], <MODE>mode)
26679
+ || gpc_reg_operand (operands[1], <MODE>mode))"
26684
- xxlor %x0,%x1,%x1
26685
- xxlor %x0,%x1,%x1
26688
- stxsd%U0x %x1,%y0
26689
- stxsd%U0x %x1,%y0
26694
+ stxsd%U0x %x1,%y0
26695
+ xxlor %x0,%x1,%x1
26705
- [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
26706
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
26708
-; ld/std require word-aligned displacements -> 'Y' constraint.
26709
-; List Y->r and r->Y before r->r for reload.
26710
-(define_insn "*movdf_hardfloat64"
26711
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,Y,r,!r,ws,?wa,Z,?Z,ws,?wa,wa,*c*l,!r,*h,!r,!r,!r")
26712
- (match_operand:DF 1 "input_operand" "d,m,d,r,Y,r,Z,Z,ws,wa,ws,wa,j,r,h,0,G,H,F"))]
26713
- "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
26714
- && TARGET_DOUBLE_FLOAT
26715
- && (gpc_reg_operand (operands[0], DFmode)
26716
- || gpc_reg_operand (operands[1], DFmode))"
26726
- stxsd%U0x %x1,%y0
26727
- stxsd%U0x %x1,%y0
26728
- xxlor %x0,%x1,%x1
26729
- xxlor %x0,%x1,%x1
26730
- xxlxor %x0,%x0,%x0
26737
- [(set_attr "type" "fpstore,fpload,fp,store,load,*,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
26738
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
26744
+ [(set_attr_alternative "type"
26746
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26747
+ (const_string "fpstore_ux")
26749
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26750
+ (const_string "fpstore_u")
26751
+ (const_string "fpstore")))
26753
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26754
+ (const_string "fpload_ux")
26756
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26757
+ (const_string "fpload_u")
26758
+ (const_string "fpload")))
26759
+ (const_string "fp")
26761
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26762
+ (const_string "fpload_ux")
26763
+ (const_string "fpload"))
26765
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26766
+ (const_string "fpstore_ux")
26767
+ (const_string "fpstore"))
26768
+ (const_string "vecsimple")
26769
+ (const_string "vecsimple")
26771
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26772
+ (const_string "store_ux")
26774
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26775
+ (const_string "store_u")
26776
+ (const_string "store")))
26778
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26779
+ (const_string "load_ux")
26781
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26782
+ (const_string "load_u")
26783
+ (const_string "load")))
26784
+ (const_string "*")
26785
+ (const_string "mtjmpr")
26786
+ (const_string "mfjmpr")
26787
+ (const_string "*")
26788
+ (const_string "*")
26789
+ (const_string "*")
26790
+ (const_string "*")
26791
+ (const_string "mftgpr")
26792
+ (const_string "mffgpr")
26793
+ (const_string "mftgpr")
26794
+ (const_string "mffgpr")])
26795
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
26797
-(define_insn "*movdf_softfloat64"
26798
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
26799
- (match_operand:DF 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
26800
+(define_insn "*mov<mode>_softfloat64"
26801
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
26802
+ (match_operand:FMOVE64 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
26803
"TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
26804
- && (gpc_reg_operand (operands[0], DFmode)
26805
- || gpc_reg_operand (operands[1], DFmode))"
26806
+ && (gpc_reg_operand (operands[0], <MODE>mode)
26807
+ || gpc_reg_operand (operands[1], <MODE>mode))"
26811
@@ -8241,38 +8957,57 @@
26815
- [(set_attr "type" "store,load,*,mtjmpr,mfjmpr,*,*,*,*")
26816
+ [(set_attr_alternative "type"
26818
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
26819
+ (const_string "store_ux")
26821
+ (match_test "update_address_mem (operands[0], VOIDmode)")
26822
+ (const_string "store_u")
26823
+ (const_string "store")))
26825
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
26826
+ (const_string "load_ux")
26828
+ (match_test "update_address_mem (operands[1], VOIDmode)")
26829
+ (const_string "load_u")
26830
+ (const_string "load")))
26831
+ (const_string "*")
26832
+ (const_string "mtjmpr")
26833
+ (const_string "mfjmpr")
26834
+ (const_string "*")
26835
+ (const_string "*")
26836
+ (const_string "*")
26837
+ (const_string "*")])
26838
(set_attr "length" "4,4,4,4,4,8,12,16,4")])
26840
-(define_expand "movtf"
26841
- [(set (match_operand:TF 0 "general_operand" "")
26842
- (match_operand:TF 1 "any_operand" ""))]
26843
- "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
26844
- "{ rs6000_emit_move (operands[0], operands[1], TFmode); DONE; }")
26845
+(define_expand "mov<mode>"
26846
+ [(set (match_operand:FMOVE128 0 "general_operand" "")
26847
+ (match_operand:FMOVE128 1 "any_operand" ""))]
26849
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
26851
;; It's important to list Y->r and r->Y before r->r because otherwise
26852
;; reload, given m->r, will try to pick r->r and reload it, which
26853
;; doesn't make progress.
26854
-(define_insn_and_split "*movtf_internal"
26855
- [(set (match_operand:TF 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
26856
- (match_operand:TF 1 "input_operand" "d,m,d,r,YGHF,r"))]
26857
- "!TARGET_IEEEQUAD
26858
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
26859
- && (gpc_reg_operand (operands[0], TFmode)
26860
- || gpc_reg_operand (operands[1], TFmode))"
26861
+(define_insn_and_split "*mov<mode>_internal"
26862
+ [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
26863
+ (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r"))]
26864
+ "TARGET_HARD_FLOAT && TARGET_FPRS
26865
+ && (gpc_reg_operand (operands[0], <MODE>mode)
26866
+ || gpc_reg_operand (operands[1], <MODE>mode))"
26868
"&& reload_completed"
26870
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
26871
[(set_attr "length" "8,8,8,20,20,16")])
26873
-(define_insn_and_split "*movtf_softfloat"
26874
- [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=Y,r,r")
26875
- (match_operand:TF 1 "input_operand" "r,YGHF,r"))]
26876
- "!TARGET_IEEEQUAD
26877
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_LONG_DOUBLE_128
26878
- && (gpc_reg_operand (operands[0], TFmode)
26879
- || gpc_reg_operand (operands[1], TFmode))"
26880
+(define_insn_and_split "*mov<mode>_softfloat"
26881
+ [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r")
26882
+ (match_operand:FMOVE128 1 "input_operand" "r,YGHF,r"))]
26883
+ "(TARGET_SOFT_FLOAT || !TARGET_FPRS)
26884
+ && (gpc_reg_operand (operands[0], <MODE>mode)
26885
+ || gpc_reg_operand (operands[1], <MODE>mode))"
26887
"&& reload_completed"
26889
@@ -8557,6 +9292,243 @@
26890
operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
26893
+;; Reload helper functions used by rs6000_secondary_reload. The patterns all
26894
+;; must have 3 arguments, and scratch register constraint must be a single
26897
+;; Reload patterns to support gpr load/store with misaligned mem.
26898
+;; and multiple gpr load/store at offset >= 0xfffc
26899
+(define_expand "reload_<mode>_store"
26900
+ [(parallel [(match_operand 0 "memory_operand" "=m")
26901
+ (match_operand 1 "gpc_reg_operand" "r")
26902
+ (match_operand:GPR 2 "register_operand" "=&b")])]
26905
+ rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
26909
+(define_expand "reload_<mode>_load"
26910
+ [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
26911
+ (match_operand 1 "memory_operand" "m")
26912
+ (match_operand:GPR 2 "register_operand" "=b")])]
26915
+ rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
26920
+;; Power8 merge instructions to allow direct move to/from floating point
26921
+;; registers in 32-bit mode. We use TF mode to get two registers to move the
26922
+;; individual 32-bit parts across. Subreg doesn't work too well on the TF
26923
+;; value, since it is allocated in reload and not all of the flow information
26924
+;; is setup for it. We have two patterns to do the two moves between gprs and
26925
+;; fprs. There isn't a dependancy between the two, but we could potentially
26926
+;; schedule other instructions between the two instructions. TFmode is
26927
+;; currently limited to traditional FPR registers. If/when this is changed, we
26928
+;; will need to revist %L to make sure it works with VSX registers, or add an
26929
+;; %x version of %L.
26931
+(define_insn "p8_fmrgow_<mode>"
26932
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
26933
+ (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
26934
+ UNSPEC_P8V_FMRGOW))]
26935
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
26936
+ "fmrgow %0,%1,%L1"
26937
+ [(set_attr "type" "vecperm")])
26939
+(define_insn "p8_mtvsrwz_1"
26940
+ [(set (match_operand:TF 0 "register_operand" "=d")
26941
+ (unspec:TF [(match_operand:SI 1 "register_operand" "r")]
26942
+ UNSPEC_P8V_MTVSRWZ))]
26943
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
26945
+ [(set_attr "type" "mftgpr")])
26947
+(define_insn "p8_mtvsrwz_2"
26948
+ [(set (match_operand:TF 0 "register_operand" "+d")
26949
+ (unspec:TF [(match_dup 0)
26950
+ (match_operand:SI 1 "register_operand" "r")]
26951
+ UNSPEC_P8V_MTVSRWZ))]
26952
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
26954
+ [(set_attr "type" "mftgpr")])
26956
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
26957
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
26958
+ (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
26959
+ UNSPEC_P8V_RELOAD_FROM_GPR))
26960
+ (clobber (match_operand:TF 2 "register_operand" "=d"))]
26961
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
26963
+ "&& reload_completed"
26966
+ rtx dest = operands[0];
26967
+ rtx src = operands[1];
26968
+ rtx tmp = operands[2];
26969
+ rtx gpr_hi_reg = gen_highpart (SImode, src);
26970
+ rtx gpr_lo_reg = gen_lowpart (SImode, src);
26972
+ emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
26973
+ emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
26974
+ emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
26977
+ [(set_attr "length" "12")
26978
+ (set_attr "type" "three")])
26980
+;; Move 128 bit values from GPRs to VSX registers in 64-bit mode
26981
+(define_insn "p8_mtvsrd_1"
26982
+ [(set (match_operand:TF 0 "register_operand" "=ws")
26983
+ (unspec:TF [(match_operand:DI 1 "register_operand" "r")]
26984
+ UNSPEC_P8V_MTVSRD))]
26985
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
26987
+ [(set_attr "type" "mftgpr")])
26989
+(define_insn "p8_mtvsrd_2"
26990
+ [(set (match_operand:TF 0 "register_operand" "+ws")
26991
+ (unspec:TF [(match_dup 0)
26992
+ (match_operand:DI 1 "register_operand" "r")]
26993
+ UNSPEC_P8V_MTVSRD))]
26994
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
26996
+ [(set_attr "type" "mftgpr")])
26998
+(define_insn "p8_xxpermdi_<mode>"
26999
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
27000
+ (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
27001
+ UNSPEC_P8V_XXPERMDI))]
27002
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27003
+ "xxpermdi %x0,%1,%L1,0"
27004
+ [(set_attr "type" "vecperm")])
27006
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
27007
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
27008
+ (unspec:FMOVE128_GPR
27009
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
27010
+ UNSPEC_P8V_RELOAD_FROM_GPR))
27011
+ (clobber (match_operand:TF 2 "register_operand" "=ws"))]
27012
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27014
+ "&& reload_completed"
27017
+ rtx dest = operands[0];
27018
+ rtx src = operands[1];
27019
+ rtx tmp = operands[2];
27020
+ rtx gpr_hi_reg = gen_highpart (DImode, src);
27021
+ rtx gpr_lo_reg = gen_lowpart (DImode, src);
27023
+ emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
27024
+ emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
27025
+ emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
27027
+ [(set_attr "length" "12")
27028
+ (set_attr "type" "three")])
27030
+;; Move SFmode to a VSX from a GPR register. Because scalar floating point
27031
+;; type is stored internally as double precision in the VSX registers, we have
27032
+;; to convert it from the vector format.
27034
+(define_insn_and_split "reload_vsx_from_gprsf"
27035
+ [(set (match_operand:SF 0 "register_operand" "=wa")
27036
+ (unspec:SF [(match_operand:SF 1 "register_operand" "r")]
27037
+ UNSPEC_P8V_RELOAD_FROM_GPR))
27038
+ (clobber (match_operand:DI 2 "register_operand" "=r"))]
27039
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27041
+ "&& reload_completed"
27044
+ rtx op0 = operands[0];
27045
+ rtx op1 = operands[1];
27046
+ rtx op2 = operands[2];
27047
+ rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
27048
+ rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
27050
+ /* Move SF value to upper 32-bits for xscvspdpn. */
27051
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
27052
+ emit_move_insn (op0_di, op2);
27053
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
27056
+ [(set_attr "length" "8")
27057
+ (set_attr "type" "two")])
27059
+;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a
27060
+;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value,
27061
+;; and then doing a move of that.
27062
+(define_insn "p8_mfvsrd_3_<mode>"
27063
+ [(set (match_operand:DF 0 "register_operand" "=r")
27064
+ (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
27065
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
27066
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27068
+ [(set_attr "type" "mftgpr")])
27070
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
27071
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
27072
+ (unspec:FMOVE128_GPR
27073
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
27074
+ UNSPEC_P8V_RELOAD_FROM_VSX))
27075
+ (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
27076
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27078
+ "&& reload_completed"
27081
+ rtx dest = operands[0];
27082
+ rtx src = operands[1];
27083
+ rtx tmp = operands[2];
27084
+ rtx gpr_hi_reg = gen_highpart (DFmode, dest);
27085
+ rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
27087
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
27088
+ emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
27089
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
27091
+ [(set_attr "length" "12")
27092
+ (set_attr "type" "three")])
27094
+;; Move SFmode to a GPR from a VSX register. Because scalar floating point
27095
+;; type is stored internally as double precision, we have to convert it to the
27098
+(define_insn_and_split "reload_gpr_from_vsxsf"
27099
+ [(set (match_operand:SF 0 "register_operand" "=r")
27100
+ (unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
27101
+ UNSPEC_P8V_RELOAD_FROM_VSX))
27102
+ (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
27103
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27105
+ "&& reload_completed"
27108
+ rtx op0 = operands[0];
27109
+ rtx op1 = operands[1];
27110
+ rtx op2 = operands[2];
27111
+ rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
27113
+ emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
27114
+ emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
27115
+ emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
27118
+ [(set_attr "length" "12")
27119
+ (set_attr "type" "three")])
27121
+(define_insn "p8_mfvsrd_4_disf"
27122
+ [(set (match_operand:DI 0 "register_operand" "=r")
27123
+ (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
27124
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
27125
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
27127
+ [(set_attr "type" "mftgpr")])
27130
;; Next come the multi-word integer load and store and the load and store
27133
@@ -8565,8 +9537,8 @@
27134
;; Use of fprs is disparaged slightly otherwise reload prefers to reload
27135
;; a gpr into a fpr instead of reloading an invalid 'Y' address
27136
(define_insn "*movdi_internal32"
27137
- [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r,?wa")
27138
- (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF,O"))]
27139
+ [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r")
27140
+ (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF"))]
27141
"! TARGET_POWERPC64
27142
&& (gpc_reg_operand (operands[0], DImode)
27143
|| gpc_reg_operand (operands[1], DImode))"
27144
@@ -8577,15 +9549,34 @@
27149
- xxlxor %x0,%x0,%x0"
27150
- [(set_attr "type" "store,load,*,fpstore,fpload,fp,*,vecsimple")])
27152
+ [(set_attr_alternative "type"
27153
+ [(const_string "store")
27154
+ (const_string "load")
27155
+ (const_string "*")
27157
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
27158
+ (const_string "fpstore_ux")
27160
+ (match_test "update_address_mem (operands[0], VOIDmode)")
27161
+ (const_string "fpstore_u")
27162
+ (const_string "fpstore")))
27164
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
27165
+ (const_string "fpload_ux")
27167
+ (match_test "update_address_mem (operands[1], VOIDmode)")
27168
+ (const_string "fpload_u")
27169
+ (const_string "fpload")))
27170
+ (const_string "fp")
27171
+ (const_string "*")])])
27174
[(set (match_operand:DI 0 "gpc_reg_operand" "")
27175
(match_operand:DI 1 "const_int_operand" ""))]
27176
"! TARGET_POWERPC64 && reload_completed
27177
- && gpr_or_gpr_p (operands[0], operands[1])"
27178
+ && gpr_or_gpr_p (operands[0], operands[1])
27179
+ && !direct_move_p (operands[0], operands[1])"
27180
[(set (match_dup 2) (match_dup 4))
27181
(set (match_dup 3) (match_dup 1))]
27183
@@ -8607,14 +9598,15 @@
27184
[(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
27185
(match_operand:DIFD 1 "input_operand" ""))]
27186
"reload_completed && !TARGET_POWERPC64
27187
- && gpr_or_gpr_p (operands[0], operands[1])"
27188
+ && gpr_or_gpr_p (operands[0], operands[1])
27189
+ && !direct_move_p (operands[0], operands[1])"
27191
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
27193
-(define_insn "*movdi_mfpgpr"
27194
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*d")
27195
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*d,r"))]
27196
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
27197
+(define_insn "*movdi_internal64"
27198
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*wg,r,?*wm")
27199
+ (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*wg,r,*wm,r"))]
27200
+ "TARGET_POWERPC64
27201
&& (gpc_reg_operand (operands[0], DImode)
27202
|| gpc_reg_operand (operands[1], DImode))"
27204
@@ -8631,33 +9623,52 @@
27209
- [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
27210
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4")])
27214
+ [(set_attr_alternative "type"
27216
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
27217
+ (const_string "store_ux")
27219
+ (match_test "update_address_mem (operands[0], VOIDmode)")
27220
+ (const_string "store_u")
27221
+ (const_string "store")))
27223
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
27224
+ (const_string "load_ux")
27226
+ (match_test "update_address_mem (operands[1], VOIDmode)")
27227
+ (const_string "load_u")
27228
+ (const_string "load")))
27229
+ (const_string "*")
27230
+ (const_string "*")
27231
+ (const_string "*")
27232
+ (const_string "*")
27234
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
27235
+ (const_string "fpstore_ux")
27237
+ (match_test "update_address_mem (operands[0], VOIDmode)")
27238
+ (const_string "fpstore_u")
27239
+ (const_string "fpstore")))
27241
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
27242
+ (const_string "fpload_ux")
27244
+ (match_test "update_address_mem (operands[1], VOIDmode)")
27245
+ (const_string "fpload_u")
27246
+ (const_string "fpload")))
27247
+ (const_string "fp")
27248
+ (const_string "mfjmpr")
27249
+ (const_string "mtjmpr")
27250
+ (const_string "*")
27251
+ (const_string "mftgpr")
27252
+ (const_string "mffgpr")
27253
+ (const_string "mftgpr")
27254
+ (const_string "mffgpr")])
27255
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4")])
27257
-(define_insn "*movdi_internal64"
27258
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,?wa")
27259
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,O"))]
27260
- "TARGET_POWERPC64 && (!TARGET_MFPGPR || !TARGET_HARD_FLOAT || !TARGET_FPRS)
27261
- && (gpc_reg_operand (operands[0], DImode)
27262
- || gpc_reg_operand (operands[1], DImode))"
27276
- xxlxor %x0,%x0,%x0"
27277
- [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,vecsimple")
27278
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4")])
27280
;; immediate value valid for a single instruction hiding in a const_double
27282
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
27283
@@ -8719,14 +9730,16 @@
27287
-;; TImode is similar, except that we usually want to compute the address into
27288
-;; a register and use lsi/stsi (the exception is during reload).
27289
+;; TImode/PTImode is similar, except that we usually want to compute the
27290
+;; address into a register and use lsi/stsi (the exception is during reload).
27292
-(define_insn "*movti_string"
27293
- [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
27294
- (match_operand:TI 1 "input_operand" "r,r,Q,Y,r,n"))]
27295
+(define_insn "*mov<mode>_string"
27296
+ [(set (match_operand:TI2 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
27297
+ (match_operand:TI2 1 "input_operand" "r,r,Q,Y,r,n"))]
27298
"! TARGET_POWERPC64
27299
- && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
27300
+ && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
27301
+ && (gpc_reg_operand (operands[0], <MODE>mode)
27302
+ || gpc_reg_operand (operands[1], <MODE>mode))"
27305
switch (which_alternative)
27306
@@ -8756,27 +9769,32 @@
27307
(const_string "always")
27308
(const_string "conditional")))])
27310
-(define_insn "*movti_ppc64"
27311
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Y,r,r")
27312
- (match_operand:TI 1 "input_operand" "r,Y,r"))]
27313
- "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
27314
- || gpc_reg_operand (operands[1], TImode)))
27315
- && VECTOR_MEM_NONE_P (TImode)"
27317
- [(set_attr "type" "store,load,*")])
27318
+(define_insn "*mov<mode>_ppc64"
27319
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
27320
+ (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))]
27321
+ "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
27322
+ && (gpc_reg_operand (operands[0], <MODE>mode)
27323
+ || gpc_reg_operand (operands[1], <MODE>mode)))"
27325
+ return rs6000_output_move_128bit (operands);
27327
+ [(set_attr "type" "store,store,load,load,*,*")
27328
+ (set_attr "length" "8")])
27331
- [(set (match_operand:TI 0 "gpc_reg_operand" "")
27332
- (match_operand:TI 1 "const_double_operand" ""))]
27333
- "TARGET_POWERPC64 && VECTOR_MEM_NONE_P (TImode)"
27334
+ [(set (match_operand:TI2 0 "int_reg_operand" "")
27335
+ (match_operand:TI2 1 "const_double_operand" ""))]
27336
+ "TARGET_POWERPC64
27337
+ && (VECTOR_MEM_NONE_P (<MODE>mode)
27338
+ || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
27339
[(set (match_dup 2) (match_dup 4))
27340
(set (match_dup 3) (match_dup 5))]
27343
operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0,
27346
operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0,
27349
if (GET_CODE (operands[1]) == CONST_DOUBLE)
27351
operands[4] = GEN_INT (CONST_DOUBLE_HIGH (operands[1]));
27352
@@ -8792,10 +9810,12 @@
27356
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
27357
- (match_operand:TI 1 "input_operand" ""))]
27358
- "reload_completed && VECTOR_MEM_NONE_P (TImode)
27359
- && gpr_or_gpr_p (operands[0], operands[1])"
27360
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "")
27361
+ (match_operand:TI2 1 "input_operand" ""))]
27362
+ "reload_completed
27363
+ && gpr_or_gpr_p (operands[0], operands[1])
27364
+ && !direct_move_p (operands[0], operands[1])
27365
+ && !quad_load_store_p (operands[0], operands[1])"
27367
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
27369
@@ -9651,7 +10671,7 @@
27370
(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
27372
(clobber (reg:SI LR_REGNO))]
27373
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
27374
+ "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
27376
if (TARGET_CMODEL != CMODEL_SMALL)
27377
return "addis %0,%1,%2@got@tlsgd@ha\;addi %0,%0,%2@got@tlsgd@l\;"
27378
@@ -9759,7 +10779,8 @@
27379
(unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")]
27381
(clobber (reg:SI LR_REGNO))]
27382
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
27383
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS
27384
+ && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
27385
"bl %z1(%3@tlsgd)\;nop"
27386
[(set_attr "type" "branch")
27387
(set_attr "length" "8")])
27388
@@ -9791,7 +10812,7 @@
27389
(unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")]
27391
(clobber (reg:SI LR_REGNO))]
27392
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
27393
+ "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
27395
if (TARGET_CMODEL != CMODEL_SMALL)
27396
return "addis %0,%1,%&@got@tlsld@ha\;addi %0,%0,%&@got@tlsld@l\;"
27397
@@ -9892,7 +10913,8 @@
27398
(match_operand 2 "" "g")))
27399
(unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
27400
(clobber (reg:SI LR_REGNO))]
27401
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
27402
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS
27403
+ && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
27404
"bl %z1(%&@tlsld)\;nop"
27405
[(set_attr "type" "branch")
27406
(set_attr "length" "8")])
27407
@@ -10261,7 +11283,7 @@
27408
[(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
27409
(unspec:SI [(const_int 0)] UNSPEC_TOC))
27410
(use (reg:SI 2))])]
27411
- "DEFAULT_ABI == ABI_AIX && TARGET_32BIT"
27412
+ "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_32BIT"
27416
@@ -10276,7 +11298,7 @@
27417
[(parallel [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
27418
(unspec:DI [(const_int 0)] UNSPEC_TOC))
27419
(use (reg:DI 2))])]
27420
- "DEFAULT_ABI == ABI_AIX && TARGET_64BIT"
27421
+ "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_64BIT"
27425
@@ -10306,7 +11328,7 @@
27426
[(parallel [(set (reg:SI LR_REGNO)
27427
(match_operand:SI 0 "immediate_operand" "s"))
27428
(use (unspec [(match_dup 0)] UNSPEC_TOC))])]
27429
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX
27430
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4
27431
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
27434
@@ -10314,7 +11336,7 @@
27435
[(set (reg:SI LR_REGNO)
27436
(match_operand:SI 0 "immediate_operand" "s"))
27437
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
27438
- "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
27439
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4
27440
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
27441
"bcl 20,31,%0\\n%0:"
27442
[(set_attr "type" "branch")
27443
@@ -10324,7 +11346,7 @@
27444
[(set (reg:SI LR_REGNO)
27445
(match_operand:SI 0 "immediate_operand" "s"))
27446
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
27447
- "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
27448
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4
27449
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
27452
@@ -10344,7 +11366,7 @@
27453
(label_ref (match_operand 1 "" ""))]
27456
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
27457
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
27460
(define_insn "load_toc_v4_PIC_1b_normal"
27461
@@ -10353,7 +11375,7 @@
27462
(label_ref (match_operand 1 "" ""))]
27465
- "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
27466
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
27467
"bcl 20,31,$+8\;.long %0-$"
27468
[(set_attr "type" "branch")
27469
(set_attr "length" "8")])
27470
@@ -10364,7 +11386,7 @@
27471
(label_ref (match_operand 1 "" ""))]
27474
- "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
27475
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
27479
@@ -10382,7 +11404,7 @@
27480
(mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
27481
(minus:SI (match_operand:SI 2 "immediate_operand" "s")
27482
(match_operand:SI 3 "immediate_operand" "s")))))]
27483
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
27484
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
27486
[(set_attr "type" "load")])
27488
@@ -10392,7 +11414,7 @@
27490
(minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
27491
(match_operand:SI 3 "symbol_ref_operand" "s")))))]
27492
- "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
27493
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic"
27494
"addis %0,%1,%2-%3@ha")
27496
(define_insn "load_toc_v4_PIC_3c"
27497
@@ -10400,7 +11422,7 @@
27498
(lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
27499
(minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
27500
(match_operand:SI 3 "symbol_ref_operand" "s"))))]
27501
- "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
27502
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic"
27503
"addi %0,%1,%2-%3@l")
27505
;; If the TOC is shared over a translation unit, as happens with all
27506
@@ -10542,8 +11564,13 @@
27508
operands[0] = XEXP (operands[0], 0);
27510
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27512
+ rs6000_call_aix (NULL_RTX, operands[0], operands[1], operands[2]);
27516
if (GET_CODE (operands[0]) != SYMBOL_REF
27517
- || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[0]))
27518
|| (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[2]) & CALL_LONG) != 0))
27520
if (INTVAL (operands[2]) & CALL_LONG)
27521
@@ -10556,12 +11583,6 @@
27522
operands[0] = force_reg (Pmode, operands[0]);
27526
- /* AIX function pointers are really pointers to a three word
27528
- rs6000_call_indirect_aix (NULL_RTX, operands[0], operands[1]);
27532
gcc_unreachable ();
27534
@@ -10587,8 +11608,13 @@
27536
operands[1] = XEXP (operands[1], 0);
27538
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27540
+ rs6000_call_aix (operands[0], operands[1], operands[2], operands[3]);
27544
if (GET_CODE (operands[1]) != SYMBOL_REF
27545
- || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[1]))
27546
|| (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[3]) & CALL_LONG) != 0))
27548
if (INTVAL (operands[3]) & CALL_LONG)
27549
@@ -10601,12 +11627,6 @@
27550
operands[1] = force_reg (Pmode, operands[1]);
27554
- /* AIX function pointers are really pointers to a three word
27556
- rs6000_call_indirect_aix (operands[0], operands[1], operands[2]);
27560
gcc_unreachable ();
27562
@@ -10698,136 +11718,7 @@
27563
[(set_attr "type" "branch")
27564
(set_attr "length" "4,8")])
27566
-;; Call to indirect functions with the AIX abi using a 3 word descriptor.
27567
-;; Operand0 is the addresss of the function to call
27568
-;; Operand1 is the flag for System V.4 for unprototyped or FP registers
27569
-;; Operand2 is the location in the function descriptor to load r2 from
27570
-;; Operand3 is the stack location to hold the current TOC pointer
27572
-(define_insn "call_indirect_aix<ptrsize>"
27573
- [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
27574
- (match_operand 1 "" "g,g"))
27575
- (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
27576
- (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27577
- (use (reg:P STATIC_CHAIN_REGNUM))
27578
- (clobber (reg:P LR_REGNO))]
27579
- "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS"
27580
- "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
27581
- [(set_attr "type" "jmpreg")
27582
- (set_attr "length" "12")])
27584
-;; Like call_indirect_aix<ptrsize>, but no use of the static chain
27585
-;; Operand0 is the addresss of the function to call
27586
-;; Operand1 is the flag for System V.4 for unprototyped or FP registers
27587
-;; Operand2 is the location in the function descriptor to load r2 from
27588
-;; Operand3 is the stack location to hold the current TOC pointer
27590
-(define_insn "call_indirect_aix<ptrsize>_nor11"
27591
- [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
27592
- (match_operand 1 "" "g,g"))
27593
- (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
27594
- (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27595
- (clobber (reg:P LR_REGNO))]
27596
- "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS"
27597
- "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
27598
- [(set_attr "type" "jmpreg")
27599
- (set_attr "length" "12")])
27601
-;; Operand0 is the return result of the function
27602
-;; Operand1 is the addresss of the function to call
27603
-;; Operand2 is the flag for System V.4 for unprototyped or FP registers
27604
-;; Operand3 is the location in the function descriptor to load r2 from
27605
-;; Operand4 is the stack location to hold the current TOC pointer
27607
-(define_insn "call_value_indirect_aix<ptrsize>"
27608
- [(set (match_operand 0 "" "")
27609
- (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
27610
- (match_operand 2 "" "g,g")))
27611
- (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27612
- (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
27613
- (use (reg:P STATIC_CHAIN_REGNUM))
27614
- (clobber (reg:P LR_REGNO))]
27615
- "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS"
27616
- "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
27617
- [(set_attr "type" "jmpreg")
27618
- (set_attr "length" "12")])
27620
-;; Like call_value_indirect_aix<ptrsize>, but no use of the static chain
27621
-;; Operand0 is the return result of the function
27622
-;; Operand1 is the addresss of the function to call
27623
-;; Operand2 is the flag for System V.4 for unprototyped or FP registers
27624
-;; Operand3 is the location in the function descriptor to load r2 from
27625
-;; Operand4 is the stack location to hold the current TOC pointer
27627
-(define_insn "call_value_indirect_aix<ptrsize>_nor11"
27628
- [(set (match_operand 0 "" "")
27629
- (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
27630
- (match_operand 2 "" "g,g")))
27631
- (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27632
- (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
27633
- (clobber (reg:P LR_REGNO))]
27634
- "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS"
27635
- "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
27636
- [(set_attr "type" "jmpreg")
27637
- (set_attr "length" "12")])
27639
-;; Call to function which may be in another module. Restore the TOC
27640
-;; pointer (r2) after the call unless this is System V.
27641
-;; Operand2 is nonzero if we are using the V.4 calling sequence and
27642
-;; either the function was not prototyped, or it was prototyped as a
27643
-;; variable argument function. It is > 0 if FP registers were passed
27644
-;; and < 0 if they were not.
27646
-(define_insn "*call_nonlocal_aix32"
27647
- [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "s"))
27648
- (match_operand 1 "" "g"))
27649
- (use (match_operand:SI 2 "immediate_operand" "O"))
27650
- (clobber (reg:SI LR_REGNO))]
27652
- && DEFAULT_ABI == ABI_AIX
27653
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
27655
- [(set_attr "type" "branch")
27656
- (set_attr "length" "8")])
27658
-(define_insn "*call_nonlocal_aix64"
27659
- [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s"))
27660
- (match_operand 1 "" "g"))
27661
- (use (match_operand:SI 2 "immediate_operand" "O"))
27662
- (clobber (reg:SI LR_REGNO))]
27664
- && DEFAULT_ABI == ABI_AIX
27665
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
27667
- [(set_attr "type" "branch")
27668
- (set_attr "length" "8")])
27670
-(define_insn "*call_value_nonlocal_aix32"
27671
- [(set (match_operand 0 "" "")
27672
- (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "s"))
27673
- (match_operand 2 "" "g")))
27674
- (use (match_operand:SI 3 "immediate_operand" "O"))
27675
- (clobber (reg:SI LR_REGNO))]
27677
- && DEFAULT_ABI == ABI_AIX
27678
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
27680
- [(set_attr "type" "branch")
27681
- (set_attr "length" "8")])
27683
-(define_insn "*call_value_nonlocal_aix64"
27684
- [(set (match_operand 0 "" "")
27685
- (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s"))
27686
- (match_operand 2 "" "g")))
27687
- (use (match_operand:SI 3 "immediate_operand" "O"))
27688
- (clobber (reg:SI LR_REGNO))]
27690
- && DEFAULT_ABI == ABI_AIX
27691
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
27693
- [(set_attr "type" "branch")
27694
- (set_attr "length" "8")])
27696
;; A function pointer under System V is just a normal pointer
27697
;; operands[0] is the function pointer
27698
;; operands[1] is the stack size to clean up
27699
@@ -11009,6 +11900,104 @@
27700
[(set_attr "type" "branch,branch")
27701
(set_attr "length" "4,8")])
27704
+;; Call to AIX abi function in the same module.
27706
+(define_insn "*call_local_aix<mode>"
27707
+ [(call (mem:SI (match_operand:P 0 "current_file_function_operand" "s"))
27708
+ (match_operand 1 "" "g"))
27709
+ (clobber (reg:P LR_REGNO))]
27710
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
27712
+ [(set_attr "type" "branch")
27713
+ (set_attr "length" "4")])
27715
+(define_insn "*call_value_local_aix<mode>"
27716
+ [(set (match_operand 0 "" "")
27717
+ (call (mem:SI (match_operand:P 1 "current_file_function_operand" "s"))
27718
+ (match_operand 2 "" "g")))
27719
+ (clobber (reg:P LR_REGNO))]
27720
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
27722
+ [(set_attr "type" "branch")
27723
+ (set_attr "length" "4")])
27725
+;; Call to AIX abi function which may be in another module.
27726
+;; Restore the TOC pointer (r2) after the call.
27728
+(define_insn "*call_nonlocal_aix<mode>"
27729
+ [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s"))
27730
+ (match_operand 1 "" "g"))
27731
+ (clobber (reg:P LR_REGNO))]
27732
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
27734
+ [(set_attr "type" "branch")
27735
+ (set_attr "length" "8")])
27737
+(define_insn "*call_value_nonlocal_aix<mode>"
27738
+ [(set (match_operand 0 "" "")
27739
+ (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s"))
27740
+ (match_operand 2 "" "g")))
27741
+ (clobber (reg:P LR_REGNO))]
27742
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
27744
+ [(set_attr "type" "branch")
27745
+ (set_attr "length" "8")])
27747
+;; Call to indirect functions with the AIX abi using a 3 word descriptor.
27748
+;; Operand0 is the addresss of the function to call
27749
+;; Operand2 is the location in the function descriptor to load r2 from
27750
+;; Operand3 is the stack location to hold the current TOC pointer
27752
+(define_insn "*call_indirect_aix<mode>"
27753
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
27754
+ (match_operand 1 "" "g,g"))
27755
+ (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
27756
+ (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27757
+ (clobber (reg:P LR_REGNO))]
27758
+ "DEFAULT_ABI == ABI_AIX"
27759
+ "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
27760
+ [(set_attr "type" "jmpreg")
27761
+ (set_attr "length" "12")])
27763
+(define_insn "*call_value_indirect_aix<mode>"
27764
+ [(set (match_operand 0 "" "")
27765
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
27766
+ (match_operand 2 "" "g,g")))
27767
+ (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27768
+ (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
27769
+ (clobber (reg:P LR_REGNO))]
27770
+ "DEFAULT_ABI == ABI_AIX"
27771
+ "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
27772
+ [(set_attr "type" "jmpreg")
27773
+ (set_attr "length" "12")])
27775
+;; Call to indirect functions with the ELFv2 ABI.
27776
+;; Operand0 is the addresss of the function to call
27777
+;; Operand2 is the stack location to hold the current TOC pointer
27779
+(define_insn "*call_indirect_elfv2<mode>"
27780
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
27781
+ (match_operand 1 "" "g,g"))
27782
+ (set (reg:P TOC_REGNUM) (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
27783
+ (clobber (reg:P LR_REGNO))]
27784
+ "DEFAULT_ABI == ABI_ELFv2"
27785
+ "b%T0l\;<ptrload> 2,%2"
27786
+ [(set_attr "type" "jmpreg")
27787
+ (set_attr "length" "8")])
27789
+(define_insn "*call_value_indirect_elfv2<mode>"
27790
+ [(set (match_operand 0 "" "")
27791
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
27792
+ (match_operand 2 "" "g,g")))
27793
+ (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
27794
+ (clobber (reg:P LR_REGNO))]
27795
+ "DEFAULT_ABI == ABI_ELFv2"
27796
+ "b%T1l\;<ptrload> 2,%3"
27797
+ [(set_attr "type" "jmpreg")
27798
+ (set_attr "length" "8")])
27801
;; Call subroutine returning any type.
27802
(define_expand "untyped_call"
27803
[(parallel [(call (match_operand 0 "" "")
27804
@@ -11056,8 +12045,41 @@
27805
gcc_assert (GET_CODE (operands[1]) == CONST_INT);
27807
operands[0] = XEXP (operands[0], 0);
27809
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27811
+ rs6000_sibcall_aix (NULL_RTX, operands[0], operands[1], operands[2]);
27816
+(define_expand "sibcall_value"
27817
+ [(parallel [(set (match_operand 0 "register_operand" "")
27818
+ (call (mem:SI (match_operand 1 "address_operand" ""))
27819
+ (match_operand 2 "" "")))
27820
+ (use (match_operand 3 "" ""))
27821
+ (use (reg:SI LR_REGNO))
27822
+ (simple_return)])]
27827
+ if (MACHOPIC_INDIRECT)
27828
+ operands[1] = machopic_indirect_call_target (operands[1]);
27831
+ gcc_assert (GET_CODE (operands[1]) == MEM);
27832
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
27834
+ operands[1] = XEXP (operands[1], 0);
27836
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27838
+ rs6000_sibcall_aix (operands[0], operands[1], operands[2], operands[3]);
27843
;; this and similar patterns must be marked as using LR, otherwise
27844
;; dataflow will try to delete the store into it. This is true
27845
;; even when the actual reg to jump to is in CTR, when LR was
27846
@@ -11123,7 +12145,6 @@
27847
[(set_attr "type" "branch")
27848
(set_attr "length" "4,8")])
27851
(define_insn "*sibcall_value_local64"
27852
[(set (match_operand 0 "" "")
27853
(call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s"))
27854
@@ -11145,35 +12166,6 @@
27855
[(set_attr "type" "branch")
27856
(set_attr "length" "4,8")])
27858
-(define_insn "*sibcall_nonlocal_aix<mode>"
27859
- [(call (mem:SI (match_operand:P 0 "call_operand" "s,c"))
27860
- (match_operand 1 "" "g,g"))
27861
- (use (match_operand:SI 2 "immediate_operand" "O,O"))
27862
- (use (reg:SI LR_REGNO))
27864
- "DEFAULT_ABI == ABI_AIX
27865
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
27869
- [(set_attr "type" "branch")
27870
- (set_attr "length" "4")])
27872
-(define_insn "*sibcall_value_nonlocal_aix<mode>"
27873
- [(set (match_operand 0 "" "")
27874
- (call (mem:SI (match_operand:P 1 "call_operand" "s,c"))
27875
- (match_operand 2 "" "g,g")))
27876
- (use (match_operand:SI 3 "immediate_operand" "O,O"))
27877
- (use (reg:SI LR_REGNO))
27879
- "DEFAULT_ABI == ABI_AIX
27880
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
27884
- [(set_attr "type" "branch")
27885
- (set_attr "length" "4")])
27887
(define_insn "*sibcall_nonlocal_sysv<mode>"
27888
[(call (mem:SI (match_operand:P 0 "call_operand" "s,s,c,c"))
27889
(match_operand 1 "" ""))
27890
@@ -11204,27 +12196,6 @@
27891
[(set_attr "type" "branch")
27892
(set_attr "length" "4,8,4,8")])
27894
-(define_expand "sibcall_value"
27895
- [(parallel [(set (match_operand 0 "register_operand" "")
27896
- (call (mem:SI (match_operand 1 "address_operand" ""))
27897
- (match_operand 2 "" "")))
27898
- (use (match_operand 3 "" ""))
27899
- (use (reg:SI LR_REGNO))
27900
- (simple_return)])]
27905
- if (MACHOPIC_INDIRECT)
27906
- operands[1] = machopic_indirect_call_target (operands[1]);
27909
- gcc_assert (GET_CODE (operands[1]) == MEM);
27910
- gcc_assert (GET_CODE (operands[2]) == CONST_INT);
27912
- operands[1] = XEXP (operands[1], 0);
27915
(define_insn "*sibcall_value_nonlocal_sysv<mode>"
27916
[(set (match_operand 0 "" "")
27917
(call (mem:SI (match_operand:P 1 "call_operand" "s,s,c,c"))
27918
@@ -11256,6 +12227,31 @@
27919
[(set_attr "type" "branch")
27920
(set_attr "length" "4,8,4,8")])
27922
+;; AIX ABI sibling call patterns.
27924
+(define_insn "*sibcall_aix<mode>"
27925
+ [(call (mem:SI (match_operand:P 0 "call_operand" "s,c"))
27926
+ (match_operand 1 "" "g,g"))
27928
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
27932
+ [(set_attr "type" "branch")
27933
+ (set_attr "length" "4")])
27935
+(define_insn "*sibcall_value_aix<mode>"
27936
+ [(set (match_operand 0 "" "")
27937
+ (call (mem:SI (match_operand:P 1 "call_operand" "s,c"))
27938
+ (match_operand 2 "" "g,g")))
27940
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
27944
+ [(set_attr "type" "branch")
27945
+ (set_attr "length" "4")])
27947
(define_expand "sibcall_epilogue"
27948
[(use (const_int 0))]
27950
@@ -11294,7 +12290,14 @@
27951
operands[1] = gen_rtx_REG (Pmode, 0);
27952
return "st<wd>%U0%X0 %1,%0";
27954
- [(set_attr "type" "store")
27955
+ [(set (attr "type")
27957
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
27958
+ (const_string "store_ux")
27960
+ (match_test "update_address_mem (operands[0], VOIDmode)")
27961
+ (const_string "store_u")
27962
+ (const_string "store"))))
27963
(set_attr "length" "4")])
27965
(define_insn "probe_stack_range<P:mode>"
27966
@@ -11589,23 +12592,6 @@
27967
[(set (match_dup 3) (compare:CCUNS (match_dup 1) (match_dup 2)))
27968
(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))])
27970
-(define_insn "*cmpsf_internal1"
27971
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
27972
- (compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "f")
27973
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
27974
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
27976
- [(set_attr "type" "fpcompare")])
27978
-(define_insn "*cmpdf_internal1"
27979
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
27980
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "d")
27981
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
27982
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
27983
- && !VECTOR_UNIT_VSX_P (DFmode)"
27985
- [(set_attr "type" "fpcompare")])
27987
;; Only need to compare second words if first words equal
27988
(define_insn "*cmptf_internal1"
27989
[(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
27990
@@ -13501,6 +14487,14 @@
27992
[(set_attr "type" "mfcr")])
27994
+(define_insn "*crsave"
27995
+ [(match_parallel 0 "crsave_operation"
27996
+ [(set (match_operand:SI 1 "memory_operand" "=m")
27997
+ (match_operand:SI 2 "gpc_reg_operand" "r"))])]
28000
+ [(set_attr "type" "store")])
28002
(define_insn "*stmw"
28003
[(match_parallel 0 "stmw_operation"
28004
[(set (match_operand:SI 1 "memory_operand" "=m")
28005
@@ -13885,7 +14879,7 @@
28006
(match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))]
28009
- [(set_attr "type" "integer")])
28010
+ [(set_attr "type" "popcnt")])
28013
;; Builtin fma support. Handle
28014
@@ -13900,6 +14894,20 @@
28018
+(define_insn "*fma<mode>4_fpr"
28019
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
28021
+ (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>,<Fv>")
28022
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
28023
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>")))]
28024
+ "TARGET_<MODE>_FPR"
28026
+ fmadd<Ftrad> %0,%1,%2,%3
28027
+ xsmadda<Fvsx> %x0,%x1,%x2
28028
+ xsmaddm<Fvsx> %x0,%x1,%x3"
28029
+ [(set_attr "type" "fp")
28030
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
28032
; Altivec only has fma and nfms.
28033
(define_expand "fms<mode>4"
28034
[(set (match_operand:FMA_F 0 "register_operand" "")
28035
@@ -13910,6 +14918,20 @@
28036
"!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
28039
+(define_insn "*fms<mode>4_fpr"
28040
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
28042
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
28043
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
28044
+ (neg:SFDF (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>"))))]
28045
+ "TARGET_<MODE>_FPR"
28047
+ fmsub<Ftrad> %0,%1,%2,%3
28048
+ xsmsuba<Fvsx> %x0,%x1,%x2
28049
+ xsmsubm<Fvsx> %x0,%x1,%x3"
28050
+ [(set_attr "type" "fp")
28051
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
28053
;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
28054
(define_expand "fnma<mode>4"
28055
[(set (match_operand:FMA_F 0 "register_operand" "")
28056
@@ -13943,6 +14965,21 @@
28057
"!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
28060
+(define_insn "*nfma<mode>4_fpr"
28061
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
28064
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
28065
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
28066
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>"))))]
28067
+ "TARGET_<MODE>_FPR"
28069
+ fnmadd<Ftrad> %0,%1,%2,%3
28070
+ xsnmadda<Fvsx> %x0,%x1,%x2
28071
+ xsnmaddm<Fvsx> %x0,%x1,%x3"
28072
+ [(set_attr "type" "fp")
28073
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
28075
; Not an official optab name, but used from builtins.
28076
(define_expand "nfms<mode>4"
28077
[(set (match_operand:FMA_F 0 "register_operand" "")
28078
@@ -13954,6 +14991,23 @@
28082
+(define_insn "*nfmssf4_fpr"
28083
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
28086
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
28087
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
28089
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>")))))]
28090
+ "TARGET_<MODE>_FPR"
28092
+ fnmsub<Ftrad> %0,%1,%2,%3
28093
+ xsnmsuba<Fvsx> %x0,%x1,%x2
28094
+ xsnmsubm<Fvsx> %x0,%x1,%x3"
28095
+ [(set_attr "type" "fp")
28096
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
28099
(define_expand "rs6000_get_timebase"
28100
[(use (match_operand:DI 0 "gpc_reg_operand" ""))]
28102
@@ -14020,7 +15074,44 @@
28106
+;; Power8 fusion support for fusing an addis instruction with a D-form load of
28107
+;; a GPR. The addis instruction must be adjacent to the load, and use the same
28108
+;; register that is being loaded. The fused ops must be physically adjacent.
28110
+;; We use define_peephole for the actual addis/load, and the register used to
28111
+;; hold the addis value must be the same as the register being loaded. We use
28112
+;; define_peephole2 to change the register used for addis to be the register
28113
+;; being loaded, since we can look at whether it is dead after the load insn.
28116
+ [(set (match_operand:P 0 "base_reg_operand" "")
28117
+ (match_operand:P 1 "fusion_gpr_addis" ""))
28118
+ (set (match_operand:INT1 2 "base_reg_operand" "")
28119
+ (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
28120
+ "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)"
28122
+ return emit_fusion_gpr_load (operands);
28124
+ [(set_attr "type" "load")
28125
+ (set_attr "length" "8")])
28128
+ [(set (match_operand:P 0 "base_reg_operand" "")
28129
+ (match_operand:P 1 "fusion_gpr_addis" ""))
28130
+ (set (match_operand:INT1 2 "base_reg_operand" "")
28131
+ (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
28132
+ "TARGET_P8_FUSION
28133
+ && (REGNO (operands[0]) != REGNO (operands[2])
28134
+ || GET_CODE (operands[3]) == SIGN_EXTEND)
28135
+ && fusion_gpr_load_p (operands, true)"
28138
+ expand_fusion_gpr_load (operands);
28144
(include "sync.md")
28145
(include "vector.md")
28147
@@ -14028,3 +15119,5 @@
28150
(include "paired.md")
28151
+(include "crypto.md")
28152
+(include "htm.md")
28153
--- a/src/gcc/config/rs6000/option-defaults.h
28154
+++ b/src/gcc/config/rs6000/option-defaults.h
28156
--with-float is ignored if -mhard-float or -msoft-float are
28158
#define OPTION_DEFAULT_SPECS \
28159
+ {"abi", "%{!mabi=elfv*:-mabi=%(VALUE)}" }, \
28160
{"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \
28161
{"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
28162
{"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
28163
--- a/src/gcc/config/rs6000/rs6000-opts.h
28164
+++ b/src/gcc/config/rs6000/rs6000-opts.h
28174
/* FP processor type. */
28175
@@ -100,7 +101,8 @@
28176
/* Enumeration to give which calling sequence to use. */
28179
- ABI_AIX, /* IBM's AIX */
28180
+ ABI_AIX, /* IBM's AIX, or Linux ELFv1 */
28181
+ ABI_ELFv2, /* Linux ELFv2 ABI */
28182
ABI_V4, /* System V.4/eabi */
28183
ABI_DARWIN /* Apple's Darwin (OS X kernel) */
28185
@@ -131,11 +133,14 @@
28189
-/* Describe which vector unit to use for a given machine mode. */
28190
+/* Describe which vector unit to use for a given machine mode. The
28191
+ VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and
28192
+ P8_VECTOR are contiguous. */
28193
enum rs6000_vector {
28194
VECTOR_NONE, /* Type is not a vector or not supported */
28195
VECTOR_ALTIVEC, /* Use altivec for vector processing */
28196
VECTOR_VSX, /* Use VSX for vector processing */
28197
+ VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */
28198
VECTOR_PAIRED, /* Use paired floating point for vectors */
28199
VECTOR_SPE, /* Use SPE for vector processing */
28200
VECTOR_OTHER /* Some other vector unit */
28201
--- a/src/gcc/config/rs6000/driver-rs6000.c
28202
+++ b/src/gcc/config/rs6000/driver-rs6000.c
28203
@@ -167,7 +167,7 @@
28208
+ static char buf[1024];
28212
--- a/src/gcc/config/rs6000/altivec.h
28213
+++ b/src/gcc/config/rs6000/altivec.h
28214
@@ -321,6 +321,42 @@
28215
#define vec_vsx_st __builtin_vec_vsx_st
28219
+/* Vector additions added in ISA 2.07. */
28220
+#define vec_eqv __builtin_vec_eqv
28221
+#define vec_nand __builtin_vec_nand
28222
+#define vec_orc __builtin_vec_orc
28223
+#define vec_vaddudm __builtin_vec_vaddudm
28224
+#define vec_vclz __builtin_vec_vclz
28225
+#define vec_vclzb __builtin_vec_vclzb
28226
+#define vec_vclzd __builtin_vec_vclzd
28227
+#define vec_vclzh __builtin_vec_vclzh
28228
+#define vec_vclzw __builtin_vec_vclzw
28229
+#define vec_vgbbd __builtin_vec_vgbbd
28230
+#define vec_vmaxsd __builtin_vec_vmaxsd
28231
+#define vec_vmaxud __builtin_vec_vmaxud
28232
+#define vec_vminsd __builtin_vec_vminsd
28233
+#define vec_vminud __builtin_vec_vminud
28234
+#define vec_vmrgew __builtin_vec_vmrgew
28235
+#define vec_vmrgow __builtin_vec_vmrgow
28236
+#define vec_vpksdss __builtin_vec_vpksdss
28237
+#define vec_vpksdus __builtin_vec_vpksdus
28238
+#define vec_vpkudum __builtin_vec_vpkudum
28239
+#define vec_vpkudus __builtin_vec_vpkudus
28240
+#define vec_vpopcnt __builtin_vec_vpopcnt
28241
+#define vec_vpopcntb __builtin_vec_vpopcntb
28242
+#define vec_vpopcntd __builtin_vec_vpopcntd
28243
+#define vec_vpopcnth __builtin_vec_vpopcnth
28244
+#define vec_vpopcntw __builtin_vec_vpopcntw
28245
+#define vec_vrld __builtin_vec_vrld
28246
+#define vec_vsld __builtin_vec_vsld
28247
+#define vec_vsrad __builtin_vec_vsrad
28248
+#define vec_vsrd __builtin_vec_vsrd
28249
+#define vec_vsubudm __builtin_vec_vsubudm
28250
+#define vec_vupkhsw __builtin_vec_vupkhsw
28251
+#define vec_vupklsw __builtin_vec_vupklsw
28255
For C++, we use templates in order to allow non-parenthesized arguments.
28256
For C, instead, we use macros since non-parenthesized arguments were
28257
--- a/src/gcc/config/rs6000/sysv4.h
28258
+++ b/src/gcc/config/rs6000/sysv4.h
28260
& (OPTION_MASK_RELOCATABLE \
28261
| OPTION_MASK_MINIMAL_TOC)) \
28263
- || DEFAULT_ABI == ABI_AIX)
28264
+ || DEFAULT_ABI != ABI_V4)
28266
#define TARGET_BITFIELD_TYPE (! TARGET_NO_BITFIELD_TYPE)
28267
#define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
28268
@@ -147,7 +147,7 @@
28269
rs6000_sdata_name); \
28272
- else if (flag_pic && DEFAULT_ABI != ABI_AIX \
28273
+ else if (flag_pic && DEFAULT_ABI == ABI_V4 \
28274
&& (rs6000_sdata == SDATA_EABI \
28275
|| rs6000_sdata == SDATA_SYSV)) \
28277
@@ -173,14 +173,14 @@
28278
error ("-mrelocatable and -mno-minimal-toc are incompatible"); \
28281
- if (TARGET_RELOCATABLE && rs6000_current_abi == ABI_AIX) \
28282
+ if (TARGET_RELOCATABLE && rs6000_current_abi != ABI_V4) \
28284
rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
28285
error ("-mrelocatable and -mcall-%s are incompatible", \
28286
rs6000_abi_name); \
28289
- if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi == ABI_AIX) \
28290
+ if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi != ABI_V4) \
28293
error ("-fPIC and -mcall-%s are incompatible", \
28294
@@ -193,7 +193,7 @@
28297
/* Treat -fPIC the same as -mrelocatable. */ \
28298
- if (flag_pic > 1 && DEFAULT_ABI != ABI_AIX) \
28299
+ if (flag_pic > 1 && DEFAULT_ABI == ABI_V4) \
28301
rs6000_isa_flags |= OPTION_MASK_RELOCATABLE | OPTION_MASK_MINIMAL_TOC; \
28302
TARGET_NO_FP_IN_TOC = 1; \
28303
@@ -317,7 +317,7 @@
28305
/* Put PC relative got entries in .got2. */
28306
#define MINIMAL_TOC_SECTION_ASM_OP \
28307
- (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI != ABI_AIX) \
28308
+ (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI == ABI_V4) \
28309
? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"")
28311
#define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\""
28312
@@ -538,12 +538,7 @@
28314
#define CC1_ENDIAN_BIG_SPEC ""
28316
-#define CC1_ENDIAN_LITTLE_SPEC "\
28317
-%{!mstrict-align: %{!mno-strict-align: \
28318
- %{!mcall-i960-old: \
28322
+#define CC1_ENDIAN_LITTLE_SPEC ""
28324
#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_big)"
28326
--- a/src/libgo/configure
28327
+++ b/src/libgo/configure
28328
@@ -6501,7 +6501,7 @@
28332
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
28333
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
28334
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
28335
# Find out which ABI we are using.
28336
echo 'int i;' > conftest.$ac_ext
28337
@@ -6519,7 +6519,10 @@
28339
LD="${LD-ld} -m elf_i386"
28341
- ppc64-*linux*|powerpc64-*linux*)
28342
+ powerpc64le-*linux*)
28343
+ LD="${LD-ld} -m elf32lppclinux"
28345
+ powerpc64-*linux*)
28346
LD="${LD-ld} -m elf32ppclinux"
28349
@@ -6538,7 +6541,10 @@
28351
LD="${LD-ld} -m elf_x86_64"
28353
- ppc*-*linux*|powerpc*-*linux*)
28354
+ powerpcle-*linux*)
28355
+ LD="${LD-ld} -m elf64lppc"
28358
LD="${LD-ld} -m elf64ppc"
28360
s390*-*linux*|s390*-*tpf*)
28361
@@ -11105,7 +11111,7 @@
28362
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28363
lt_status=$lt_dlunknown
28364
cat > conftest.$ac_ext <<_LT_EOF
28365
-#line 11108 "configure"
28366
+#line 11114 "configure"
28367
#include "confdefs.h"
28370
@@ -11211,7 +11217,7 @@
28371
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28372
lt_status=$lt_dlunknown
28373
cat > conftest.$ac_ext <<_LT_EOF
28374
-#line 11214 "configure"
28375
+#line 11220 "configure"
28376
#include "confdefs.h"
28379
--- a/src/libgo/testsuite/gotest
28380
+++ b/src/libgo/testsuite/gotest
28381
@@ -369,7 +369,7 @@
28385
- ppc64) text="D" ;;
28386
+ ppc64) text="[TD]" ;;
28389
symtogo='sed -e s/_test/XXXtest/ -e s/.*_\([^_]*\.\)/\1/ -e s/XXXtest/_test/'
28390
--- a/src/libgo/config/libtool.m4
28391
+++ b/src/libgo/config/libtool.m4
28392
@@ -1225,7 +1225,7 @@
28396
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
28397
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
28398
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
28399
# Find out which ABI we are using.
28400
echo 'int i;' > conftest.$ac_ext
28401
@@ -1239,7 +1239,10 @@
28403
LD="${LD-ld} -m elf_i386"
28405
- ppc64-*linux*|powerpc64-*linux*)
28406
+ powerpc64le-*linux*)
28407
+ LD="${LD-ld} -m elf32lppclinux"
28409
+ powerpc64-*linux*)
28410
LD="${LD-ld} -m elf32ppclinux"
28413
@@ -1258,7 +1261,10 @@
28415
LD="${LD-ld} -m elf_x86_64"
28417
- ppc*-*linux*|powerpc*-*linux*)
28418
+ powerpcle-*linux*)
28419
+ LD="${LD-ld} -m elf64lppc"
28422
LD="${LD-ld} -m elf64ppc"
28424
s390*-*linux*|s390*-*tpf*)
28425
--- a/src/config.sub
28426
+++ b/src/config.sub
28429
# Configuration validation subroutine script.
28430
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
28431
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
28432
-# 2011, 2012, 2013 Free Software Foundation, Inc.
28433
+# Copyright 1992-2013 Free Software Foundation, Inc.
28435
-timestamp='2013-01-11'
28436
+timestamp='2013-10-01'
28438
# This file is free software; you can redistribute it and/or modify it
28439
# under the terms of the GNU General Public License as published by
28442
GNU config.sub ($timestamp)
28444
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
28445
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
28446
-2012, 2013 Free Software Foundation, Inc.
28447
+Copyright 1992-2013 Free Software Foundation, Inc.
28449
This is free software; see the source for copying conditions. There is NO
28450
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
28451
@@ -256,12 +252,12 @@
28452
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
28453
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
28457
| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
28461
- | c4x | clipper \
28462
+ | c4x | c8051 | clipper \
28463
| d10v | d30v | dlx | dsp16xx \
28465
| fido | fr30 | frv \
28466
@@ -269,6 +265,7 @@
28468
| i370 | i860 | i960 | ia64 \
28473
| m32c | m32r | m32rle | m68000 | m68k | m88k \
28474
@@ -297,10 +294,10 @@
28477
| nds32 | nds32le | nds32be \
28479
+ | nios | nios2 | nios2eb | nios2el \
28484
| pdp10 | pdp11 | pj | pjl \
28485
| powerpc | powerpc64 | powerpc64le | powerpcle \
28487
@@ -328,7 +325,7 @@
28489
basic_machine=tic6x-unknown
28491
- m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
28492
+ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
28493
basic_machine=$basic_machine-unknown
28496
@@ -370,13 +367,13 @@
28497
| aarch64-* | aarch64_be-* \
28498
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
28499
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
28500
- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
28501
+ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
28502
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
28503
| avr-* | avr32-* \
28504
| be32-* | be64-* \
28505
| bfin-* | bs2000-* \
28506
| c[123]* | c30-* | [cjt]90-* | c4x-* \
28507
- | clipper-* | craynv-* | cydra-* \
28508
+ | c8051-* | clipper-* | craynv-* | cydra-* \
28509
| d10v-* | d30v-* | dlx-* \
28511
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
28512
@@ -385,6 +382,7 @@
28514
| i*86-* | i860-* | i960-* | ia64-* \
28515
| ip2k-* | iq2000-* \
28517
| le32-* | le64-* \
28519
| m32c-* | m32r-* | m32rle-* \
28520
@@ -414,7 +412,7 @@
28523
| nds32-* | nds32le-* | nds32be-* \
28524
- | nios-* | nios2-* \
28525
+ | nios-* | nios2-* | nios2eb-* | nios2el-* \
28526
| none-* | np1-* | ns16k-* | ns32k-* \
28529
@@ -798,7 +796,7 @@
28533
- basic_machine=i386-pc
28534
+ basic_machine=i686-pc
28538
@@ -834,7 +832,7 @@
28539
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
28542
- basic_machine=i386-pc
28543
+ basic_machine=i686-pc
28547
@@ -1550,6 +1548,9 @@
28557
@@ -1593,6 +1594,9 @@
28567
--- a/src/ChangeLog.ibm
28568
+++ b/src/ChangeLog.ibm
28570
+2013-12-10 Alan Modra <amodra@gmail.com>
28572
+ Apply gcc-4_8-branch r205803
28573
+ 2013-12-05 Alan Modra <amodra@gmail.com>
28574
+ * gcc/configure.ac (BUILD_CXXFLAGS) Don't use ALL_CXXFLAGS for
28576
+ <recursive call for build != host>: Clear GMPINC. Don't bother
28578
+ * gcc/configure: Regenerate.
28580
+2013-11-18 Alan Modra <amodra@gmail.com>
28582
+ Backport mainline r205844.
28583
+ * libffi/src/powerpc/ffitarget.h: Import from upstream.
28584
+ * libffi/src/powerpc/ffi_powerpc.h: Likewise.
28585
+ * libffi/src/powerpc/ffi.c: Likewise.
28586
+ * libffi/src/powerpc/ffi_sysv.c: Likewise.
28587
+ * libffi/src/powerpc/ffi_linux64.c: Likewise.
28588
+ * libffi/src/powerpc/sysv.S: Likewise.
28589
+ * libffi/src/powerpc/ppc_closure.S: Likewise.
28590
+ * libffi/src/powerpc/linux64.S: Likewise.
28591
+ * libffi/src/powerpc/linux64_closure.S: Likewise.
28592
+ * libffi/src/types.c: Likewise.
28593
+ * libffi/Makefile.am (EXTRA_DIST): Add new src/powerpc files.
28594
+ (nodist_libffi_la_SOURCES <POWERPC, POWERPC_FREEBSD>): Likewise.
28595
+ * libffi/configure.ac (HAVE_LONG_DOUBLE_VARIANT): Define for powerpc.
28596
+ * libffi/include/ffi.h.in (ffi_prep_types): Declare.
28597
+ * libffi/src/prep_cif.c (ffi_prep_cif_core): Call ffi_prep_types.
28598
+ * libffi/configure: Regenerate.
28599
+ * libffi/fficonfig.h.in: Regenerate.
28600
+ * libffi/Makefile.in: Regenerate.
28601
+ * libffi/man/Makefile.in: Regenerate.
28602
+ * libffi/include/Makefile.in: Regenerate.
28603
+ * libffi/testsuite/Makefile.in: Regenerate.
28605
+2013-11-22 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
28607
+ * libgo/config/libtool.m4: Update to mainline version.
28608
+ * libgo/configure: Regenerate.
28610
+2013-11-19 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
28612
+ Backport from mainline r205000.
28614
+ gotest: Recognize PPC ELF v2 function pointers in text section.
28616
+2013-11-18 Alan Modra <amodra@gmail.com>
28618
+ * libffi/src/powerpc/ppc_closure.S: Don't bl .Luint128.
28620
+ * libffi/src/powerpc/ffitarget.h: Import from upstream.
28621
+ * libffi/src/powerpc/ffi.c: Likewise.
28622
+ * libffi/src/powerpc/linux64.S: Likewise.
28623
+ * libffi/src/powerpc/linux64_closure.S: Likewise.
28624
+ * libffi/doc/libffi.texi: Likewise.
28625
+ * libffi/testsuite/libffi.call/cls_double_va.c: Likewise.
28626
+ * libffi/testsuite/libffi.call/cls_longdouble_va.c: Likewise.
28628
+2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
28630
+ * libgo/config/libtool.m4: Update to mainline version.
28631
+ * libgo/configure: Regenerate.
28633
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
28635
+ * libtool.m4: Update to mainline version.
28636
+ * libjava/libltdl/acinclude.m4: Likewise.
28638
+ * gcc/configure: Regenerate.
28639
+ * boehm-gc/configure: Regenerate.
28640
+ * libatomic/configure: Regenerate.
28641
+ * libbacktrace/configure: Regenerate.
28642
+ * libffi/configure: Regenerate.
28643
+ * libgfortran/configure: Regenerate.
28644
+ * libgomp/configure: Regenerate.
28645
+ * libitm/configure: Regenerate.
28646
+ * libjava/configure: Regenerate.
28647
+ * libjava/libltdl/configure: Regenerate.
28648
+ * libjava/classpath/configure: Regenerate.
28649
+ * libmudflap/configure: Regenerate.
28650
+ * libobjc/configure: Regenerate.
28651
+ * libquadmath/configure: Regenerate.
28652
+ * libsanitizer/configure: Regenerate.
28653
+ * libssp/configure: Regenerate.
28654
+ * libstdc++-v3/configure: Regenerate.
28655
+ * lto-plugin/configure: Regenerate.
28656
+ * zlib/configure: Regenerate.
28658
+2013-11-15 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
28660
+ Backport from mainline r203071:
28662
+ 2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
28664
+ Import from savannah.gnu.org:
28665
+ * config.guess: Update to 2013-06-10 version.
28666
+ * config.sub: Update to 2013-10-01 version.
28668
+2013-11-12 Bill Schmidt <wschmidt@linux.ibm.com>
28670
+ Backport from mainline
28671
+ 2013-09-20 Alan Modra <amodra@gmail.com>
28673
+ * libtool.m4 (_LT_ENABLE_LOCK <ld -m flags>): Remove non-canonical
28674
+ ppc host match. Support little-endian powerpc linux hosts.
28676
--- a/src/libobjc/configure
28677
+++ b/src/libobjc/configure
28678
@@ -6056,7 +6056,7 @@
28682
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
28683
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
28684
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
28685
# Find out which ABI we are using.
28686
echo 'int i;' > conftest.$ac_ext
28687
@@ -6081,7 +6081,10 @@
28691
- ppc64-*linux*|powerpc64-*linux*)
28692
+ powerpc64le-*linux*)
28693
+ LD="${LD-ld} -m elf32lppclinux"
28695
+ powerpc64-*linux*)
28696
LD="${LD-ld} -m elf32ppclinux"
28699
@@ -6100,7 +6103,10 @@
28701
LD="${LD-ld} -m elf_x86_64"
28703
- ppc*-*linux*|powerpc*-*linux*)
28704
+ powerpcle-*linux*)
28705
+ LD="${LD-ld} -m elf64lppc"
28708
LD="${LD-ld} -m elf64ppc"
28710
s390*-*linux*|s390*-*tpf*)
28711
@@ -10595,7 +10601,7 @@
28712
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28713
lt_status=$lt_dlunknown
28714
cat > conftest.$ac_ext <<_LT_EOF
28715
-#line 10598 "configure"
28716
+#line 10604 "configure"
28717
#include "confdefs.h"
28720
@@ -10701,7 +10707,7 @@
28721
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28722
lt_status=$lt_dlunknown
28723
cat > conftest.$ac_ext <<_LT_EOF
28724
-#line 10704 "configure"
28725
+#line 10710 "configure"
28726
#include "confdefs.h"
28729
@@ -11472,7 +11478,7 @@
28730
enableval=$enable_sjlj_exceptions; :
28732
cat > conftest.$ac_ext << EOF
28733
-#line 11475 "configure"
28734
+#line 11481 "configure"
28737
@implementation Frob
28738
--- a/src/libgfortran/configure
28739
+++ b/src/libgfortran/configure
28740
@@ -8062,7 +8062,7 @@
28744
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
28745
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
28746
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
28747
# Find out which ABI we are using.
28748
echo 'int i;' > conftest.$ac_ext
28749
@@ -8087,7 +8087,10 @@
28753
- ppc64-*linux*|powerpc64-*linux*)
28754
+ powerpc64le-*linux*)
28755
+ LD="${LD-ld} -m elf32lppclinux"
28757
+ powerpc64-*linux*)
28758
LD="${LD-ld} -m elf32ppclinux"
28761
@@ -8106,7 +8109,10 @@
28763
LD="${LD-ld} -m elf_x86_64"
28765
- ppc*-*linux*|powerpc*-*linux*)
28766
+ powerpcle-*linux*)
28767
+ LD="${LD-ld} -m elf64lppc"
28770
LD="${LD-ld} -m elf64ppc"
28772
s390*-*linux*|s390*-*tpf*)
28773
@@ -12333,7 +12339,7 @@
28774
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28775
lt_status=$lt_dlunknown
28776
cat > conftest.$ac_ext <<_LT_EOF
28777
-#line 12336 "configure"
28778
+#line 12342 "configure"
28779
#include "confdefs.h"
28782
@@ -12439,7 +12445,7 @@
28783
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28784
lt_status=$lt_dlunknown
28785
cat > conftest.$ac_ext <<_LT_EOF
28786
-#line 12442 "configure"
28787
+#line 12448 "configure"
28788
#include "confdefs.h"
28791
--- a/src/libffi/configure
28792
+++ b/src/libffi/configure
28793
@@ -613,6 +613,7 @@
28794
FFI_EXEC_TRAMPOLINE_TABLE
28795
FFI_EXEC_TRAMPOLINE_TABLE_FALSE
28796
FFI_EXEC_TRAMPOLINE_TABLE_TRUE
28797
+HAVE_LONG_DOUBLE_VARIANT
28801
@@ -6392,7 +6393,7 @@
28805
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
28806
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
28807
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
28808
# Find out which ABI we are using.
28809
echo 'int i;' > conftest.$ac_ext
28810
@@ -6417,7 +6418,10 @@
28814
- ppc64-*linux*|powerpc64-*linux*)
28815
+ powerpc64le-*linux*)
28816
+ LD="${LD-ld} -m elf32lppclinux"
28818
+ powerpc64-*linux*)
28819
LD="${LD-ld} -m elf32ppclinux"
28822
@@ -6436,7 +6440,10 @@
28824
LD="${LD-ld} -m elf_x86_64"
28826
- ppc*-*linux*|powerpc*-*linux*)
28827
+ powerpcle-*linux*)
28828
+ LD="${LD-ld} -m elf64lppc"
28831
LD="${LD-ld} -m elf64ppc"
28833
s390*-*linux*|s390*-*tpf*)
28834
@@ -10900,7 +10907,7 @@
28835
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28836
lt_status=$lt_dlunknown
28837
cat > conftest.$ac_ext <<_LT_EOF
28838
-#line 10903 "configure"
28839
+#line 10910 "configure"
28840
#include "confdefs.h"
28843
@@ -11006,7 +11013,7 @@
28844
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
28845
lt_status=$lt_dlunknown
28846
cat > conftest.$ac_ext <<_LT_EOF
28847
-#line 11009 "configure"
28848
+#line 11016 "configure"
28849
#include "confdefs.h"
28852
@@ -11443,6 +11450,7 @@
28855
TARGETDIR="unknown"
28856
+HAVE_LONG_DOUBLE_VARIANT=0
28859
TARGET=AARCH64; TARGETDIR=aarch64
28860
@@ -11540,6 +11548,7 @@
28862
powerpc*-*-linux* | powerpc-*-sysv*)
28863
TARGET=POWERPC; TARGETDIR=powerpc
28864
+ HAVE_LONG_DOUBLE_VARIANT=1
28866
powerpc-*-amigaos*)
28867
TARGET=POWERPC; TARGETDIR=powerpc
28868
@@ -11555,6 +11564,7 @@
28870
powerpc-*-freebsd* | powerpc-*-openbsd*)
28871
TARGET=POWERPC_FREEBSD; TARGETDIR=powerpc
28872
+ HAVE_LONG_DOUBLE_VARIANT=1
28874
powerpc64-*-freebsd*)
28875
TARGET=POWERPC; TARGETDIR=powerpc
28876
@@ -12230,17 +12240,25 @@
28877
# Also AC_SUBST this variable for ffi.h.
28878
if test -z "$HAVE_LONG_DOUBLE"; then
28880
- if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
28881
- if test $ac_cv_sizeof_long_double != 0; then
28882
+ if test $ac_cv_sizeof_long_double != 0; then
28883
+ if test $HAVE_LONG_DOUBLE_VARIANT != 0; then
28885
+$as_echo "#define HAVE_LONG_DOUBLE_VARIANT 1" >>confdefs.h
28889
+ if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
28890
+ HAVE_LONG_DOUBLE=1
28892
$as_echo "#define HAVE_LONG_DOUBLE 1" >>confdefs.h
28901
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
28902
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
28903
if test "${ac_cv_c_bigendian+set}" = set; then :
28904
--- a/src/libffi/Makefile.in
28905
+++ b/src/libffi/Makefile.in
28906
@@ -48,10 +48,10 @@
28907
@IA64_TRUE@am__append_11 = src/ia64/ffi.c src/ia64/unix.S
28908
@M32R_TRUE@am__append_12 = src/m32r/sysv.S src/m32r/ffi.c
28909
@M68K_TRUE@am__append_13 = src/m68k/ffi.c src/m68k/sysv.S
28910
-@POWERPC_TRUE@am__append_14 = src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
28911
+@POWERPC_TRUE@am__append_14 = src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
28912
@POWERPC_AIX_TRUE@am__append_15 = src/powerpc/ffi_darwin.c src/powerpc/aix.S src/powerpc/aix_closure.S
28913
@POWERPC_DARWIN_TRUE@am__append_16 = src/powerpc/ffi_darwin.c src/powerpc/darwin.S src/powerpc/darwin_closure.S
28914
-@POWERPC_FREEBSD_TRUE@am__append_17 = src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
28915
+@POWERPC_FREEBSD_TRUE@am__append_17 = src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
28916
@AARCH64_TRUE@am__append_18 = src/aarch64/sysv.S src/aarch64/ffi.c
28917
@ARM_TRUE@am__append_19 = src/arm/sysv.S src/arm/ffi.c
28918
@ARM_TRUE@@FFI_EXEC_TRAMPOLINE_TABLE_TRUE@am__append_20 = src/arm/trampoline.S
28919
@@ -133,7 +133,9 @@
28920
@IA64_TRUE@am__objects_11 = src/ia64/ffi.lo src/ia64/unix.lo
28921
@M32R_TRUE@am__objects_12 = src/m32r/sysv.lo src/m32r/ffi.lo
28922
@M68K_TRUE@am__objects_13 = src/m68k/ffi.lo src/m68k/sysv.lo
28923
-@POWERPC_TRUE@am__objects_14 = src/powerpc/ffi.lo src/powerpc/sysv.lo \
28924
+@POWERPC_TRUE@am__objects_14 = src/powerpc/ffi.lo \
28925
+@POWERPC_TRUE@ src/powerpc/ffi_sysv.lo \
28926
+@POWERPC_TRUE@ src/powerpc/ffi_linux64.lo src/powerpc/sysv.lo \
28927
@POWERPC_TRUE@ src/powerpc/ppc_closure.lo \
28928
@POWERPC_TRUE@ src/powerpc/linux64.lo \
28929
@POWERPC_TRUE@ src/powerpc/linux64_closure.lo
28930
@@ -144,6 +146,7 @@
28931
@POWERPC_DARWIN_TRUE@ src/powerpc/darwin.lo \
28932
@POWERPC_DARWIN_TRUE@ src/powerpc/darwin_closure.lo
28933
@POWERPC_FREEBSD_TRUE@am__objects_17 = src/powerpc/ffi.lo \
28934
+@POWERPC_FREEBSD_TRUE@ src/powerpc/ffi_sysv.lo \
28935
@POWERPC_FREEBSD_TRUE@ src/powerpc/sysv.lo \
28936
@POWERPC_FREEBSD_TRUE@ src/powerpc/ppc_closure.lo
28937
@AARCH64_TRUE@am__objects_18 = src/aarch64/sysv.lo src/aarch64/ffi.lo
28938
@@ -278,6 +281,7 @@
28941
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
28942
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
28943
INSTALL = @INSTALL@
28944
INSTALL_DATA = @INSTALL_DATA@
28945
INSTALL_PROGRAM = @INSTALL_PROGRAM@
28946
@@ -387,10 +391,12 @@
28947
src/ia64/unix.S src/mips/ffi.c src/mips/n32.S src/mips/o32.S \
28948
src/mips/ffitarget.h src/m32r/ffi.c src/m32r/sysv.S \
28949
src/m32r/ffitarget.h src/m68k/ffi.c src/m68k/sysv.S \
28950
- src/m68k/ffitarget.h src/powerpc/ffi.c src/powerpc/sysv.S \
28951
- src/powerpc/linux64.S src/powerpc/linux64_closure.S \
28952
- src/powerpc/ppc_closure.S src/powerpc/asm.h \
28953
- src/powerpc/aix.S src/powerpc/darwin.S \
28954
+ src/m68k/ffitarget.h \
28955
+ src/powerpc/ffi.c src/powerpc/ffi_powerpc.h \
28956
+ src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c \
28957
+ src/powerpc/sysv.S src/powerpc/linux64.S \
28958
+ src/powerpc/linux64_closure.S src/powerpc/ppc_closure.S \
28959
+ src/powerpc/asm.h src/powerpc/aix.S src/powerpc/darwin.S \
28960
src/powerpc/aix_closure.S src/powerpc/darwin_closure.S \
28961
src/powerpc/ffi_darwin.c src/powerpc/ffitarget.h \
28962
src/s390/ffi.c src/s390/sysv.S src/s390/ffitarget.h \
28963
@@ -711,6 +717,10 @@
28964
@: > src/powerpc/$(DEPDIR)/$(am__dirstamp)
28965
src/powerpc/ffi.lo: src/powerpc/$(am__dirstamp) \
28966
src/powerpc/$(DEPDIR)/$(am__dirstamp)
28967
+src/powerpc/ffi_sysv.lo: src/powerpc/$(am__dirstamp) \
28968
+ src/powerpc/$(DEPDIR)/$(am__dirstamp)
28969
+src/powerpc/ffi_linux64.lo: src/powerpc/$(am__dirstamp) \
28970
+ src/powerpc/$(DEPDIR)/$(am__dirstamp)
28971
src/powerpc/sysv.lo: src/powerpc/$(am__dirstamp) \
28972
src/powerpc/$(DEPDIR)/$(am__dirstamp)
28973
src/powerpc/ppc_closure.lo: src/powerpc/$(am__dirstamp) \
28974
@@ -912,6 +922,10 @@
28975
-rm -f src/powerpc/ffi.lo
28976
-rm -f src/powerpc/ffi_darwin.$(OBJEXT)
28977
-rm -f src/powerpc/ffi_darwin.lo
28978
+ -rm -f src/powerpc/ffi_linux64.$(OBJEXT)
28979
+ -rm -f src/powerpc/ffi_linux64.lo
28980
+ -rm -f src/powerpc/ffi_sysv.$(OBJEXT)
28981
+ -rm -f src/powerpc/ffi_sysv.lo
28982
-rm -f src/powerpc/linux64.$(OBJEXT)
28983
-rm -f src/powerpc/linux64.lo
28984
-rm -f src/powerpc/linux64_closure.$(OBJEXT)
28985
@@ -1009,6 +1023,8 @@
28986
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/darwin_closure.Plo@am__quote@
28987
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi.Plo@am__quote@
28988
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi_darwin.Plo@am__quote@
28989
+@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi_linux64.Plo@am__quote@
28990
+@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ffi_sysv.Plo@am__quote@
28991
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/linux64.Plo@am__quote@
28992
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/linux64_closure.Plo@am__quote@
28993
@AMDEP_TRUE@@am__include@ @am__quote@src/powerpc/$(DEPDIR)/ppc_closure.Plo@am__quote@
28994
--- a/src/libffi/include/ffi.h.in
28995
+++ b/src/libffi/include/ffi.h.in
28996
@@ -207,6 +207,11 @@
29000
+#if HAVE_LONG_DOUBLE_VARIANT
29001
+/* Used to adjust size/alignment of ffi types. */
29002
+void ffi_prep_types (ffi_abi abi);
29005
/* Used internally, but overridden by some architectures */
29006
ffi_status ffi_prep_cif_core(ffi_cif *cif,
29008
--- a/src/libffi/include/Makefile.in
29009
+++ b/src/libffi/include/Makefile.in
29010
@@ -113,6 +113,7 @@
29013
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
29014
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
29015
INSTALL = @INSTALL@
29016
INSTALL_DATA = @INSTALL_DATA@
29017
INSTALL_PROGRAM = @INSTALL_PROGRAM@
29018
--- a/src/libffi/fficonfig.h.in
29019
+++ b/src/libffi/fficonfig.h.in
29021
/* Define if you have the long double type and it is bigger than a double */
29022
#undef HAVE_LONG_DOUBLE
29024
+/* Define if you support more than one size of the long double type */
29025
+#undef HAVE_LONG_DOUBLE_VARIANT
29027
/* Define to 1 if you have the `memcpy' function. */
29030
--- a/src/libffi/src/powerpc/ppc_closure.S
29031
+++ b/src/libffi/src/powerpc/ppc_closure.S
29034
.file "ppc_closure.S"
29036
-#ifndef __powerpc64__
29039
ENTRY(ffi_closure_SYSV)
29041
@@ -238,7 +238,7 @@
29048
# The return types below are only used when the ABI type is FFI_SYSV.
29049
# case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct.
29050
@@ -378,8 +378,7 @@
29056
#if defined __ELF__ && defined __linux__
29057
.section .note.GNU-stack,"",@progbits
29060
--- a/src/libffi/src/powerpc/ffitarget.h
29061
+++ b/src/libffi/src/powerpc/ffitarget.h
29062
@@ -60,45 +60,76 @@
29063
typedef enum ffi_abi {
29071
- FFI_LINUX_SOFT_FLOAT,
29072
-# if defined(POWERPC64)
29073
- FFI_DEFAULT_ABI = FFI_LINUX64,
29074
-# elif defined(__NO_FPRS__)
29075
- FFI_DEFAULT_ABI = FFI_LINUX_SOFT_FLOAT,
29076
-# elif (__LDBL_MANT_DIG__ == 106)
29077
- FFI_DEFAULT_ABI = FFI_LINUX,
29079
- FFI_DEFAULT_ABI = FFI_GCC_SYSV,
29083
-#ifdef POWERPC_AIX
29084
+#if defined (POWERPC_AIX)
29087
FFI_DEFAULT_ABI = FFI_AIX,
29091
-#ifdef POWERPC_DARWIN
29092
+#elif defined (POWERPC_DARWIN)
29095
FFI_DEFAULT_ABI = FFI_DARWIN,
29099
-#ifdef POWERPC_FREEBSD
29104
- FFI_LINUX_SOFT_FLOAT,
29105
- FFI_DEFAULT_ABI = FFI_SYSV,
29107
+ /* The FFI_COMPAT values are used by old code. Since libffi may be
29108
+ a shared library we have to support old values for backwards
29109
+ compatibility. */
29111
+ FFI_COMPAT_GCC_SYSV,
29112
+ FFI_COMPAT_LINUX64,
29113
+ FFI_COMPAT_LINUX,
29114
+ FFI_COMPAT_LINUX_SOFT_FLOAT,
29116
+# if defined (POWERPC64)
29117
+ /* This bit, always set in new code, must not be set in any of the
29118
+ old FFI_COMPAT values that might be used for 64-bit linux. We
29119
+ only need worry about FFI_COMPAT_LINUX64, but to be safe avoid
29120
+ all old values. */
29122
+ /* This and following bits can reuse FFI_COMPAT values. */
29123
+ FFI_LINUX_STRUCT_ALIGN = 1,
29124
+ FFI_LINUX_LONG_DOUBLE_128 = 2,
29125
+ FFI_DEFAULT_ABI = (FFI_LINUX
29126
+# ifdef __STRUCT_PARM_ALIGN__
29127
+ | FFI_LINUX_STRUCT_ALIGN
29129
+# ifdef __LONG_DOUBLE_128__
29130
+ | FFI_LINUX_LONG_DOUBLE_128
29133
+ FFI_LAST_ABI = 12
29136
+ /* This bit, always set in new code, must not be set in any of the
29137
+ old FFI_COMPAT values that might be used for 32-bit linux/sysv/bsd. */
29139
+ /* This and following bits can reuse FFI_COMPAT values. */
29140
+ FFI_SYSV_SOFT_FLOAT = 1,
29141
+ FFI_SYSV_STRUCT_RET = 2,
29142
+ FFI_SYSV_IBM_LONG_DOUBLE = 4,
29143
+ FFI_SYSV_LONG_DOUBLE_128 = 16,
29145
+ FFI_DEFAULT_ABI = (FFI_SYSV
29146
+# ifdef __NO_FPRS__
29147
+ | FFI_SYSV_SOFT_FLOAT
29149
+# if (defined (__SVR4_STRUCT_RETURN) \
29150
+ || defined (POWERPC_FREEBSD) && !defined (__AIX_STRUCT_RETURN))
29151
+ | FFI_SYSV_STRUCT_RET
29153
+# if __LDBL_MANT_DIG__ == 106
29154
+ | FFI_SYSV_IBM_LONG_DOUBLE
29156
+# ifdef __LONG_DOUBLE_128__
29157
+ | FFI_SYSV_LONG_DOUBLE_128
29160
+ FFI_LAST_ABI = 32
29168
@@ -106,6 +137,10 @@
29170
#define FFI_CLOSURES 1
29171
#define FFI_NATIVE_RAW_API 0
29172
+#if defined (POWERPC) || defined (POWERPC_FREEBSD)
29173
+# define FFI_TARGET_SPECIFIC_VARIADIC 1
29174
+# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs
29177
/* For additional types like the below, take care about the order in
29178
ppc_closures.S. They must follow after the FFI_TYPE_LAST. */
29179
@@ -113,19 +148,26 @@
29180
/* Needed for soft-float long-double-128 support. */
29181
#define FFI_TYPE_UINT128 (FFI_TYPE_LAST + 1)
29183
-/* Needed for FFI_SYSV small structure returns.
29184
- We use two flag bits, (FLAG_SYSV_SMST_R3, FLAG_SYSV_SMST_R4) which are
29185
- defined in ffi.c, to determine the exact return type and its size. */
29186
+/* Needed for FFI_SYSV small structure returns. */
29187
#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2)
29189
-#if defined(POWERPC64) || defined(POWERPC_AIX)
29190
+/* Used by ELFv2 for homogenous structure returns. */
29191
+#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_TYPE_LAST + 1)
29192
+#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_TYPE_LAST + 2)
29193
+#define FFI_V2_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 3)
29195
+#if _CALL_ELF == 2
29196
+# define FFI_TRAMPOLINE_SIZE 32
29198
+# if defined(POWERPC64) || defined(POWERPC_AIX)
29199
# if defined(POWERPC_DARWIN64)
29200
# define FFI_TRAMPOLINE_SIZE 48
29202
# define FFI_TRAMPOLINE_SIZE 24
29204
-#else /* POWERPC || POWERPC_AIX */
29205
+# else /* POWERPC || POWERPC_AIX */
29206
# define FFI_TRAMPOLINE_SIZE 40
29211
--- a/src/libffi/src/powerpc/ffi.c
29212
+++ b/src/libffi/src/powerpc/ffi.c
29214
/* -----------------------------------------------------------------------
29215
- ffi.c - Copyright (C) 2011 Anthony Green
29216
+ ffi.c - Copyright (C) 2013 IBM
29217
+ Copyright (C) 2011 Anthony Green
29218
Copyright (C) 2011 Kyle Moffett
29219
Copyright (C) 2008 Red Hat, Inc
29220
Copyright (C) 2007, 2008 Free Software Foundation, Inc
29221
@@ -27,966 +28,104 @@
29222
OTHER DEALINGS IN THE SOFTWARE.
29223
----------------------------------------------------------------------- */
29226
-#include <ffi_common.h>
29228
+#include "ffi_common.h"
29229
+#include "ffi_powerpc.h"
29231
-#include <stdlib.h>
29232
-#include <stdio.h>
29235
-extern void ffi_closure_SYSV (void);
29236
-extern void FFI_HIDDEN ffi_closure_LINUX64 (void);
29239
- /* The assembly depends on these exact flags. */
29240
- FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
29241
- FLAG_RETURNS_NOTHING = 1 << (31-30), /* These go in cr7 */
29242
-#ifndef __NO_FPRS__
29243
- FLAG_RETURNS_FP = 1 << (31-29),
29245
- FLAG_RETURNS_64BITS = 1 << (31-28),
29247
- FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */
29249
- FLAG_ARG_NEEDS_COPY = 1 << (31- 7),
29250
-#ifndef __NO_FPRS__
29251
- FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
29253
- FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
29254
- FLAG_RETVAL_REFERENCE = 1 << (31- 4)
29257
-/* About the SYSV ABI. */
29258
-#define ASM_NEEDS_REGISTERS 4
29259
-#define NUM_GPR_ARG_REGISTERS 8
29260
-#ifndef __NO_FPRS__
29261
-# define NUM_FPR_ARG_REGISTERS 8
29264
-/* ffi_prep_args_SYSV is called by the assembly routine once stack space
29265
- has been allocated for the function's arguments.
29267
- The stack layout we want looks like this:
29269
- | Return address from ffi_call_SYSV 4bytes | higher addresses
29270
- |--------------------------------------------|
29271
- | Previous backchain pointer 4 | stack pointer here
29272
- |--------------------------------------------|<+ <<< on entry to
29273
- | Saved r28-r31 4*4 | | ffi_call_SYSV
29274
- |--------------------------------------------| |
29275
- | GPR registers r3-r10 8*4 | | ffi_call_SYSV
29276
- |--------------------------------------------| |
29277
- | FPR registers f1-f8 (optional) 8*8 | |
29278
- |--------------------------------------------| | stack |
29279
- | Space for copied structures | | grows |
29280
- |--------------------------------------------| | down V
29281
- | Parameters that didn't fit in registers | |
29282
- |--------------------------------------------| | lower addresses
29283
- | Space for callee's LR 4 | |
29284
- |--------------------------------------------| | stack pointer here
29285
- | Current backchain pointer 4 |-/ during
29286
- |--------------------------------------------| <<< ffi_call_SYSV
29291
-ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
29292
+#if HAVE_LONG_DOUBLE_VARIANT
29293
+/* Adjust ffi_type_longdouble. */
29295
+ffi_prep_types (ffi_abi abi)
29297
- const unsigned bytes = ecif->cif->bytes;
29298
- const unsigned flags = ecif->cif->flags;
29308
- /* 'stacktop' points at the previous backchain pointer. */
29311
- /* 'gpr_base' points at the space for gpr3, and grows upwards as
29312
- we use GPR registers. */
29314
- int intarg_count;
29316
-#ifndef __NO_FPRS__
29317
- /* 'fpr_base' points at the space for fpr1, and grows upwards as
29318
- we use FPR registers. */
29323
- /* 'copy_space' grows down as we put structures in it. It should
29324
- stay 16-byte aligned. */
29327
- /* 'next_arg' grows up as we put parameters in it. */
29332
-#ifndef __NO_FPRS__
29333
- double double_tmp;
29338
- signed char **sc;
29339
- unsigned char **uc;
29340
- signed short **ss;
29341
- unsigned short **us;
29342
- unsigned int **ui;
29347
- size_t struct_copy_size;
29348
- unsigned gprvalue;
29350
- stacktop.c = (char *) stack + bytes;
29351
- gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
29352
- intarg_count = 0;
29353
-#ifndef __NO_FPRS__
29354
- fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
29356
- copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
29358
- copy_space.c = gpr_base.c;
29360
- next_arg.u = stack + 2;
29362
- /* Check that everything starts aligned properly. */
29363
- FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
29364
- FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
29365
- FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
29366
- FFI_ASSERT ((bytes & 0xF) == 0);
29367
- FFI_ASSERT (copy_space.c >= next_arg.c);
29369
- /* Deal with return values that are actually pass-by-reference. */
29370
- if (flags & FLAG_RETVAL_REFERENCE)
29372
- *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue;
29376
- /* Now for the arguments. */
29377
- p_argv.v = ecif->avalue;
29378
- for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
29380
- i--, ptr++, p_argv.v++)
29382
- unsigned short typenum = (*ptr)->type;
29384
- /* We may need to handle some values depending on ABI */
29385
- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) {
29386
- if (typenum == FFI_TYPE_FLOAT)
29387
- typenum = FFI_TYPE_UINT32;
29388
- if (typenum == FFI_TYPE_DOUBLE)
29389
- typenum = FFI_TYPE_UINT64;
29390
- if (typenum == FFI_TYPE_LONGDOUBLE)
29391
- typenum = FFI_TYPE_UINT128;
29392
- } else if (ecif->cif->abi != FFI_LINUX) {
29393
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29394
- if (typenum == FFI_TYPE_LONGDOUBLE)
29395
- typenum = FFI_TYPE_STRUCT;
29399
- /* Now test the translated value */
29400
- switch (typenum) {
29401
-#ifndef __NO_FPRS__
29402
- case FFI_TYPE_FLOAT:
29403
- /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */
29404
- double_tmp = **p_argv.f;
29405
- if (fparg_count >= NUM_FPR_ARG_REGISTERS)
29407
- *next_arg.f = (float) double_tmp;
29412
- *fpr_base.d++ = double_tmp;
29414
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
29417
- case FFI_TYPE_DOUBLE:
29418
- /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */
29419
- double_tmp = **p_argv.d;
29421
- if (fparg_count >= NUM_FPR_ARG_REGISTERS)
29423
- if (intarg_count >= NUM_GPR_ARG_REGISTERS
29424
- && intarg_count % 2 != 0)
29429
- *next_arg.d = double_tmp;
29433
- *fpr_base.d++ = double_tmp;
29435
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
29438
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29439
- case FFI_TYPE_LONGDOUBLE:
29440
- double_tmp = (*p_argv.d)[0];
29442
- if (fparg_count >= NUM_FPR_ARG_REGISTERS - 1)
29444
- if (intarg_count >= NUM_GPR_ARG_REGISTERS
29445
- && intarg_count % 2 != 0)
29450
- *next_arg.d = double_tmp;
29452
- double_tmp = (*p_argv.d)[1];
29453
- *next_arg.d = double_tmp;
29458
- *fpr_base.d++ = double_tmp;
29459
- double_tmp = (*p_argv.d)[1];
29460
- *fpr_base.d++ = double_tmp;
29463
- fparg_count += 2;
29464
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
29467
-#endif /* have FPRs */
29470
- * The soft float ABI for long doubles works like this, a long double
29471
- * is passed in four consecutive GPRs if available. A maximum of 2
29472
- * long doubles can be passed in gprs. If we do not have 4 GPRs
29473
- * left, the long double is passed on the stack, 4-byte aligned.
29475
- case FFI_TYPE_UINT128: {
29476
- unsigned int int_tmp = (*p_argv.ui)[0];
29478
- if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3) {
29479
- if (intarg_count < NUM_GPR_ARG_REGISTERS)
29480
- intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count;
29481
- *(next_arg.u++) = int_tmp;
29482
- for (ii = 1; ii < 4; ii++) {
29483
- int_tmp = (*p_argv.ui)[ii];
29484
- *(next_arg.u++) = int_tmp;
29487
- *(gpr_base.u++) = int_tmp;
29488
- for (ii = 1; ii < 4; ii++) {
29489
- int_tmp = (*p_argv.ui)[ii];
29490
- *(gpr_base.u++) = int_tmp;
29493
- intarg_count += 4;
29497
- case FFI_TYPE_UINT64:
29498
- case FFI_TYPE_SINT64:
29499
- if (intarg_count == NUM_GPR_ARG_REGISTERS-1)
29501
- if (intarg_count >= NUM_GPR_ARG_REGISTERS)
29503
- if (intarg_count % 2 != 0)
29508
- *next_arg.ll = **p_argv.ll;
29513
- /* whoops: abi states only certain register pairs
29514
- * can be used for passing long long int
29515
- * specifically (r3,r4), (r5,r6), (r7,r8),
29516
- * (r9,r10) and if next arg is long long but
29517
- * not correct starting register of pair then skip
29518
- * until the proper starting register
29520
- if (intarg_count % 2 != 0)
29525
- *gpr_base.ll++ = **p_argv.ll;
29527
- intarg_count += 2;
29530
- case FFI_TYPE_STRUCT:
29531
- struct_copy_size = ((*ptr)->size + 15) & ~0xF;
29532
- copy_space.c -= struct_copy_size;
29533
- memcpy (copy_space.c, *p_argv.c, (*ptr)->size);
29535
- gprvalue = (unsigned long) copy_space.c;
29537
- FFI_ASSERT (copy_space.c > next_arg.c);
29538
- FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
29541
- case FFI_TYPE_UINT8:
29542
- gprvalue = **p_argv.uc;
29544
- case FFI_TYPE_SINT8:
29545
- gprvalue = **p_argv.sc;
29547
- case FFI_TYPE_UINT16:
29548
- gprvalue = **p_argv.us;
29550
- case FFI_TYPE_SINT16:
29551
- gprvalue = **p_argv.ss;
29554
- case FFI_TYPE_INT:
29555
- case FFI_TYPE_UINT32:
29556
- case FFI_TYPE_SINT32:
29557
- case FFI_TYPE_POINTER:
29559
- gprvalue = **p_argv.ui;
29562
- if (intarg_count >= NUM_GPR_ARG_REGISTERS)
29563
- *next_arg.u++ = gprvalue;
29565
- *gpr_base.u++ = gprvalue;
29571
- /* Check that we didn't overrun the stack... */
29572
- FFI_ASSERT (copy_space.c >= next_arg.c);
29573
- FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
29574
-#ifndef __NO_FPRS__
29575
- FFI_ASSERT (fpr_base.u
29576
- <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
29578
- FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
29579
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29581
+ ffi_prep_types_linux64 (abi);
29583
+ ffi_prep_types_sysv (abi);
29588
-/* About the LINUX64 ABI. */
29590
- NUM_GPR_ARG_REGISTERS64 = 8,
29591
- NUM_FPR_ARG_REGISTERS64 = 13
29593
-enum { ASM_NEEDS_REGISTERS64 = 4 };
29595
-/* ffi_prep_args64 is called by the assembly routine once stack space
29596
- has been allocated for the function's arguments.
29598
- The stack layout we want looks like this:
29600
- | Ret addr from ffi_call_LINUX64 8bytes | higher addresses
29601
- |--------------------------------------------|
29602
- | CR save area 8bytes |
29603
- |--------------------------------------------|
29604
- | Previous backchain pointer 8 | stack pointer here
29605
- |--------------------------------------------|<+ <<< on entry to
29606
- | Saved r28-r31 4*8 | | ffi_call_LINUX64
29607
- |--------------------------------------------| |
29608
- | GPR registers r3-r10 8*8 | |
29609
- |--------------------------------------------| |
29610
- | FPR registers f1-f13 (optional) 13*8 | |
29611
- |--------------------------------------------| |
29612
- | Parameter save area | |
29613
- |--------------------------------------------| |
29614
- | TOC save area 8 | |
29615
- |--------------------------------------------| | stack |
29616
- | Linker doubleword 8 | | grows |
29617
- |--------------------------------------------| | down V
29618
- | Compiler doubleword 8 | |
29619
- |--------------------------------------------| | lower addresses
29620
- | Space for callee's LR 8 | |
29621
- |--------------------------------------------| |
29622
- | CR save area 8 | |
29623
- |--------------------------------------------| | stack pointer here
29624
- | Current backchain pointer 8 |-/ during
29625
- |--------------------------------------------| <<< ffi_call_LINUX64
29630
-ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
29632
- const unsigned long bytes = ecif->cif->bytes;
29633
- const unsigned long flags = ecif->cif->flags;
29637
- unsigned long *ul;
29642
- /* 'stacktop' points at the previous backchain pointer. */
29645
- /* 'next_arg' points at the space for gpr3, and grows upwards as
29646
- we use GPR registers, then continues at rest. */
29652
- /* 'fpr_base' points at the space for fpr3, and grows upwards as
29653
- we use FPR registers. */
29659
- double double_tmp;
29663
- signed char **sc;
29664
- unsigned char **uc;
29665
- signed short **ss;
29666
- unsigned short **us;
29668
- unsigned int **ui;
29669
- unsigned long **ul;
29673
- unsigned long gprvalue;
29675
- stacktop.c = (char *) stack + bytes;
29676
- gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
29677
- gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
29678
- rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
29679
- fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
29681
- next_arg.ul = gpr_base.ul;
29683
- /* Check that everything starts aligned properly. */
29684
- FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
29685
- FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
29686
- FFI_ASSERT ((bytes & 0xF) == 0);
29688
- /* Deal with return values that are actually pass-by-reference. */
29689
- if (flags & FLAG_RETVAL_REFERENCE)
29690
- *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
29692
- /* Now for the arguments. */
29693
- p_argv.v = ecif->avalue;
29694
- for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
29696
- i--, ptr++, p_argv.v++)
29698
- switch ((*ptr)->type)
29700
- case FFI_TYPE_FLOAT:
29701
- double_tmp = **p_argv.f;
29702
- *next_arg.f = (float) double_tmp;
29703
- if (++next_arg.ul == gpr_end.ul)
29704
- next_arg.ul = rest.ul;
29705
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
29706
- *fpr_base.d++ = double_tmp;
29708
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
29711
- case FFI_TYPE_DOUBLE:
29712
- double_tmp = **p_argv.d;
29713
- *next_arg.d = double_tmp;
29714
- if (++next_arg.ul == gpr_end.ul)
29715
- next_arg.ul = rest.ul;
29716
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
29717
- *fpr_base.d++ = double_tmp;
29719
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
29722
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29723
- case FFI_TYPE_LONGDOUBLE:
29724
- double_tmp = (*p_argv.d)[0];
29725
- *next_arg.d = double_tmp;
29726
- if (++next_arg.ul == gpr_end.ul)
29727
- next_arg.ul = rest.ul;
29728
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
29729
- *fpr_base.d++ = double_tmp;
29731
- double_tmp = (*p_argv.d)[1];
29732
- *next_arg.d = double_tmp;
29733
- if (++next_arg.ul == gpr_end.ul)
29734
- next_arg.ul = rest.ul;
29735
- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
29736
- *fpr_base.d++ = double_tmp;
29738
- FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
29739
- FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
29743
- case FFI_TYPE_STRUCT:
29744
- words = ((*ptr)->size + 7) / 8;
29745
- if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
29747
- size_t first = gpr_end.c - next_arg.c;
29748
- memcpy (next_arg.c, *p_argv.c, first);
29749
- memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
29750
- next_arg.c = rest.c + words * 8 - first;
29754
- char *where = next_arg.c;
29756
-#ifndef __LITTLE_ENDIAN__
29757
- /* Structures with size less than eight bytes are passed
29759
- if ((*ptr)->size < 8)
29760
- where += 8 - (*ptr)->size;
29762
- memcpy (where, *p_argv.c, (*ptr)->size);
29763
- next_arg.ul += words;
29764
- if (next_arg.ul == gpr_end.ul)
29765
- next_arg.ul = rest.ul;
29769
- case FFI_TYPE_UINT8:
29770
- gprvalue = **p_argv.uc;
29772
- case FFI_TYPE_SINT8:
29773
- gprvalue = **p_argv.sc;
29775
- case FFI_TYPE_UINT16:
29776
- gprvalue = **p_argv.us;
29778
- case FFI_TYPE_SINT16:
29779
- gprvalue = **p_argv.ss;
29781
- case FFI_TYPE_UINT32:
29782
- gprvalue = **p_argv.ui;
29784
- case FFI_TYPE_INT:
29785
- case FFI_TYPE_SINT32:
29786
- gprvalue = **p_argv.si;
29789
- case FFI_TYPE_UINT64:
29790
- case FFI_TYPE_SINT64:
29791
- case FFI_TYPE_POINTER:
29792
- gprvalue = **p_argv.ul;
29794
- *next_arg.ul++ = gprvalue;
29795
- if (next_arg.ul == gpr_end.ul)
29796
- next_arg.ul = rest.ul;
29801
- FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
29802
- || (next_arg.ul >= gpr_base.ul
29803
- && next_arg.ul <= gpr_base.ul + 4));
29808
/* Perform machine dependent cif processing */
29810
+ffi_status FFI_HIDDEN
29811
ffi_prep_cif_machdep (ffi_cif *cif)
29813
- /* All this is for the SYSV and LINUX64 ABI. */
29817
- int fparg_count = 0, intarg_count = 0;
29818
- unsigned flags = 0;
29819
- unsigned struct_copy_size = 0;
29820
- unsigned type = cif->rtype->type;
29821
- unsigned size = cif->rtype->size;
29823
- if (cif->abi != FFI_LINUX64)
29825
- /* All the machine-independent calculation of cif->bytes will be wrong.
29826
- Redo the calculation for SYSV. */
29828
- /* Space for the frame pointer, callee's LR, and the asm's temp regs. */
29829
- bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
29831
- /* Space for the GPR registers. */
29832
- bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
29836
- /* 64-bit ABI. */
29838
- /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
29840
- bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
29842
- /* Space for the mandatory parm save area and general registers. */
29843
- bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
29846
- /* Return value handling. The rules for SYSV are as follows:
29847
- - 32-bit (or less) integer values are returned in gpr3;
29848
- - Structures of size <= 4 bytes also returned in gpr3;
29849
- - 64-bit integer values and structures between 5 and 8 bytes are returned
29850
- in gpr3 and gpr4;
29851
- - Single/double FP values are returned in fpr1;
29852
- - Larger structures are allocated space and a pointer is passed as
29853
- the first argument.
29854
- - long doubles (if not equivalent to double) are returned in
29855
- fpr1,fpr2 for Linux and as for large structs for SysV.
29857
- - integer values in gpr3;
29858
- - Structures/Unions by reference;
29859
- - Single/double FP values in fpr1, long double in fpr1,fpr2.
29860
- - soft-float float/doubles are treated as UINT32/UINT64 respectivley.
29861
- - soft-float long doubles are returned in gpr3-gpr6. */
29862
- /* First translate for softfloat/nonlinux */
29863
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
29864
- if (type == FFI_TYPE_FLOAT)
29865
- type = FFI_TYPE_UINT32;
29866
- if (type == FFI_TYPE_DOUBLE)
29867
- type = FFI_TYPE_UINT64;
29868
- if (type == FFI_TYPE_LONGDOUBLE)
29869
- type = FFI_TYPE_UINT128;
29870
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
29871
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29872
- if (type == FFI_TYPE_LONGDOUBLE)
29873
- type = FFI_TYPE_STRUCT;
29875
+ return ffi_prep_cif_linux64 (cif);
29877
+ return ffi_prep_cif_sysv (cif);
29884
-#ifndef __NO_FPRS__
29885
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29886
- case FFI_TYPE_LONGDOUBLE:
29887
- flags |= FLAG_RETURNS_128BITS;
29888
- /* Fall through. */
29889
+ffi_status FFI_HIDDEN
29890
+ffi_prep_cif_machdep_var (ffi_cif *cif,
29891
+ unsigned int nfixedargs MAYBE_UNUSED,
29892
+ unsigned int ntotalargs MAYBE_UNUSED)
29895
+ return ffi_prep_cif_linux64_var (cif, nfixedargs, ntotalargs);
29897
+ return ffi_prep_cif_sysv (cif);
29899
- case FFI_TYPE_DOUBLE:
29900
- flags |= FLAG_RETURNS_64BITS;
29901
- /* Fall through. */
29902
- case FFI_TYPE_FLOAT:
29903
- flags |= FLAG_RETURNS_FP;
29907
- case FFI_TYPE_UINT128:
29908
- flags |= FLAG_RETURNS_128BITS;
29909
- /* Fall through. */
29910
- case FFI_TYPE_UINT64:
29911
- case FFI_TYPE_SINT64:
29912
- flags |= FLAG_RETURNS_64BITS;
29915
- case FFI_TYPE_STRUCT:
29917
- * The final SYSV ABI says that structures smaller or equal 8 bytes
29918
- * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
29921
- * NOTE: The assembly code can safely assume that it just needs to
29922
- * store both r3 and r4 into a 8-byte word-aligned buffer, as
29923
- * we allocate a temporary buffer in ffi_call() if this flag is
29926
- if (cif->abi == FFI_SYSV && size <= 8)
29927
- flags |= FLAG_RETURNS_SMST;
29929
- flags |= FLAG_RETVAL_REFERENCE;
29930
- /* Fall through. */
29931
- case FFI_TYPE_VOID:
29932
- flags |= FLAG_RETURNS_NOTHING;
29936
- /* Returns 32-bit integer, or similar. Nothing to do here. */
29940
- if (cif->abi != FFI_LINUX64)
29941
- /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
29942
- first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
29943
- goes on the stack. Structures and long doubles (if not equivalent
29944
- to double) are passed as a pointer to a copy of the structure.
29945
- Stuff on the stack needs to keep proper alignment. */
29946
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
29948
- unsigned short typenum = (*ptr)->type;
29950
- /* We may need to handle some values depending on ABI */
29951
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
29952
- if (typenum == FFI_TYPE_FLOAT)
29953
- typenum = FFI_TYPE_UINT32;
29954
- if (typenum == FFI_TYPE_DOUBLE)
29955
- typenum = FFI_TYPE_UINT64;
29956
- if (typenum == FFI_TYPE_LONGDOUBLE)
29957
- typenum = FFI_TYPE_UINT128;
29958
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
29959
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29960
- if (typenum == FFI_TYPE_LONGDOUBLE)
29961
- typenum = FFI_TYPE_STRUCT;
29965
- switch (typenum) {
29966
-#ifndef __NO_FPRS__
29967
- case FFI_TYPE_FLOAT:
29969
- /* floating singles are not 8-aligned on stack */
29972
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
29973
- case FFI_TYPE_LONGDOUBLE:
29977
- case FFI_TYPE_DOUBLE:
29979
- /* If this FP arg is going on the stack, it must be
29980
- 8-byte-aligned. */
29981
- if (fparg_count > NUM_FPR_ARG_REGISTERS
29982
- && intarg_count >= NUM_GPR_ARG_REGISTERS
29983
- && intarg_count % 2 != 0)
29987
- case FFI_TYPE_UINT128:
29989
- * A long double in FFI_LINUX_SOFT_FLOAT can use only a set
29990
- * of four consecutive gprs. If we do not have enough, we
29991
- * have to adjust the intarg_count value.
29993
- if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3
29994
- && intarg_count < NUM_GPR_ARG_REGISTERS)
29995
- intarg_count = NUM_GPR_ARG_REGISTERS;
29996
- intarg_count += 4;
29999
- case FFI_TYPE_UINT64:
30000
- case FFI_TYPE_SINT64:
30001
- /* 'long long' arguments are passed as two words, but
30002
- either both words must fit in registers or both go
30003
- on the stack. If they go on the stack, they must
30004
- be 8-byte-aligned.
30006
- Also, only certain register pairs can be used for
30007
- passing long long int -- specifically (r3,r4), (r5,r6),
30008
- (r7,r8), (r9,r10).
30010
- if (intarg_count == NUM_GPR_ARG_REGISTERS-1
30011
- || intarg_count % 2 != 0)
30013
- intarg_count += 2;
30016
- case FFI_TYPE_STRUCT:
30017
- /* We must allocate space for a copy of these to enforce
30018
- pass-by-value. Pad the space up to a multiple of 16
30019
- bytes (the maximum alignment required for anything under
30020
- the SYSV ABI). */
30021
- struct_copy_size += ((*ptr)->size + 15) & ~0xF;
30022
- /* Fall through (allocate space for the pointer). */
30024
- case FFI_TYPE_POINTER:
30025
- case FFI_TYPE_INT:
30026
- case FFI_TYPE_UINT32:
30027
- case FFI_TYPE_SINT32:
30028
- case FFI_TYPE_UINT16:
30029
- case FFI_TYPE_SINT16:
30030
- case FFI_TYPE_UINT8:
30031
- case FFI_TYPE_SINT8:
30032
- /* Everything else is passed as a 4-byte word in a GPR, either
30033
- the object itself or a pointer to it. */
30041
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
30043
- switch ((*ptr)->type)
30045
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
30046
- case FFI_TYPE_LONGDOUBLE:
30047
- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
30048
- intarg_count += 4;
30051
- fparg_count += 2;
30052
- intarg_count += 2;
30056
- case FFI_TYPE_FLOAT:
30057
- case FFI_TYPE_DOUBLE:
30062
- case FFI_TYPE_STRUCT:
30063
- intarg_count += ((*ptr)->size + 7) / 8;
30066
- case FFI_TYPE_POINTER:
30067
- case FFI_TYPE_UINT64:
30068
- case FFI_TYPE_SINT64:
30069
- case FFI_TYPE_INT:
30070
- case FFI_TYPE_UINT32:
30071
- case FFI_TYPE_SINT32:
30072
- case FFI_TYPE_UINT16:
30073
- case FFI_TYPE_SINT16:
30074
- case FFI_TYPE_UINT8:
30075
- case FFI_TYPE_SINT8:
30076
- /* Everything else is passed as a 8-byte word in a GPR, either
30077
- the object itself or a pointer to it. */
30085
-#ifndef __NO_FPRS__
30086
- if (fparg_count != 0)
30087
- flags |= FLAG_FP_ARGUMENTS;
30089
- if (intarg_count > 4)
30090
- flags |= FLAG_4_GPR_ARGUMENTS;
30091
- if (struct_copy_size != 0)
30092
- flags |= FLAG_ARG_NEEDS_COPY;
30094
- if (cif->abi != FFI_LINUX64)
30096
-#ifndef __NO_FPRS__
30097
- /* Space for the FPR registers, if needed. */
30098
- if (fparg_count != 0)
30099
- bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);
30102
- /* Stack space. */
30103
- if (intarg_count > NUM_GPR_ARG_REGISTERS)
30104
- bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int);
30105
-#ifndef __NO_FPRS__
30106
- if (fparg_count > NUM_FPR_ARG_REGISTERS)
30107
- bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double);
30112
-#ifndef __NO_FPRS__
30113
- /* Space for the FPR registers, if needed. */
30114
- if (fparg_count != 0)
30115
- bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
30118
- /* Stack space. */
30119
- if (intarg_count > NUM_GPR_ARG_REGISTERS64)
30120
- bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
30123
- /* The stack space allocated needs to be a multiple of 16 bytes. */
30124
- bytes = (bytes + 15) & ~0xF;
30126
- /* Add in the space for the copied structures. */
30127
- bytes += struct_copy_size;
30129
- cif->flags = flags;
30130
- cif->bytes = bytes;
30135
-extern void ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *,
30136
- void (*fn)(void));
30137
-extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long,
30138
- unsigned long, unsigned long *,
30139
- void (*fn)(void));
30142
ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
30145
- * The final SYSV ABI says that structures smaller or equal 8 bytes
30146
- * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
30149
- * Just to keep things simple for the assembly code, we will always
30150
- * bounce-buffer struct return values less than or equal to 8 bytes.
30151
- * This allows the ASM to handle SYSV small structures by directly
30152
- * writing r3 and r4 to memory without worrying about struct size.
30154
- unsigned int smst_buffer[2];
30155
+ /* The final SYSV ABI says that structures smaller or equal 8 bytes
30156
+ are returned in r3/r4. A draft ABI used by linux instead returns
30159
+ We bounce-buffer SYSV small struct return values so that sysv.S
30160
+ can write r3 and r4 to memory without worrying about struct size.
30162
+ For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
30163
+ for similar reasons. */
30164
+ unsigned long smst_buffer[8];
30166
- unsigned int rsize = 0;
30169
ecif.avalue = avalue;
30171
- /* Ensure that we have a valid struct return value */
30172
ecif.rvalue = rvalue;
30173
- if (cif->rtype->type == FFI_TYPE_STRUCT) {
30174
- rsize = cif->rtype->size;
30176
- ecif.rvalue = smst_buffer;
30177
- else if (!rvalue)
30178
- ecif.rvalue = alloca(rsize);
30180
+ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
30181
+ ecif.rvalue = smst_buffer;
30182
+ /* Ensure that we have a valid struct return value.
30183
+ FIXME: Isn't this just papering over a user problem? */
30184
+ else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT)
30185
+ ecif.rvalue = alloca (cif->rtype->size);
30187
- switch (cif->abi)
30190
-# ifndef __NO_FPRS__
30192
- case FFI_GCC_SYSV:
30195
- case FFI_LINUX_SOFT_FLOAT:
30196
- ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn);
30199
+ ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn);
30201
- case FFI_LINUX64:
30202
- ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn);
30204
+ ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn);
30211
/* Check for a bounce-buffered return value */
30212
if (rvalue && ecif.rvalue == smst_buffer)
30213
- memcpy(rvalue, smst_buffer, rsize);
30215
+ unsigned int rsize = cif->rtype->size;
30216
+#ifndef __LITTLE_ENDIAN__
30217
+ /* The SYSV ABI returns a structure of up to 4 bytes in size
30218
+ left-padded in r3. */
30219
+# ifndef POWERPC64
30221
+ memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize);
30224
+ /* The SYSV ABI returns a structure of up to 8 bytes in size
30225
+ left-padded in r3/r4, and the ELFv2 ABI similarly returns a
30226
+ structure of up to 8 bytes in size left-padded in r3. */
30228
+ memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
30231
+ memcpy (rvalue, smst_buffer, rsize);
30237
-#define MIN_CACHE_LINE_SIZE 8
30240
-flush_icache (char *wraddr, char *xaddr, int size)
30243
- for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
30244
- __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
30245
- : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
30246
- __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
30247
- : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
30253
ffi_prep_closure_loc (ffi_closure *closure,
30255
@@ -995,487 +134,8 @@
30259
- void **tramp = (void **) &closure->tramp[0];
30261
- if (cif->abi != FFI_LINUX64)
30262
- return FFI_BAD_ABI;
30263
- /* Copy function address and TOC from ffi_closure_LINUX64. */
30264
- memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
30265
- tramp[2] = codeloc;
30266
+ return ffi_prep_closure_loc_linux64 (closure, cif, fun, user_data, codeloc);
30268
- unsigned int *tramp;
30270
- if (! (cif->abi == FFI_GCC_SYSV
30271
- || cif->abi == FFI_SYSV
30272
- || cif->abi == FFI_LINUX
30273
- || cif->abi == FFI_LINUX_SOFT_FLOAT))
30274
- return FFI_BAD_ABI;
30276
- tramp = (unsigned int *) &closure->tramp[0];
30277
- tramp[0] = 0x7c0802a6; /* mflr r0 */
30278
- tramp[1] = 0x4800000d; /* bl 10 <trampoline_initial+0x10> */
30279
- tramp[4] = 0x7d6802a6; /* mflr r11 */
30280
- tramp[5] = 0x7c0803a6; /* mtlr r0 */
30281
- tramp[6] = 0x800b0000; /* lwz r0,0(r11) */
30282
- tramp[7] = 0x816b0004; /* lwz r11,4(r11) */
30283
- tramp[8] = 0x7c0903a6; /* mtctr r0 */
30284
- tramp[9] = 0x4e800420; /* bctr */
30285
- *(void **) &tramp[2] = (void *) ffi_closure_SYSV; /* function */
30286
- *(void **) &tramp[3] = codeloc; /* context */
30288
- /* Flush the icache. */
30289
- flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
30290
+ return ffi_prep_closure_loc_sysv (closure, cif, fun, user_data, codeloc);
30293
- closure->cif = cif;
30294
- closure->fun = fun;
30295
- closure->user_data = user_data;
30306
-int ffi_closure_helper_SYSV (ffi_closure *, void *, unsigned long *,
30307
- ffi_dblfl *, unsigned long *);
30309
-/* Basically the trampoline invokes ffi_closure_SYSV, and on
30310
- * entry, r11 holds the address of the closure.
30311
- * After storing the registers that could possibly contain
30312
- * parameters to be passed into the stack frame and setting
30313
- * up space for a return value, ffi_closure_SYSV invokes the
30314
- * following helper function to do most of the work
30318
-ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue,
30319
- unsigned long *pgr, ffi_dblfl *pfr,
30320
- unsigned long *pst)
30322
- /* rvalue is the pointer to space for return value in closure assembly */
30323
- /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
30324
- /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV */
30325
- /* pst is the pointer to outgoing parameter stack in original caller */
30328
- ffi_type ** arg_types;
30330
-#ifndef __NO_FPRS__
30331
- long nf = 0; /* number of floating registers already used */
30333
- long ng = 0; /* number of general registers already used */
30335
- ffi_cif *cif = closure->cif;
30336
- unsigned size = cif->rtype->size;
30337
- unsigned short rtypenum = cif->rtype->type;
30339
- avalue = alloca (cif->nargs * sizeof (void *));
30341
- /* First translate for softfloat/nonlinux */
30342
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
30343
- if (rtypenum == FFI_TYPE_FLOAT)
30344
- rtypenum = FFI_TYPE_UINT32;
30345
- if (rtypenum == FFI_TYPE_DOUBLE)
30346
- rtypenum = FFI_TYPE_UINT64;
30347
- if (rtypenum == FFI_TYPE_LONGDOUBLE)
30348
- rtypenum = FFI_TYPE_UINT128;
30349
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
30350
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
30351
- if (rtypenum == FFI_TYPE_LONGDOUBLE)
30352
- rtypenum = FFI_TYPE_STRUCT;
30357
- /* Copy the caller's structure return value address so that the closure
30358
- returns the data directly to the caller.
30359
- For FFI_SYSV the result is passed in r3/r4 if the struct size is less
30360
- or equal 8 bytes. */
30361
- if (rtypenum == FFI_TYPE_STRUCT && ((cif->abi != FFI_SYSV) || (size > 8))) {
30362
- rvalue = (void *) *pgr;
30368
- avn = cif->nargs;
30369
- arg_types = cif->arg_types;
30371
- /* Grab the addresses of the arguments from the stack frame. */
30372
- while (i < avn) {
30373
- unsigned short typenum = arg_types[i]->type;
30375
- /* We may need to handle some values depending on ABI */
30376
- if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
30377
- if (typenum == FFI_TYPE_FLOAT)
30378
- typenum = FFI_TYPE_UINT32;
30379
- if (typenum == FFI_TYPE_DOUBLE)
30380
- typenum = FFI_TYPE_UINT64;
30381
- if (typenum == FFI_TYPE_LONGDOUBLE)
30382
- typenum = FFI_TYPE_UINT128;
30383
- } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
30384
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
30385
- if (typenum == FFI_TYPE_LONGDOUBLE)
30386
- typenum = FFI_TYPE_STRUCT;
30390
- switch (typenum) {
30391
-#ifndef __NO_FPRS__
30392
- case FFI_TYPE_FLOAT:
30393
- /* unfortunately float values are stored as doubles
30394
- * in the ffi_closure_SYSV code (since we don't check
30395
- * the type in that routine).
30398
- /* there are 8 64bit floating point registers */
30402
- double temp = pfr->d;
30403
- pfr->f = (float) temp;
30410
- /* FIXME? here we are really changing the values
30411
- * stored in the original calling routines outgoing
30412
- * parameter stack. This is probably a really
30413
- * naughty thing to do but...
30420
- case FFI_TYPE_DOUBLE:
30421
- /* On the outgoing stack all values are aligned to 8 */
30422
- /* there are 8 64bit floating point registers */
30432
- if (((long) pst) & 4)
30439
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
30440
- case FFI_TYPE_LONGDOUBLE:
30449
- if (((long) pst) & 4)
30457
-#endif /* have FPRS */
30459
- case FFI_TYPE_UINT128:
30461
- * Test if for the whole long double, 4 gprs are available.
30462
- * otherwise the stuff ends up on the stack.
30475
- case FFI_TYPE_SINT8:
30476
- case FFI_TYPE_UINT8:
30477
-#ifndef __LITTLE_ENDIAN__
30478
- /* there are 8 gpr registers used to pass values */
30481
- avalue[i] = (char *) pgr + 3;
30487
- avalue[i] = (char *) pst + 3;
30492
- case FFI_TYPE_SINT16:
30493
- case FFI_TYPE_UINT16:
30494
-#ifndef __LITTLE_ENDIAN__
30495
- /* there are 8 gpr registers used to pass values */
30498
- avalue[i] = (char *) pgr + 2;
30504
- avalue[i] = (char *) pst + 2;
30509
- case FFI_TYPE_SINT32:
30510
- case FFI_TYPE_UINT32:
30511
- case FFI_TYPE_POINTER:
30512
- /* there are 8 gpr registers used to pass values */
30526
- case FFI_TYPE_STRUCT:
30527
- /* Structs are passed by reference. The address will appear in a
30528
- gpr if it is one of the first 8 arguments. */
30531
- avalue[i] = (void *) *pgr;
30537
- avalue[i] = (void *) *pst;
30542
- case FFI_TYPE_SINT64:
30543
- case FFI_TYPE_UINT64:
30544
- /* passing long long ints are complex, they must
30545
- * be passed in suitable register pairs such as
30546
- * (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10)
30547
- * and if the entire pair aren't available then the outgoing
30548
- * parameter stack is used for both but an alignment of 8
30549
- * must will be kept. So we must either look in pgr
30550
- * or pst to find the correct address for this type
30557
- /* skip r4, r6, r8 as starting points */
30567
- if (((long) pst) & 4)
30583
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
30585
- /* Tell ffi_closure_SYSV how to perform return type promotions.
30586
- Because the FFI_SYSV ABI returns the structures <= 8 bytes in r3/r4
30587
- we have to tell ffi_closure_SYSV how to treat them. We combine the base
30588
- type FFI_SYSV_TYPE_SMALL_STRUCT - 1 with the size of the struct.
30589
- So a one byte struct gets the return type 16. Return type 1 to 15 are
30590
- already used and we never have a struct with size zero. That is the reason
30591
- for the subtraction of 1. See the comment in ffitarget.h about ordering.
30593
- if (cif->abi == FFI_SYSV && rtypenum == FFI_TYPE_STRUCT && size <= 8)
30594
- return (FFI_SYSV_TYPE_SMALL_STRUCT - 1) + size;
30598
-int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *,
30599
- unsigned long *, ffi_dblfl *);
30602
-ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue,
30603
- unsigned long *pst, ffi_dblfl *pfr)
30605
- /* rvalue is the pointer to space for return value in closure assembly */
30606
- /* pst is the pointer to parameter save area
30607
- (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
30608
- /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
30611
- ffi_type **arg_types;
30614
- ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
30616
- cif = closure->cif;
30617
- avalue = alloca (cif->nargs * sizeof (void *));
30619
- /* Copy the caller's structure return value address so that the closure
30620
- returns the data directly to the caller. */
30621
- if (cif->rtype->type == FFI_TYPE_STRUCT)
30623
- rvalue = (void *) *pst;
30628
- avn = cif->nargs;
30629
- arg_types = cif->arg_types;
30631
- /* Grab the addresses of the arguments from the stack frame. */
30634
- switch (arg_types[i]->type)
30636
- case FFI_TYPE_SINT8:
30637
- case FFI_TYPE_UINT8:
30638
-#ifndef __LITTLE_ENDIAN__
30639
- avalue[i] = (char *) pst + 7;
30643
- case FFI_TYPE_SINT16:
30644
- case FFI_TYPE_UINT16:
30645
-#ifndef __LITTLE_ENDIAN__
30646
- avalue[i] = (char *) pst + 6;
30650
- case FFI_TYPE_SINT32:
30651
- case FFI_TYPE_UINT32:
30652
-#ifndef __LITTLE_ENDIAN__
30653
- avalue[i] = (char *) pst + 4;
30657
- case FFI_TYPE_SINT64:
30658
- case FFI_TYPE_UINT64:
30659
- case FFI_TYPE_POINTER:
30664
- case FFI_TYPE_STRUCT:
30665
-#ifndef __LITTLE_ENDIAN__
30666
- /* Structures with size less than eight bytes are passed
30668
- if (arg_types[i]->size < 8)
30669
- avalue[i] = (char *) pst + 8 - arg_types[i]->size;
30673
- pst += (arg_types[i]->size + 7) / 8;
30676
- case FFI_TYPE_FLOAT:
30677
- /* unfortunately float values are stored as doubles
30678
- * in the ffi_closure_LINUX64 code (since we don't check
30679
- * the type in that routine).
30682
- /* there are 13 64bit floating point registers */
30684
- if (pfr < end_pfr)
30686
- double temp = pfr->d;
30687
- pfr->f = (float) temp;
30696
- case FFI_TYPE_DOUBLE:
30697
- /* On the outgoing stack all values are aligned to 8 */
30698
- /* there are 13 64bit floating point registers */
30700
- if (pfr < end_pfr)
30710
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
30711
- case FFI_TYPE_LONGDOUBLE:
30712
- if (pfr + 1 < end_pfr)
30719
- if (pfr < end_pfr)
30721
- /* Passed partly in f13 and partly on the stack.
30722
- Move it all to the stack. */
30723
- *pst = *(unsigned long *) pfr;
30740
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
30742
- /* Tell ffi_closure_LINUX64 how to perform return type promotions. */
30743
- return cif->rtype->type;
30745
--- a/src/libffi/src/powerpc/sysv.S
30746
+++ b/src/libffi/src/powerpc/sysv.S
30749
#include <powerpc/asm.h>
30751
-#ifndef __powerpc64__
30753
.globl ffi_prep_args_SYSV
30754
ENTRY(ffi_call_SYSV)
30756
@@ -213,8 +213,8 @@
30762
#if defined __ELF__ && defined __linux__
30763
.section .note.GNU-stack,"",@progbits
30766
--- a/src/libffi/src/powerpc/linux64_closure.S
30767
+++ b/src/libffi/src/powerpc/linux64_closure.S
30768
@@ -30,18 +30,25 @@
30770
.file "linux64_closure.S"
30772
-#ifdef __powerpc64__
30774
FFI_HIDDEN (ffi_closure_LINUX64)
30775
.globl ffi_closure_LINUX64
30776
+# if _CALL_ELF == 2
30778
+ffi_closure_LINUX64:
30779
+ addis %r2, %r12, .TOC.-ffi_closure_LINUX64@ha
30780
+ addi %r2, %r2, .TOC.-ffi_closure_LINUX64@l
30781
+ .localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64
30783
.section ".opd","aw"
30785
ffi_closure_LINUX64:
30786
-#ifdef _CALL_LINUX
30787
+# ifdef _CALL_LINUX
30788
.quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0
30789
.type ffi_closure_LINUX64,@function
30791
.L.ffi_closure_LINUX64:
30794
FFI_HIDDEN (.ffi_closure_LINUX64)
30795
.globl .ffi_closure_LINUX64
30796
.quad .ffi_closure_LINUX64,.TOC.@tocbase,0
30797
@@ -49,61 +56,101 @@
30798
.type .ffi_closure_LINUX64,@function
30800
.ffi_closure_LINUX64:
30805
+# if _CALL_ELF == 2
30806
+# 32 byte special reg save area + 64 byte parm save area
30807
+# + 64 byte retval area + 13*8 fpr save area + round to 16
30808
+# define STACKFRAME 272
30809
+# define PARMSAVE 32
30810
+# define RETVAL PARMSAVE+64
30812
+# 48 bytes special reg save area + 64 bytes parm save area
30813
+# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
30814
+# define STACKFRAME 240
30815
+# define PARMSAVE 48
30816
+# define RETVAL PARMSAVE+64
30820
- # save general regs into parm save area
30825
+# if _CALL_ELF == 2
30826
+ ld %r12, FFI_TRAMPOLINE_SIZE(%r11) # closure->cif
30828
+ lwz %r12, 28(%r12) # cif->flags
30830
+ addi %r12, %r1, PARMSAVE
30832
+ # Our caller has not allocated a parameter save area.
30833
+ # We need to allocate one here and use it to pass gprs to
30834
+ # ffi_closure_helper_LINUX64.
30835
+ addi %r12, %r1, -STACKFRAME+PARMSAVE
30838
+ # Save general regs into parm save area
30841
+ std %r5, 16(%r12)
30842
+ std %r6, 24(%r12)
30843
+ std %r7, 32(%r12)
30844
+ std %r8, 40(%r12)
30845
+ std %r9, 48(%r12)
30846
+ std %r10, 56(%r12)
30851
- std %r10, 104(%r1)
30852
+ # load up the pointer to the parm save area
30856
+ # Save general regs into parm save area
30857
+ # This is the parameter save area set up by our caller.
30858
+ std %r3, PARMSAVE+0(%r1)
30859
+ std %r4, PARMSAVE+8(%r1)
30860
+ std %r5, PARMSAVE+16(%r1)
30861
+ std %r6, PARMSAVE+24(%r1)
30862
+ std %r7, PARMSAVE+32(%r1)
30863
+ std %r8, PARMSAVE+40(%r1)
30864
+ std %r9, PARMSAVE+48(%r1)
30865
+ std %r10, PARMSAVE+56(%r1)
30869
- # mandatory 48 bytes special reg save area + 64 bytes parm save area
30870
- # + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
30871
- stdu %r1, -240(%r1)
30873
+ # load up the pointer to the parm save area
30874
+ addi %r5, %r1, PARMSAVE
30877
# next save fpr 1 to fpr 13
30878
- stfd %f1, 128+(0*8)(%r1)
30879
- stfd %f2, 128+(1*8)(%r1)
30880
- stfd %f3, 128+(2*8)(%r1)
30881
- stfd %f4, 128+(3*8)(%r1)
30882
- stfd %f5, 128+(4*8)(%r1)
30883
- stfd %f6, 128+(5*8)(%r1)
30884
- stfd %f7, 128+(6*8)(%r1)
30885
- stfd %f8, 128+(7*8)(%r1)
30886
- stfd %f9, 128+(8*8)(%r1)
30887
- stfd %f10, 128+(9*8)(%r1)
30888
- stfd %f11, 128+(10*8)(%r1)
30889
- stfd %f12, 128+(11*8)(%r1)
30890
- stfd %f13, 128+(12*8)(%r1)
30891
+ stfd %f1, -104+(0*8)(%r1)
30892
+ stfd %f2, -104+(1*8)(%r1)
30893
+ stfd %f3, -104+(2*8)(%r1)
30894
+ stfd %f4, -104+(3*8)(%r1)
30895
+ stfd %f5, -104+(4*8)(%r1)
30896
+ stfd %f6, -104+(5*8)(%r1)
30897
+ stfd %f7, -104+(6*8)(%r1)
30898
+ stfd %f8, -104+(7*8)(%r1)
30899
+ stfd %f9, -104+(8*8)(%r1)
30900
+ stfd %f10, -104+(9*8)(%r1)
30901
+ stfd %f11, -104+(10*8)(%r1)
30902
+ stfd %f12, -104+(11*8)(%r1)
30903
+ stfd %f13, -104+(12*8)(%r1)
30905
- # set up registers for the routine that actually does the work
30906
- # get the context pointer from the trampoline
30908
+ # load up the pointer to the saved fpr registers */
30909
+ addi %r6, %r1, -104
30911
- # now load up the pointer to the result storage
30912
- addi %r4, %r1, 112
30913
+ # load up the pointer to the result storage
30914
+ addi %r4, %r1, -STACKFRAME+RETVAL
30916
- # now load up the pointer to the parameter save area
30917
- # in the previous frame
30918
- addi %r5, %r1, 240 + 48
30919
+ stdu %r1, -STACKFRAME(%r1)
30922
- # now load up the pointer to the saved fpr registers */
30923
- addi %r6, %r1, 128
30924
+ # get the context pointer from the trampoline
30928
-#ifdef _CALL_LINUX
30929
+# if defined _CALL_LINUX || _CALL_ELF == 2
30930
bl ffi_closure_helper_LINUX64
30933
bl .ffi_closure_helper_LINUX64
30938
# now r3 contains the return type
30939
@@ -112,10 +159,12 @@
30941
# look up the proper starting point in table
30942
# by using return type as offset
30943
+ ld %r0, STACKFRAME+16(%r1)
30944
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT
30946
mflr %r4 # move address of .Lret to r4
30947
sldi %r3, %r3, 4 # now multiply return type by 16
30948
addi %r4, %r4, .Lret_type0 - .Lret
30949
- ld %r0, 240+16(%r1)
30950
add %r3, %r3, %r4 # add contents of table to table address
30953
@@ -128,117 +177,175 @@
30955
# case FFI_TYPE_VOID
30957
- addi %r1, %r1, 240
30958
+ addi %r1, %r1, STACKFRAME
30961
# case FFI_TYPE_INT
30962
-#ifdef __LITTLE_ENDIAN__
30963
- lwa %r3, 112+0(%r1)
30965
- lwa %r3, 112+4(%r1)
30967
+# ifdef __LITTLE_ENDIAN__
30968
+ lwa %r3, RETVAL+0(%r1)
30970
+ lwa %r3, RETVAL+4(%r1)
30973
- addi %r1, %r1, 240
30974
+ addi %r1, %r1, STACKFRAME
30976
# case FFI_TYPE_FLOAT
30977
- lfs %f1, 112+0(%r1)
30978
+ lfs %f1, RETVAL+0(%r1)
30980
- addi %r1, %r1, 240
30981
+ addi %r1, %r1, STACKFRAME
30983
# case FFI_TYPE_DOUBLE
30984
- lfd %f1, 112+0(%r1)
30985
+ lfd %f1, RETVAL+0(%r1)
30987
- addi %r1, %r1, 240
30988
+ addi %r1, %r1, STACKFRAME
30990
# case FFI_TYPE_LONGDOUBLE
30991
- lfd %f1, 112+0(%r1)
30992
+ lfd %f1, RETVAL+0(%r1)
30994
- lfd %f2, 112+8(%r1)
30995
+ lfd %f2, RETVAL+8(%r1)
30997
# case FFI_TYPE_UINT8
30998
-#ifdef __LITTLE_ENDIAN__
30999
- lbz %r3, 112+0(%r1)
31001
- lbz %r3, 112+7(%r1)
31003
+# ifdef __LITTLE_ENDIAN__
31004
+ lbz %r3, RETVAL+0(%r1)
31006
+ lbz %r3, RETVAL+7(%r1)
31009
- addi %r1, %r1, 240
31010
+ addi %r1, %r1, STACKFRAME
31012
# case FFI_TYPE_SINT8
31013
-#ifdef __LITTLE_ENDIAN__
31014
- lbz %r3, 112+0(%r1)
31016
- lbz %r3, 112+7(%r1)
31018
+# ifdef __LITTLE_ENDIAN__
31019
+ lbz %r3, RETVAL+0(%r1)
31021
+ lbz %r3, RETVAL+7(%r1)
31026
# case FFI_TYPE_UINT16
31027
-#ifdef __LITTLE_ENDIAN__
31028
- lhz %r3, 112+0(%r1)
31030
- lhz %r3, 112+6(%r1)
31032
+# ifdef __LITTLE_ENDIAN__
31033
+ lhz %r3, RETVAL+0(%r1)
31035
+ lhz %r3, RETVAL+6(%r1)
31039
- addi %r1, %r1, 240
31040
+ addi %r1, %r1, STACKFRAME
31042
# case FFI_TYPE_SINT16
31043
-#ifdef __LITTLE_ENDIAN__
31044
- lha %r3, 112+0(%r1)
31046
- lha %r3, 112+6(%r1)
31048
+# ifdef __LITTLE_ENDIAN__
31049
+ lha %r3, RETVAL+0(%r1)
31051
+ lha %r3, RETVAL+6(%r1)
31054
- addi %r1, %r1, 240
31055
+ addi %r1, %r1, STACKFRAME
31057
# case FFI_TYPE_UINT32
31058
-#ifdef __LITTLE_ENDIAN__
31059
- lwz %r3, 112+0(%r1)
31061
- lwz %r3, 112+4(%r1)
31063
+# ifdef __LITTLE_ENDIAN__
31064
+ lwz %r3, RETVAL+0(%r1)
31066
+ lwz %r3, RETVAL+4(%r1)
31069
- addi %r1, %r1, 240
31070
+ addi %r1, %r1, STACKFRAME
31072
# case FFI_TYPE_SINT32
31073
-#ifdef __LITTLE_ENDIAN__
31074
- lwa %r3, 112+0(%r1)
31076
- lwa %r3, 112+4(%r1)
31078
+# ifdef __LITTLE_ENDIAN__
31079
+ lwa %r3, RETVAL+0(%r1)
31081
+ lwa %r3, RETVAL+4(%r1)
31084
- addi %r1, %r1, 240
31085
+ addi %r1, %r1, STACKFRAME
31087
# case FFI_TYPE_UINT64
31088
- ld %r3, 112+0(%r1)
31089
+ ld %r3, RETVAL+0(%r1)
31091
- addi %r1, %r1, 240
31092
+ addi %r1, %r1, STACKFRAME
31094
# case FFI_TYPE_SINT64
31095
- ld %r3, 112+0(%r1)
31096
+ ld %r3, RETVAL+0(%r1)
31098
- addi %r1, %r1, 240
31099
+ addi %r1, %r1, STACKFRAME
31101
# case FFI_TYPE_STRUCT
31103
- addi %r1, %r1, 240
31104
+ addi %r1, %r1, STACKFRAME
31107
# case FFI_TYPE_POINTER
31108
- ld %r3, 112+0(%r1)
31109
+ ld %r3, RETVAL+0(%r1)
31111
- addi %r1, %r1, 240
31112
+ addi %r1, %r1, STACKFRAME
31115
+# case FFI_V2_TYPE_FLOAT_HOMOG
31116
+ lfs %f1, RETVAL+0(%r1)
31117
+ lfs %f2, RETVAL+4(%r1)
31118
+ lfs %f3, RETVAL+8(%r1)
31120
+# case FFI_V2_TYPE_DOUBLE_HOMOG
31121
+ lfd %f1, RETVAL+0(%r1)
31122
+ lfd %f2, RETVAL+8(%r1)
31123
+ lfd %f3, RETVAL+16(%r1)
31124
+ lfd %f4, RETVAL+24(%r1)
31126
+ lfd %f5, RETVAL+32(%r1)
31127
+ lfd %f6, RETVAL+40(%r1)
31128
+ lfd %f7, RETVAL+48(%r1)
31129
+ lfd %f8, RETVAL+56(%r1)
31130
+ addi %r1, %r1, STACKFRAME
31133
+ lfs %f4, RETVAL+12(%r1)
31135
+ lfs %f5, RETVAL+16(%r1)
31136
+ lfs %f6, RETVAL+20(%r1)
31137
+ lfs %f7, RETVAL+24(%r1)
31138
+ lfs %f8, RETVAL+28(%r1)
31139
+ addi %r1, %r1, STACKFRAME
31142
+# ifdef __LITTLE_ENDIAN__
31143
+ ld %r3,RETVAL+0(%r1)
31145
+ ld %r4,RETVAL+8(%r1)
31146
+ addi %r1, %r1, STACKFRAME
31149
+ # A struct smaller than a dword is returned in the low bits of r3
31150
+ # ie. right justified. Larger structs are passed left justified
31151
+ # in r3 and r4. The return value area on the stack will have
31152
+ # the structs as they are usually stored in memory.
31153
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes?
31155
+ ld %r3,RETVAL+0(%r1)
31158
+ ld %r4,RETVAL+8(%r1)
31159
+ addi %r1, %r1, STACKFRAME
31162
+ addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7
31165
+ addi %r1, %r1, STACKFRAME
31166
+ srd %r3, %r3, %r5
31172
.byte 0,12,0,1,128,0,0,0
31173
-#ifdef _CALL_LINUX
31174
+# if _CALL_ELF == 2
31175
+ .size ffi_closure_LINUX64,.-ffi_closure_LINUX64
31177
+# ifdef _CALL_LINUX
31178
.size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
31181
.size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64
31186
.section .eh_frame,EH_FRAME_FLAGS,@progbits
31188
@@ -267,14 +374,14 @@
31189
.byte 0x2 # DW_CFA_advance_loc1
31191
.byte 0xe # DW_CFA_def_cfa_offset
31193
+ .uleb128 STACKFRAME
31194
.byte 0x11 # DW_CFA_offset_extended_sf
31201
-#if defined __ELF__ && defined __linux__
31202
+# if defined __ELF__ && defined __linux__
31203
.section .note.GNU-stack,"",@progbits
31206
--- a/src/libffi/src/powerpc/ffi_powerpc.h
31207
+++ b/src/libffi/src/powerpc/ffi_powerpc.h
31209
+/* -----------------------------------------------------------------------
31210
+ ffi_powerpc.h - Copyright (C) 2013 IBM
31211
+ Copyright (C) 2011 Anthony Green
31212
+ Copyright (C) 2011 Kyle Moffett
31213
+ Copyright (C) 2008 Red Hat, Inc
31214
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc
31215
+ Copyright (c) 1998 Geoffrey Keating
31217
+ PowerPC Foreign Function Interface
31219
+ Permission is hereby granted, free of charge, to any person obtaining
31220
+ a copy of this software and associated documentation files (the
31221
+ ``Software''), to deal in the Software without restriction, including
31222
+ without limitation the rights to use, copy, modify, merge, publish,
31223
+ distribute, sublicense, and/or sell copies of the Software, and to
31224
+ permit persons to whom the Software is furnished to do so, subject to
31225
+ the following conditions:
31227
+ The above copyright notice and this permission notice shall be included
31228
+ in all copies or substantial portions of the Software.
31230
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
31231
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31232
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
31233
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
31234
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
31235
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
31236
+ OTHER DEALINGS IN THE SOFTWARE.
31237
+ ----------------------------------------------------------------------- */
31240
+ /* The assembly depends on these exact flags. */
31241
+ /* These go in cr7 */
31242
+ FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
31243
+ FLAG_RETURNS_NOTHING = 1 << (31-30),
31244
+ FLAG_RETURNS_FP = 1 << (31-29),
31245
+ FLAG_RETURNS_64BITS = 1 << (31-28),
31247
+ /* This goes in cr6 */
31248
+ FLAG_RETURNS_128BITS = 1 << (31-27),
31250
+ FLAG_COMPAT = 1 << (31- 8), /* Not used by assembly */
31252
+ /* These go in cr1 */
31253
+ FLAG_ARG_NEEDS_COPY = 1 << (31- 7), /* Used by sysv code */
31254
+ FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by linux64 code */
31255
+ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
31256
+ FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
31257
+ FLAG_RETVAL_REFERENCE = 1 << (31- 4)
31266
+void FFI_HIDDEN ffi_closure_SYSV (void);
31267
+void FFI_HIDDEN ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *,
31270
+void FFI_HIDDEN ffi_prep_types_sysv (ffi_abi);
31271
+ffi_status FFI_HIDDEN ffi_prep_cif_sysv (ffi_cif *);
31272
+int FFI_HIDDEN ffi_closure_helper_SYSV (ffi_closure *, void *, unsigned long *,
31273
+ ffi_dblfl *, unsigned long *);
31275
+void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long, unsigned long,
31276
+ unsigned long *, void (*)(void));
31277
+void FFI_HIDDEN ffi_closure_LINUX64 (void);
31279
+void FFI_HIDDEN ffi_prep_types_linux64 (ffi_abi);
31280
+ffi_status FFI_HIDDEN ffi_prep_cif_linux64 (ffi_cif *);
31281
+ffi_status FFI_HIDDEN ffi_prep_cif_linux64_var (ffi_cif *, unsigned int,
31283
+void FFI_HIDDEN ffi_prep_args64 (extended_cif *, unsigned long *const);
31284
+int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *,
31285
+ unsigned long *, ffi_dblfl *);
31286
--- a/src/libffi/src/powerpc/ffi_sysv.c
31287
+++ b/src/libffi/src/powerpc/ffi_sysv.c
31289
+/* -----------------------------------------------------------------------
31290
+ ffi_sysv.c - Copyright (C) 2013 IBM
31291
+ Copyright (C) 2011 Anthony Green
31292
+ Copyright (C) 2011 Kyle Moffett
31293
+ Copyright (C) 2008 Red Hat, Inc
31294
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc
31295
+ Copyright (c) 1998 Geoffrey Keating
31297
+ PowerPC Foreign Function Interface
31299
+ Permission is hereby granted, free of charge, to any person obtaining
31300
+ a copy of this software and associated documentation files (the
31301
+ ``Software''), to deal in the Software without restriction, including
31302
+ without limitation the rights to use, copy, modify, merge, publish,
31303
+ distribute, sublicense, and/or sell copies of the Software, and to
31304
+ permit persons to whom the Software is furnished to do so, subject to
31305
+ the following conditions:
31307
+ The above copyright notice and this permission notice shall be included
31308
+ in all copies or substantial portions of the Software.
31310
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
31311
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31312
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
31313
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
31314
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
31315
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
31316
+ OTHER DEALINGS IN THE SOFTWARE.
31317
+ ----------------------------------------------------------------------- */
31322
+#include "ffi_common.h"
31323
+#include "ffi_powerpc.h"
31326
+/* About the SYSV ABI. */
31327
+#define ASM_NEEDS_REGISTERS 4
31328
+#define NUM_GPR_ARG_REGISTERS 8
31329
+#define NUM_FPR_ARG_REGISTERS 8
31332
+#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
31333
+/* Adjust size of ffi_type_longdouble. */
31335
+ffi_prep_types_sysv (ffi_abi abi)
31337
+ if ((abi & (FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128)) == FFI_SYSV)
31339
+ ffi_type_longdouble.size = 8;
31340
+ ffi_type_longdouble.alignment = 8;
31344
+ ffi_type_longdouble.size = 16;
31345
+ ffi_type_longdouble.alignment = 16;
31350
+/* Transform long double, double and float to other types as per abi. */
31352
+translate_float (int abi, int type)
31354
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
31355
+ if (type == FFI_TYPE_LONGDOUBLE
31356
+ && (abi & FFI_SYSV_LONG_DOUBLE_128) == 0)
31357
+ type = FFI_TYPE_DOUBLE;
31359
+ if ((abi & FFI_SYSV_SOFT_FLOAT) != 0)
31361
+ if (type == FFI_TYPE_FLOAT)
31362
+ type = FFI_TYPE_UINT32;
31363
+ else if (type == FFI_TYPE_DOUBLE)
31364
+ type = FFI_TYPE_UINT64;
31365
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
31366
+ else if (type == FFI_TYPE_LONGDOUBLE)
31367
+ type = FFI_TYPE_UINT128;
31369
+ else if ((abi & FFI_SYSV_IBM_LONG_DOUBLE) == 0)
31371
+ if (type == FFI_TYPE_LONGDOUBLE)
31372
+ type = FFI_TYPE_STRUCT;
31378
+/* Perform machine dependent cif processing */
31380
+ffi_prep_cif_sysv_core (ffi_cif *cif)
31384
+ unsigned i, fparg_count = 0, intarg_count = 0;
31385
+ unsigned flags = cif->flags;
31386
+ unsigned struct_copy_size = 0;
31387
+ unsigned type = cif->rtype->type;
31388
+ unsigned size = cif->rtype->size;
31390
+ /* The machine-independent calculation of cif->bytes doesn't work
31391
+ for us. Redo the calculation. */
31393
+ /* Space for the frame pointer, callee's LR, and the asm's temp regs. */
31394
+ bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
31396
+ /* Space for the GPR registers. */
31397
+ bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
31399
+ /* Return value handling. The rules for SYSV are as follows:
31400
+ - 32-bit (or less) integer values are returned in gpr3;
31401
+ - Structures of size <= 4 bytes also returned in gpr3;
31402
+ - 64-bit integer values and structures between 5 and 8 bytes are returned
31403
+ in gpr3 and gpr4;
31404
+ - Larger structures are allocated space and a pointer is passed as
31405
+ the first argument.
31406
+ - Single/double FP values are returned in fpr1;
31407
+ - long doubles (if not equivalent to double) are returned in
31408
+ fpr1,fpr2 for Linux and as for large structs for SysV. */
31410
+ type = translate_float (cif->abi, type);
31414
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
31415
+ case FFI_TYPE_LONGDOUBLE:
31416
+ flags |= FLAG_RETURNS_128BITS;
31417
+ /* Fall through. */
31419
+ case FFI_TYPE_DOUBLE:
31420
+ flags |= FLAG_RETURNS_64BITS;
31421
+ /* Fall through. */
31422
+ case FFI_TYPE_FLOAT:
31423
+ flags |= FLAG_RETURNS_FP;
31424
+#ifdef __NO_FPRS__
31425
+ return FFI_BAD_ABI;
31429
+ case FFI_TYPE_UINT128:
31430
+ flags |= FLAG_RETURNS_128BITS;
31431
+ /* Fall through. */
31432
+ case FFI_TYPE_UINT64:
31433
+ case FFI_TYPE_SINT64:
31434
+ flags |= FLAG_RETURNS_64BITS;
31437
+ case FFI_TYPE_STRUCT:
31438
+ /* The final SYSV ABI says that structures smaller or equal 8 bytes
31439
+ are returned in r3/r4. A draft ABI used by linux instead
31440
+ returns them in memory. */
31441
+ if ((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)
31443
+ flags |= FLAG_RETURNS_SMST;
31447
+ flags |= FLAG_RETVAL_REFERENCE;
31448
+ /* Fall through. */
31449
+ case FFI_TYPE_VOID:
31450
+ flags |= FLAG_RETURNS_NOTHING;
31454
+ /* Returns 32-bit integer, or similar. Nothing to do here. */
31458
+ /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
31459
+ first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
31460
+ goes on the stack. Structures and long doubles (if not equivalent
31461
+ to double) are passed as a pointer to a copy of the structure.
31462
+ Stuff on the stack needs to keep proper alignment. */
31463
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
31465
+ unsigned short typenum = (*ptr)->type;
31467
+ typenum = translate_float (cif->abi, typenum);
31471
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
31472
+ case FFI_TYPE_LONGDOUBLE:
31476
+ case FFI_TYPE_DOUBLE:
31478
+ /* If this FP arg is going on the stack, it must be
31479
+ 8-byte-aligned. */
31480
+ if (fparg_count > NUM_FPR_ARG_REGISTERS
31481
+ && intarg_count >= NUM_GPR_ARG_REGISTERS
31482
+ && intarg_count % 2 != 0)
31484
+#ifdef __NO_FPRS__
31485
+ return FFI_BAD_ABI;
31489
+ case FFI_TYPE_FLOAT:
31491
+#ifdef __NO_FPRS__
31492
+ return FFI_BAD_ABI;
31496
+ case FFI_TYPE_UINT128:
31497
+ /* A long double in FFI_LINUX_SOFT_FLOAT can use only a set
31498
+ of four consecutive gprs. If we do not have enough, we
31499
+ have to adjust the intarg_count value. */
31500
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3
31501
+ && intarg_count < NUM_GPR_ARG_REGISTERS)
31502
+ intarg_count = NUM_GPR_ARG_REGISTERS;
31503
+ intarg_count += 4;
31506
+ case FFI_TYPE_UINT64:
31507
+ case FFI_TYPE_SINT64:
31508
+ /* 'long long' arguments are passed as two words, but
31509
+ either both words must fit in registers or both go
31510
+ on the stack. If they go on the stack, they must
31511
+ be 8-byte-aligned.
31513
+ Also, only certain register pairs can be used for
31514
+ passing long long int -- specifically (r3,r4), (r5,r6),
31515
+ (r7,r8), (r9,r10). */
31516
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1
31517
+ || intarg_count % 2 != 0)
31519
+ intarg_count += 2;
31522
+ case FFI_TYPE_STRUCT:
31523
+ /* We must allocate space for a copy of these to enforce
31524
+ pass-by-value. Pad the space up to a multiple of 16
31525
+ bytes (the maximum alignment required for anything under
31526
+ the SYSV ABI). */
31527
+ struct_copy_size += ((*ptr)->size + 15) & ~0xF;
31528
+ /* Fall through (allocate space for the pointer). */
31530
+ case FFI_TYPE_POINTER:
31531
+ case FFI_TYPE_INT:
31532
+ case FFI_TYPE_UINT32:
31533
+ case FFI_TYPE_SINT32:
31534
+ case FFI_TYPE_UINT16:
31535
+ case FFI_TYPE_SINT16:
31536
+ case FFI_TYPE_UINT8:
31537
+ case FFI_TYPE_SINT8:
31538
+ /* Everything else is passed as a 4-byte word in a GPR, either
31539
+ the object itself or a pointer to it. */
31548
+ if (fparg_count != 0)
31549
+ flags |= FLAG_FP_ARGUMENTS;
31550
+ if (intarg_count > 4)
31551
+ flags |= FLAG_4_GPR_ARGUMENTS;
31552
+ if (struct_copy_size != 0)
31553
+ flags |= FLAG_ARG_NEEDS_COPY;
31555
+ /* Space for the FPR registers, if needed. */
31556
+ if (fparg_count != 0)
31557
+ bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);
31559
+ /* Stack space. */
31560
+ if (intarg_count > NUM_GPR_ARG_REGISTERS)
31561
+ bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int);
31562
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
31563
+ bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double);
31565
+ /* The stack space allocated needs to be a multiple of 16 bytes. */
31566
+ bytes = (bytes + 15) & ~0xF;
31568
+ /* Add in the space for the copied structures. */
31569
+ bytes += struct_copy_size;
31571
+ cif->flags = flags;
31572
+ cif->bytes = bytes;
31577
+ffi_status FFI_HIDDEN
31578
+ffi_prep_cif_sysv (ffi_cif *cif)
31580
+ if ((cif->abi & FFI_SYSV) == 0)
31582
+ /* This call is from old code. Translate to new ABI values. */
31583
+ cif->flags |= FLAG_COMPAT;
31584
+ switch (cif->abi)
31587
+ return FFI_BAD_ABI;
31589
+ case FFI_COMPAT_SYSV:
31590
+ cif->abi = FFI_SYSV | FFI_SYSV_STRUCT_RET | FFI_SYSV_LONG_DOUBLE_128;
31593
+ case FFI_COMPAT_GCC_SYSV:
31594
+ cif->abi = FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128;
31597
+ case FFI_COMPAT_LINUX:
31598
+ cif->abi = (FFI_SYSV | FFI_SYSV_IBM_LONG_DOUBLE
31599
+ | FFI_SYSV_LONG_DOUBLE_128);
31602
+ case FFI_COMPAT_LINUX_SOFT_FLOAT:
31603
+ cif->abi = (FFI_SYSV | FFI_SYSV_SOFT_FLOAT | FFI_SYSV_IBM_LONG_DOUBLE
31604
+ | FFI_SYSV_LONG_DOUBLE_128);
31608
+ return ffi_prep_cif_sysv_core (cif);
31611
+/* ffi_prep_args_SYSV is called by the assembly routine once stack space
31612
+ has been allocated for the function's arguments.
31614
+ The stack layout we want looks like this:
31616
+ | Return address from ffi_call_SYSV 4bytes | higher addresses
31617
+ |--------------------------------------------|
31618
+ | Previous backchain pointer 4 | stack pointer here
31619
+ |--------------------------------------------|<+ <<< on entry to
31620
+ | Saved r28-r31 4*4 | | ffi_call_SYSV
31621
+ |--------------------------------------------| |
31622
+ | GPR registers r3-r10 8*4 | | ffi_call_SYSV
31623
+ |--------------------------------------------| |
31624
+ | FPR registers f1-f8 (optional) 8*8 | |
31625
+ |--------------------------------------------| | stack |
31626
+ | Space for copied structures | | grows |
31627
+ |--------------------------------------------| | down V
31628
+ | Parameters that didn't fit in registers | |
31629
+ |--------------------------------------------| | lower addresses
31630
+ | Space for callee's LR 4 | |
31631
+ |--------------------------------------------| | stack pointer here
31632
+ | Current backchain pointer 4 |-/ during
31633
+ |--------------------------------------------| <<< ffi_call_SYSV
31638
+ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
31640
+ const unsigned bytes = ecif->cif->bytes;
31641
+ const unsigned flags = ecif->cif->flags;
31652
+ /* 'stacktop' points at the previous backchain pointer. */
31655
+ /* 'gpr_base' points at the space for gpr3, and grows upwards as
31656
+ we use GPR registers. */
31658
+ int intarg_count;
31660
+#ifndef __NO_FPRS__
31661
+ /* 'fpr_base' points at the space for fpr1, and grows upwards as
31662
+ we use FPR registers. */
31667
+ /* 'copy_space' grows down as we put structures in it. It should
31668
+ stay 16-byte aligned. */
31671
+ /* 'next_arg' grows up as we put parameters in it. */
31676
+#ifndef __NO_FPRS__
31677
+ double double_tmp;
31683
+ signed char **sc;
31684
+ unsigned char **uc;
31685
+ signed short **ss;
31686
+ unsigned short **us;
31687
+ unsigned int **ui;
31692
+ size_t struct_copy_size;
31693
+ unsigned gprvalue;
31695
+ stacktop.c = (char *) stack + bytes;
31696
+ gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
31697
+ intarg_count = 0;
31698
+#ifndef __NO_FPRS__
31699
+ fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
31701
+ copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
31703
+ copy_space.c = gpr_base.c;
31705
+ next_arg.u = stack + 2;
31707
+ /* Check that everything starts aligned properly. */
31708
+ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
31709
+ FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
31710
+ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
31711
+ FFI_ASSERT ((bytes & 0xF) == 0);
31712
+ FFI_ASSERT (copy_space.c >= next_arg.c);
31714
+ /* Deal with return values that are actually pass-by-reference. */
31715
+ if (flags & FLAG_RETVAL_REFERENCE)
31717
+ *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue;
31721
+ /* Now for the arguments. */
31722
+ p_argv.v = ecif->avalue;
31723
+ for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
31725
+ i--, ptr++, p_argv.v++)
31727
+ unsigned int typenum = (*ptr)->type;
31729
+ typenum = translate_float (ecif->cif->abi, typenum);
31731
+ /* Now test the translated value */
31734
+#ifndef __NO_FPRS__
31735
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
31736
+ case FFI_TYPE_LONGDOUBLE:
31737
+ double_tmp = (*p_argv.d)[0];
31739
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS - 1)
31741
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS
31742
+ && intarg_count % 2 != 0)
31747
+ *next_arg.d = double_tmp;
31749
+ double_tmp = (*p_argv.d)[1];
31750
+ *next_arg.d = double_tmp;
31755
+ *fpr_base.d++ = double_tmp;
31756
+ double_tmp = (*p_argv.d)[1];
31757
+ *fpr_base.d++ = double_tmp;
31760
+ fparg_count += 2;
31761
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
31764
+ case FFI_TYPE_DOUBLE:
31765
+ double_tmp = **p_argv.d;
31767
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
31769
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS
31770
+ && intarg_count % 2 != 0)
31775
+ *next_arg.d = double_tmp;
31779
+ *fpr_base.d++ = double_tmp;
31781
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
31784
+ case FFI_TYPE_FLOAT:
31785
+ double_tmp = **p_argv.f;
31786
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
31788
+ *next_arg.f = (float) double_tmp;
31793
+ *fpr_base.d++ = double_tmp;
31795
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
31797
+#endif /* have FPRs */
31799
+ case FFI_TYPE_UINT128:
31800
+ /* The soft float ABI for long doubles works like this, a long double
31801
+ is passed in four consecutive GPRs if available. A maximum of 2
31802
+ long doubles can be passed in gprs. If we do not have 4 GPRs
31803
+ left, the long double is passed on the stack, 4-byte aligned. */
31805
+ unsigned int int_tmp;
31807
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3)
31809
+ if (intarg_count < NUM_GPR_ARG_REGISTERS)
31810
+ intarg_count = NUM_GPR_ARG_REGISTERS;
31811
+ for (ii = 0; ii < 4; ii++)
31813
+ int_tmp = (*p_argv.ui)[ii];
31814
+ *next_arg.u++ = int_tmp;
31819
+ for (ii = 0; ii < 4; ii++)
31821
+ int_tmp = (*p_argv.ui)[ii];
31822
+ *gpr_base.u++ = int_tmp;
31825
+ intarg_count += 4;
31829
+ case FFI_TYPE_UINT64:
31830
+ case FFI_TYPE_SINT64:
31831
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1)
31833
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS)
31835
+ if (intarg_count % 2 != 0)
31840
+ *next_arg.ll = **p_argv.ll;
31845
+ /* The abi states only certain register pairs can be
31846
+ used for passing long long int specifically (r3,r4),
31847
+ (r5,r6), (r7,r8), (r9,r10). If next arg is long long
31848
+ but not correct starting register of pair then skip
31849
+ until the proper starting register. */
31850
+ if (intarg_count % 2 != 0)
31855
+ *gpr_base.ll++ = **p_argv.ll;
31857
+ intarg_count += 2;
31860
+ case FFI_TYPE_STRUCT:
31861
+ struct_copy_size = ((*ptr)->size + 15) & ~0xF;
31862
+ copy_space.c -= struct_copy_size;
31863
+ memcpy (copy_space.c, *p_argv.c, (*ptr)->size);
31865
+ gprvalue = (unsigned long) copy_space.c;
31867
+ FFI_ASSERT (copy_space.c > next_arg.c);
31868
+ FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
31871
+ case FFI_TYPE_UINT8:
31872
+ gprvalue = **p_argv.uc;
31874
+ case FFI_TYPE_SINT8:
31875
+ gprvalue = **p_argv.sc;
31877
+ case FFI_TYPE_UINT16:
31878
+ gprvalue = **p_argv.us;
31880
+ case FFI_TYPE_SINT16:
31881
+ gprvalue = **p_argv.ss;
31884
+ case FFI_TYPE_INT:
31885
+ case FFI_TYPE_UINT32:
31886
+ case FFI_TYPE_SINT32:
31887
+ case FFI_TYPE_POINTER:
31889
+ gprvalue = **p_argv.ui;
31892
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS)
31893
+ *next_arg.u++ = gprvalue;
31895
+ *gpr_base.u++ = gprvalue;
31901
+ /* Check that we didn't overrun the stack... */
31902
+ FFI_ASSERT (copy_space.c >= next_arg.c);
31903
+ FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
31904
+ /* The assert below is testing that the number of integer arguments agrees
31905
+ with the number found in ffi_prep_cif_machdep(). However, intarg_count
31906
+ is incremented whenever we place an FP arg on the stack, so account for
31907
+ that before our assert test. */
31908
+#ifndef __NO_FPRS__
31909
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
31910
+ intarg_count -= fparg_count - NUM_FPR_ARG_REGISTERS;
31911
+ FFI_ASSERT (fpr_base.u
31912
+ <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
31914
+ FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
31917
+#define MIN_CACHE_LINE_SIZE 8
31920
+flush_icache (char *wraddr, char *xaddr, int size)
31923
+ for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
31924
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
31925
+ : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
31926
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
31927
+ : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
31931
+ffi_status FFI_HIDDEN
31932
+ffi_prep_closure_loc_sysv (ffi_closure *closure,
31934
+ void (*fun) (ffi_cif *, void *, void **, void *),
31938
+ unsigned int *tramp;
31940
+ if (cif->abi < FFI_SYSV || cif->abi >= FFI_LAST_ABI)
31941
+ return FFI_BAD_ABI;
31943
+ tramp = (unsigned int *) &closure->tramp[0];
31944
+ tramp[0] = 0x7c0802a6; /* mflr r0 */
31945
+ tramp[1] = 0x4800000d; /* bl 10 <trampoline_initial+0x10> */
31946
+ tramp[4] = 0x7d6802a6; /* mflr r11 */
31947
+ tramp[5] = 0x7c0803a6; /* mtlr r0 */
31948
+ tramp[6] = 0x800b0000; /* lwz r0,0(r11) */
31949
+ tramp[7] = 0x816b0004; /* lwz r11,4(r11) */
31950
+ tramp[8] = 0x7c0903a6; /* mtctr r0 */
31951
+ tramp[9] = 0x4e800420; /* bctr */
31952
+ *(void **) &tramp[2] = (void *) ffi_closure_SYSV; /* function */
31953
+ *(void **) &tramp[3] = codeloc; /* context */
31955
+ /* Flush the icache. */
31956
+ flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
31958
+ closure->cif = cif;
31959
+ closure->fun = fun;
31960
+ closure->user_data = user_data;
31965
+/* Basically the trampoline invokes ffi_closure_SYSV, and on
31966
+ entry, r11 holds the address of the closure.
31967
+ After storing the registers that could possibly contain
31968
+ parameters to be passed into the stack frame and setting
31969
+ up space for a return value, ffi_closure_SYSV invokes the
31970
+ following helper function to do most of the work. */
31973
+ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue,
31974
+ unsigned long *pgr, ffi_dblfl *pfr,
31975
+ unsigned long *pst)
31977
+ /* rvalue is the pointer to space for return value in closure assembly */
31978
+ /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
31979
+ /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV */
31980
+ /* pst is the pointer to outgoing parameter stack in original caller */
31983
+ ffi_type ** arg_types;
31985
+#ifndef __NO_FPRS__
31986
+ long nf = 0; /* number of floating registers already used */
31988
+ long ng = 0; /* number of general registers already used */
31990
+ ffi_cif *cif = closure->cif;
31991
+ unsigned size = cif->rtype->size;
31992
+ unsigned short rtypenum = cif->rtype->type;
31994
+ avalue = alloca (cif->nargs * sizeof (void *));
31996
+ /* First translate for softfloat/nonlinux */
31997
+ rtypenum = translate_float (cif->abi, rtypenum);
31999
+ /* Copy the caller's structure return value address so that the closure
32000
+ returns the data directly to the caller.
32001
+ For FFI_SYSV the result is passed in r3/r4 if the struct size is less
32002
+ or equal 8 bytes. */
32003
+ if (rtypenum == FFI_TYPE_STRUCT
32004
+ && !((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8))
32006
+ rvalue = (void *) *pgr;
32012
+ avn = cif->nargs;
32013
+ arg_types = cif->arg_types;
32015
+ /* Grab the addresses of the arguments from the stack frame. */
32016
+ while (i < avn) {
32017
+ unsigned short typenum = arg_types[i]->type;
32019
+ /* We may need to handle some values depending on ABI. */
32020
+ typenum = translate_float (cif->abi, typenum);
32024
+#ifndef __NO_FPRS__
32025
+ case FFI_TYPE_FLOAT:
32026
+ /* Unfortunately float values are stored as doubles
32027
+ in the ffi_closure_SYSV code (since we don't check
32028
+ the type in that routine). */
32029
+ if (nf < NUM_FPR_ARG_REGISTERS)
32031
+ /* FIXME? here we are really changing the values
32032
+ stored in the original calling routines outgoing
32033
+ parameter stack. This is probably a really
32034
+ naughty thing to do but... */
32035
+ double temp = pfr->d;
32036
+ pfr->f = (float) temp;
32048
+ case FFI_TYPE_DOUBLE:
32049
+ if (nf < NUM_FPR_ARG_REGISTERS)
32057
+ if (((long) pst) & 4)
32064
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
32065
+ case FFI_TYPE_LONGDOUBLE:
32066
+ if (nf < NUM_FPR_ARG_REGISTERS - 1)
32074
+ if (((long) pst) & 4)
32084
+ case FFI_TYPE_UINT128:
32085
+ /* Test if for the whole long double, 4 gprs are available.
32086
+ otherwise the stuff ends up on the stack. */
32087
+ if (ng < NUM_GPR_ARG_REGISTERS - 3)
32101
+ case FFI_TYPE_SINT8:
32102
+ case FFI_TYPE_UINT8:
32103
+#ifndef __LITTLE_ENDIAN__
32104
+ if (ng < NUM_GPR_ARG_REGISTERS)
32106
+ avalue[i] = (char *) pgr + 3;
32112
+ avalue[i] = (char *) pst + 3;
32118
+ case FFI_TYPE_SINT16:
32119
+ case FFI_TYPE_UINT16:
32120
+#ifndef __LITTLE_ENDIAN__
32121
+ if (ng < NUM_GPR_ARG_REGISTERS)
32123
+ avalue[i] = (char *) pgr + 2;
32129
+ avalue[i] = (char *) pst + 2;
32135
+ case FFI_TYPE_SINT32:
32136
+ case FFI_TYPE_UINT32:
32137
+ case FFI_TYPE_POINTER:
32138
+ if (ng < NUM_GPR_ARG_REGISTERS)
32151
+ case FFI_TYPE_STRUCT:
32152
+ /* Structs are passed by reference. The address will appear in a
32153
+ gpr if it is one of the first 8 arguments. */
32154
+ if (ng < NUM_GPR_ARG_REGISTERS)
32156
+ avalue[i] = (void *) *pgr;
32162
+ avalue[i] = (void *) *pst;
32167
+ case FFI_TYPE_SINT64:
32168
+ case FFI_TYPE_UINT64:
32169
+ /* Passing long long ints are complex, they must
32170
+ be passed in suitable register pairs such as
32171
+ (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10)
32172
+ and if the entire pair aren't available then the outgoing
32173
+ parameter stack is used for both but an alignment of 8
32174
+ must will be kept. So we must either look in pgr
32175
+ or pst to find the correct address for this type
32177
+ if (ng < NUM_GPR_ARG_REGISTERS - 1)
32181
+ /* skip r4, r6, r8 as starting points */
32191
+ if (((long) pst) & 4)
32195
+ ng = NUM_GPR_ARG_REGISTERS;
32206
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
32208
+ /* Tell ffi_closure_SYSV how to perform return type promotions.
32209
+ Because the FFI_SYSV ABI returns the structures <= 8 bytes in
32210
+ r3/r4 we have to tell ffi_closure_SYSV how to treat them. We
32211
+ combine the base type FFI_SYSV_TYPE_SMALL_STRUCT with the size of
32212
+ the struct less one. We never have a struct with size zero.
32213
+ See the comment in ffitarget.h about ordering. */
32214
+ if (rtypenum == FFI_TYPE_STRUCT
32215
+ && (cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)
32216
+ return FFI_SYSV_TYPE_SMALL_STRUCT - 1 + size;
32220
--- a/src/libffi/src/powerpc/linux64.S
32221
+++ b/src/libffi/src/powerpc/linux64.S
32222
@@ -29,18 +29,25 @@
32223
#include <fficonfig.h>
32226
-#ifdef __powerpc64__
32228
.hidden ffi_call_LINUX64
32229
.globl ffi_call_LINUX64
32230
+# if _CALL_ELF == 2
32233
+ addis %r2, %r12, .TOC.-ffi_call_LINUX64@ha
32234
+ addi %r2, %r2, .TOC.-ffi_call_LINUX64@l
32235
+ .localentry ffi_call_LINUX64, . - ffi_call_LINUX64
32237
.section ".opd","aw"
32240
-#ifdef _CALL_LINUX
32241
+# ifdef _CALL_LINUX
32242
.quad .L.ffi_call_LINUX64,.TOC.@tocbase,0
32243
.type ffi_call_LINUX64,@function
32245
.L.ffi_call_LINUX64:
32248
.hidden .ffi_call_LINUX64
32249
.globl .ffi_call_LINUX64
32250
.quad .ffi_call_LINUX64,.TOC.@tocbase,0
32252
.type .ffi_call_LINUX64,@function
32261
@@ -63,26 +71,35 @@
32262
mr %r31, %r5 /* flags, */
32263
mr %r30, %r6 /* rvalue, */
32264
mr %r29, %r7 /* function address. */
32265
+/* Save toc pointer, not for the ffi_prep_args64 call, but for the later
32266
+ bctrl function call. */
32267
+# if _CALL_ELF == 2
32273
/* Call ffi_prep_args64. */
32275
-#ifdef _CALL_LINUX
32276
+# if defined _CALL_LINUX || _CALL_ELF == 2
32280
bl .ffi_prep_args64
32285
+# if _CALL_ELF == 2
32293
/* Now do the call. */
32294
/* Set up cr1 with bits 4-7 of the flags. */
32297
/* Get the address to call into CTR. */
32300
/* Load all those argument registers. */
32301
ld %r3, -32-(8*8)(%r28)
32302
ld %r4, -32-(7*8)(%r28)
32303
@@ -117,12 +134,17 @@
32305
/* This must follow the call immediately, the unwinder
32306
uses this to find out if r2 has been saved or not. */
32307
+# if _CALL_ELF == 2
32313
/* Now, deal with the return value. */
32315
- bt- 30, .Ldone_return_value
32316
- bt- 29, .Lfp_return_value
32317
+ bt 31, .Lstruct_return_value
32318
+ bt 30, .Ldone_return_value
32319
+ bt 29, .Lfp_return_value
32321
/* Fall through... */
32323
@@ -130,7 +152,7 @@
32324
/* Restore the registers we used and return. */
32327
- ld %r28, -32(%r1)
32328
+ ld %r28, -32(%r28)
32332
@@ -147,14 +169,48 @@
32333
.Lfloat_return_value:
32335
b .Ldone_return_value
32337
+.Lstruct_return_value:
32338
+ bf 29, .Lsmall_struct
32339
+ bf 28, .Lfloat_homog_return_value
32340
+ stfd %f1, 0(%r30)
32341
+ stfd %f2, 8(%r30)
32342
+ stfd %f3, 16(%r30)
32343
+ stfd %f4, 24(%r30)
32344
+ stfd %f5, 32(%r30)
32345
+ stfd %f6, 40(%r30)
32346
+ stfd %f7, 48(%r30)
32347
+ stfd %f8, 56(%r30)
32348
+ b .Ldone_return_value
32350
+.Lfloat_homog_return_value:
32351
+ stfs %f1, 0(%r30)
32352
+ stfs %f2, 4(%r30)
32353
+ stfs %f3, 8(%r30)
32354
+ stfs %f4, 12(%r30)
32355
+ stfs %f5, 16(%r30)
32356
+ stfs %f6, 20(%r30)
32357
+ stfs %f7, 24(%r30)
32358
+ stfs %f8, 28(%r30)
32359
+ b .Ldone_return_value
32364
+ b .Ldone_return_value
32368
.byte 0,12,0,1,128,4,0,0
32369
-#ifdef _CALL_LINUX
32370
+# if _CALL_ELF == 2
32371
+ .size ffi_call_LINUX64,.-ffi_call_LINUX64
32373
+# ifdef _CALL_LINUX
32374
.size ffi_call_LINUX64,.-.L.ffi_call_LINUX64
32377
.size .ffi_call_LINUX64,.-.ffi_call_LINUX64
32382
.section .eh_frame,EH_FRAME_FLAGS,@progbits
32384
@@ -197,8 +253,8 @@
32390
-#if defined __ELF__ && defined __linux__
32391
+# if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2
32392
.section .note.GNU-stack,"",@progbits
32395
--- a/src/libffi/src/powerpc/ffi_linux64.c
32396
+++ b/src/libffi/src/powerpc/ffi_linux64.c
32398
+/* -----------------------------------------------------------------------
32399
+ ffi_linux64.c - Copyright (C) 2013 IBM
32400
+ Copyright (C) 2011 Anthony Green
32401
+ Copyright (C) 2011 Kyle Moffett
32402
+ Copyright (C) 2008 Red Hat, Inc
32403
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc
32404
+ Copyright (c) 1998 Geoffrey Keating
32406
+ PowerPC Foreign Function Interface
32408
+ Permission is hereby granted, free of charge, to any person obtaining
32409
+ a copy of this software and associated documentation files (the
32410
+ ``Software''), to deal in the Software without restriction, including
32411
+ without limitation the rights to use, copy, modify, merge, publish,
32412
+ distribute, sublicense, and/or sell copies of the Software, and to
32413
+ permit persons to whom the Software is furnished to do so, subject to
32414
+ the following conditions:
32416
+ The above copyright notice and this permission notice shall be included
32417
+ in all copies or substantial portions of the Software.
32419
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
32420
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32421
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
32422
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
32423
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
32424
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
32425
+ OTHER DEALINGS IN THE SOFTWARE.
32426
+ ----------------------------------------------------------------------- */
32431
+#include "ffi_common.h"
32432
+#include "ffi_powerpc.h"
32435
+/* About the LINUX64 ABI. */
32437
+ NUM_GPR_ARG_REGISTERS64 = 8,
32438
+ NUM_FPR_ARG_REGISTERS64 = 13
32440
+enum { ASM_NEEDS_REGISTERS64 = 4 };
32443
+#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
32444
+/* Adjust size of ffi_type_longdouble. */
32446
+ffi_prep_types_linux64 (ffi_abi abi)
32448
+ if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX)
32450
+ ffi_type_longdouble.size = 8;
32451
+ ffi_type_longdouble.alignment = 8;
32455
+ ffi_type_longdouble.size = 16;
32456
+ ffi_type_longdouble.alignment = 16;
32462
+#if _CALL_ELF == 2
32463
+static unsigned int
32464
+discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
32468
+ case FFI_TYPE_FLOAT:
32469
+ case FFI_TYPE_DOUBLE:
32471
+ return (int) t->type;
32473
+ case FFI_TYPE_STRUCT:;
32475
+ unsigned int base_elt = 0, total_elnum = 0;
32476
+ ffi_type **el = t->elements;
32479
+ unsigned int el_elt, el_elnum = 0;
32480
+ el_elt = discover_homogeneous_aggregate (*el, &el_elnum);
32482
+ || (base_elt && base_elt != el_elt))
32484
+ base_elt = el_elt;
32485
+ total_elnum += el_elnum;
32486
+ if (total_elnum > 8)
32490
+ *elnum = total_elnum;
32501
+/* Perform machine dependent cif processing */
32503
+ffi_prep_cif_linux64_core (ffi_cif *cif)
32507
+ unsigned i, fparg_count = 0, intarg_count = 0;
32508
+ unsigned flags = cif->flags;
32509
+#if _CALL_ELF == 2
32510
+ unsigned int elt, elnum;
32513
+#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
32514
+ /* If compiled without long double support.. */
32515
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
32516
+ return FFI_BAD_ABI;
32519
+ /* The machine-independent calculation of cif->bytes doesn't work
32520
+ for us. Redo the calculation. */
32521
+#if _CALL_ELF == 2
32522
+ /* Space for backchain, CR, LR, TOC and the asm's temp regs. */
32523
+ bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
32525
+ /* Space for the general registers. */
32526
+ bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
32528
+ /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
32530
+ bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
32532
+ /* Space for the mandatory parm save area and general registers. */
32533
+ bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
32536
+ /* Return value handling. */
32537
+ switch (cif->rtype->type)
32539
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
32540
+ case FFI_TYPE_LONGDOUBLE:
32541
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
32542
+ flags |= FLAG_RETURNS_128BITS;
32543
+ /* Fall through. */
32545
+ case FFI_TYPE_DOUBLE:
32546
+ flags |= FLAG_RETURNS_64BITS;
32547
+ /* Fall through. */
32548
+ case FFI_TYPE_FLOAT:
32549
+ flags |= FLAG_RETURNS_FP;
32552
+ case FFI_TYPE_UINT128:
32553
+ flags |= FLAG_RETURNS_128BITS;
32554
+ /* Fall through. */
32555
+ case FFI_TYPE_UINT64:
32556
+ case FFI_TYPE_SINT64:
32557
+ flags |= FLAG_RETURNS_64BITS;
32560
+ case FFI_TYPE_STRUCT:
32561
+#if _CALL_ELF == 2
32562
+ elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
32565
+ if (elt == FFI_TYPE_DOUBLE)
32566
+ flags |= FLAG_RETURNS_64BITS;
32567
+ flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
32570
+ if (cif->rtype->size <= 16)
32572
+ flags |= FLAG_RETURNS_SMST;
32577
+ flags |= FLAG_RETVAL_REFERENCE;
32578
+ /* Fall through. */
32579
+ case FFI_TYPE_VOID:
32580
+ flags |= FLAG_RETURNS_NOTHING;
32584
+ /* Returns 32-bit integer, or similar. Nothing to do here. */
32588
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
32590
+ unsigned int align;
32592
+ switch ((*ptr)->type)
32594
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
32595
+ case FFI_TYPE_LONGDOUBLE:
32596
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
32601
+ /* Fall through. */
32603
+ case FFI_TYPE_DOUBLE:
32604
+ case FFI_TYPE_FLOAT:
32607
+ if (fparg_count > NUM_FPR_ARG_REGISTERS64)
32608
+ flags |= FLAG_ARG_NEEDS_PSAVE;
32611
+ case FFI_TYPE_STRUCT:
32612
+ if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
32614
+ align = (*ptr)->alignment;
32617
+ align = align / 8;
32619
+ intarg_count = ALIGN (intarg_count, align);
32621
+ intarg_count += ((*ptr)->size + 7) / 8;
32622
+#if _CALL_ELF == 2
32623
+ elt = discover_homogeneous_aggregate (*ptr, &elnum);
32626
+ fparg_count += elnum;
32627
+ if (fparg_count > NUM_FPR_ARG_REGISTERS64)
32628
+ flags |= FLAG_ARG_NEEDS_PSAVE;
32633
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
32634
+ flags |= FLAG_ARG_NEEDS_PSAVE;
32638
+ case FFI_TYPE_POINTER:
32639
+ case FFI_TYPE_UINT64:
32640
+ case FFI_TYPE_SINT64:
32641
+ case FFI_TYPE_INT:
32642
+ case FFI_TYPE_UINT32:
32643
+ case FFI_TYPE_SINT32:
32644
+ case FFI_TYPE_UINT16:
32645
+ case FFI_TYPE_SINT16:
32646
+ case FFI_TYPE_UINT8:
32647
+ case FFI_TYPE_SINT8:
32648
+ /* Everything else is passed as a 8-byte word in a GPR, either
32649
+ the object itself or a pointer to it. */
32651
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
32652
+ flags |= FLAG_ARG_NEEDS_PSAVE;
32659
+ if (fparg_count != 0)
32660
+ flags |= FLAG_FP_ARGUMENTS;
32661
+ if (intarg_count > 4)
32662
+ flags |= FLAG_4_GPR_ARGUMENTS;
32664
+ /* Space for the FPR registers, if needed. */
32665
+ if (fparg_count != 0)
32666
+ bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
32668
+ /* Stack space. */
32669
+#if _CALL_ELF == 2
32670
+ if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
32671
+ bytes += intarg_count * sizeof (long);
32673
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
32674
+ bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
32677
+ /* The stack space allocated needs to be a multiple of 16 bytes. */
32678
+ bytes = (bytes + 15) & ~0xF;
32680
+ cif->flags = flags;
32681
+ cif->bytes = bytes;
32686
+ffi_status FFI_HIDDEN
32687
+ffi_prep_cif_linux64 (ffi_cif *cif)
32689
+ if ((cif->abi & FFI_LINUX) != 0)
32690
+ cif->nfixedargs = cif->nargs;
32691
+#if _CALL_ELF != 2
32692
+ else if (cif->abi == FFI_COMPAT_LINUX64)
32694
+ /* This call is from old code. Don't touch cif->nfixedargs
32695
+ since old code will be using a smaller cif. */
32696
+ cif->flags |= FLAG_COMPAT;
32697
+ /* Translate to new abi value. */
32698
+ cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
32702
+ return FFI_BAD_ABI;
32703
+ return ffi_prep_cif_linux64_core (cif);
32706
+ffi_status FFI_HIDDEN
32707
+ffi_prep_cif_linux64_var (ffi_cif *cif,
32708
+ unsigned int nfixedargs,
32709
+ unsigned int ntotalargs MAYBE_UNUSED)
32711
+ if ((cif->abi & FFI_LINUX) != 0)
32712
+ cif->nfixedargs = nfixedargs;
32713
+#if _CALL_ELF != 2
32714
+ else if (cif->abi == FFI_COMPAT_LINUX64)
32716
+ /* This call is from old code. Don't touch cif->nfixedargs
32717
+ since old code will be using a smaller cif. */
32718
+ cif->flags |= FLAG_COMPAT;
32719
+ /* Translate to new abi value. */
32720
+ cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
32724
+ return FFI_BAD_ABI;
32725
+#if _CALL_ELF == 2
32726
+ cif->flags |= FLAG_ARG_NEEDS_PSAVE;
32728
+ return ffi_prep_cif_linux64_core (cif);
32732
+/* ffi_prep_args64 is called by the assembly routine once stack space
32733
+ has been allocated for the function's arguments.
32735
+ The stack layout we want looks like this:
32737
+ | Ret addr from ffi_call_LINUX64 8bytes | higher addresses
32738
+ |--------------------------------------------|
32739
+ | CR save area 8bytes |
32740
+ |--------------------------------------------|
32741
+ | Previous backchain pointer 8 | stack pointer here
32742
+ |--------------------------------------------|<+ <<< on entry to
32743
+ | Saved r28-r31 4*8 | | ffi_call_LINUX64
32744
+ |--------------------------------------------| |
32745
+ | GPR registers r3-r10 8*8 | |
32746
+ |--------------------------------------------| |
32747
+ | FPR registers f1-f13 (optional) 13*8 | |
32748
+ |--------------------------------------------| |
32749
+ | Parameter save area | |
32750
+ |--------------------------------------------| |
32751
+ | TOC save area 8 | |
32752
+ |--------------------------------------------| | stack |
32753
+ | Linker doubleword 8 | | grows |
32754
+ |--------------------------------------------| | down V
32755
+ | Compiler doubleword 8 | |
32756
+ |--------------------------------------------| | lower addresses
32757
+ | Space for callee's LR 8 | |
32758
+ |--------------------------------------------| |
32759
+ | CR save area 8 | |
32760
+ |--------------------------------------------| | stack pointer here
32761
+ | Current backchain pointer 8 |-/ during
32762
+ |--------------------------------------------| <<< ffi_call_LINUX64
32767
+ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
32769
+ const unsigned long bytes = ecif->cif->bytes;
32770
+ const unsigned long flags = ecif->cif->flags;
32775
+ unsigned long *ul;
32781
+ /* 'stacktop' points at the previous backchain pointer. */
32784
+ /* 'next_arg' points at the space for gpr3, and grows upwards as
32785
+ we use GPR registers, then continues at rest. */
32791
+ /* 'fpr_base' points at the space for fpr3, and grows upwards as
32792
+ we use FPR registers. */
32794
+ unsigned int fparg_count;
32796
+ unsigned int i, words, nargs, nfixedargs;
32798
+ double double_tmp;
32803
+ signed char **sc;
32804
+ unsigned char **uc;
32805
+ signed short **ss;
32806
+ unsigned short **us;
32808
+ unsigned int **ui;
32809
+ unsigned long **ul;
32813
+ unsigned long gprvalue;
32814
+ unsigned long align;
32816
+ stacktop.c = (char *) stack + bytes;
32817
+ gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
32818
+ gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
32819
+#if _CALL_ELF == 2
32820
+ rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
32822
+ rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
32824
+ fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
32826
+ next_arg.ul = gpr_base.ul;
32828
+ /* Check that everything starts aligned properly. */
32829
+ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
32830
+ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
32831
+ FFI_ASSERT ((bytes & 0xF) == 0);
32833
+ /* Deal with return values that are actually pass-by-reference. */
32834
+ if (flags & FLAG_RETVAL_REFERENCE)
32835
+ *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
32837
+ /* Now for the arguments. */
32838
+ p_argv.v = ecif->avalue;
32839
+ nargs = ecif->cif->nargs;
32840
+#if _CALL_ELF != 2
32841
+ nfixedargs = (unsigned) -1;
32842
+ if ((flags & FLAG_COMPAT) == 0)
32844
+ nfixedargs = ecif->cif->nfixedargs;
32845
+ for (ptr = ecif->cif->arg_types, i = 0;
32847
+ i++, ptr++, p_argv.v++)
32849
+#if _CALL_ELF == 2
32850
+ unsigned int elt, elnum;
32853
+ switch ((*ptr)->type)
32855
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
32856
+ case FFI_TYPE_LONGDOUBLE:
32857
+ if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
32859
+ double_tmp = (*p_argv.d)[0];
32860
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
32862
+ *fpr_base.d++ = double_tmp;
32863
+# if _CALL_ELF != 2
32864
+ if ((flags & FLAG_COMPAT) != 0)
32865
+ *next_arg.d = double_tmp;
32869
+ *next_arg.d = double_tmp;
32870
+ if (++next_arg.ul == gpr_end.ul)
32871
+ next_arg.ul = rest.ul;
32873
+ double_tmp = (*p_argv.d)[1];
32874
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
32876
+ *fpr_base.d++ = double_tmp;
32877
+# if _CALL_ELF != 2
32878
+ if ((flags & FLAG_COMPAT) != 0)
32879
+ *next_arg.d = double_tmp;
32883
+ *next_arg.d = double_tmp;
32884
+ if (++next_arg.ul == gpr_end.ul)
32885
+ next_arg.ul = rest.ul;
32887
+ FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
32888
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
32891
+ /* Fall through. */
32893
+ case FFI_TYPE_DOUBLE:
32894
+ double_tmp = **p_argv.d;
32895
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
32897
+ *fpr_base.d++ = double_tmp;
32898
+#if _CALL_ELF != 2
32899
+ if ((flags & FLAG_COMPAT) != 0)
32900
+ *next_arg.d = double_tmp;
32904
+ *next_arg.d = double_tmp;
32905
+ if (++next_arg.ul == gpr_end.ul)
32906
+ next_arg.ul = rest.ul;
32908
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
32911
+ case FFI_TYPE_FLOAT:
32912
+ double_tmp = **p_argv.f;
32913
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
32915
+ *fpr_base.d++ = double_tmp;
32916
+#if _CALL_ELF != 2
32917
+ if ((flags & FLAG_COMPAT) != 0)
32918
+ *next_arg.f = (float) double_tmp;
32922
+ *next_arg.f = (float) double_tmp;
32923
+ if (++next_arg.ul == gpr_end.ul)
32924
+ next_arg.ul = rest.ul;
32926
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
32929
+ case FFI_TYPE_STRUCT:
32930
+ if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
32932
+ align = (*ptr)->alignment;
32936
+ next_arg.p = ALIGN (next_arg.p, align);
32938
+#if _CALL_ELF == 2
32939
+ elt = discover_homogeneous_aggregate (*ptr, &elnum);
32948
+ arg.v = *p_argv.v;
32949
+ if (elt == FFI_TYPE_FLOAT)
32953
+ double_tmp = *arg.f++;
32954
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64
32955
+ && i < nfixedargs)
32956
+ *fpr_base.d++ = double_tmp;
32958
+ *next_arg.f = (float) double_tmp;
32959
+ if (++next_arg.f == gpr_end.f)
32960
+ next_arg.f = rest.f;
32963
+ while (--elnum != 0);
32964
+ if ((next_arg.p & 3) != 0)
32966
+ if (++next_arg.f == gpr_end.f)
32967
+ next_arg.f = rest.f;
32973
+ double_tmp = *arg.d++;
32974
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
32975
+ *fpr_base.d++ = double_tmp;
32977
+ *next_arg.d = double_tmp;
32978
+ if (++next_arg.d == gpr_end.d)
32979
+ next_arg.d = rest.d;
32982
+ while (--elnum != 0);
32987
+ words = ((*ptr)->size + 7) / 8;
32988
+ if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
32990
+ size_t first = gpr_end.c - next_arg.c;
32991
+ memcpy (next_arg.c, *p_argv.c, first);
32992
+ memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
32993
+ next_arg.c = rest.c + words * 8 - first;
32997
+ char *where = next_arg.c;
32999
+#ifndef __LITTLE_ENDIAN__
33000
+ /* Structures with size less than eight bytes are passed
33002
+ if ((*ptr)->size < 8)
33003
+ where += 8 - (*ptr)->size;
33005
+ memcpy (where, *p_argv.c, (*ptr)->size);
33006
+ next_arg.ul += words;
33007
+ if (next_arg.ul == gpr_end.ul)
33008
+ next_arg.ul = rest.ul;
33013
+ case FFI_TYPE_UINT8:
33014
+ gprvalue = **p_argv.uc;
33016
+ case FFI_TYPE_SINT8:
33017
+ gprvalue = **p_argv.sc;
33019
+ case FFI_TYPE_UINT16:
33020
+ gprvalue = **p_argv.us;
33022
+ case FFI_TYPE_SINT16:
33023
+ gprvalue = **p_argv.ss;
33025
+ case FFI_TYPE_UINT32:
33026
+ gprvalue = **p_argv.ui;
33028
+ case FFI_TYPE_INT:
33029
+ case FFI_TYPE_SINT32:
33030
+ gprvalue = **p_argv.si;
33033
+ case FFI_TYPE_UINT64:
33034
+ case FFI_TYPE_SINT64:
33035
+ case FFI_TYPE_POINTER:
33036
+ gprvalue = **p_argv.ul;
33038
+ *next_arg.ul++ = gprvalue;
33039
+ if (next_arg.ul == gpr_end.ul)
33040
+ next_arg.ul = rest.ul;
33045
+ FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
33046
+ || (next_arg.ul >= gpr_base.ul
33047
+ && next_arg.ul <= gpr_base.ul + 4));
33051
+#if _CALL_ELF == 2
33052
+#define MIN_CACHE_LINE_SIZE 8
33055
+flush_icache (char *wraddr, char *xaddr, int size)
33058
+ for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
33059
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
33060
+ : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
33061
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
33062
+ : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
33068
+ffi_prep_closure_loc_linux64 (ffi_closure *closure,
33070
+ void (*fun) (ffi_cif *, void *, void **, void *),
33074
+#if _CALL_ELF == 2
33075
+ unsigned int *tramp = (unsigned int *) &closure->tramp[0];
33077
+ if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
33078
+ return FFI_BAD_ABI;
33080
+ tramp[0] = 0xe96c0018; /* 0: ld 11,2f-0b(12) */
33081
+ tramp[1] = 0xe98c0010; /* ld 12,1f-0b(12) */
33082
+ tramp[2] = 0x7d8903a6; /* mtctr 12 */
33083
+ tramp[3] = 0x4e800420; /* bctr */
33084
+ /* 1: .quad function_addr */
33085
+ /* 2: .quad context */
33086
+ *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
33087
+ *(void **) &tramp[6] = codeloc;
33088
+ flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
33090
+ void **tramp = (void **) &closure->tramp[0];
33092
+ if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
33093
+ return FFI_BAD_ABI;
33095
+ /* Copy function address and TOC from ffi_closure_LINUX64. */
33096
+ memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
33097
+ tramp[2] = codeloc;
33100
+ closure->cif = cif;
33101
+ closure->fun = fun;
33102
+ closure->user_data = user_data;
33109
+ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue,
33110
+ unsigned long *pst, ffi_dblfl *pfr)
33112
+ /* rvalue is the pointer to space for return value in closure assembly */
33113
+ /* pst is the pointer to parameter save area
33114
+ (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
33115
+ /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
33118
+ ffi_type **arg_types;
33119
+ unsigned long i, avn, nfixedargs;
33121
+ ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
33122
+ unsigned long align;
33124
+ cif = closure->cif;
33125
+ avalue = alloca (cif->nargs * sizeof (void *));
33127
+ /* Copy the caller's structure return value address so that the
33128
+ closure returns the data directly to the caller. */
33129
+ if (cif->rtype->type == FFI_TYPE_STRUCT
33130
+ && (cif->flags & FLAG_RETURNS_SMST) == 0)
33132
+ rvalue = (void *) *pst;
33137
+ avn = cif->nargs;
33138
+#if _CALL_ELF != 2
33139
+ nfixedargs = (unsigned) -1;
33140
+ if ((cif->flags & FLAG_COMPAT) == 0)
33142
+ nfixedargs = cif->nfixedargs;
33143
+ arg_types = cif->arg_types;
33145
+ /* Grab the addresses of the arguments from the stack frame. */
33148
+ unsigned int elt, elnum;
33150
+ switch (arg_types[i]->type)
33152
+ case FFI_TYPE_SINT8:
33153
+ case FFI_TYPE_UINT8:
33154
+#ifndef __LITTLE_ENDIAN__
33155
+ avalue[i] = (char *) pst + 7;
33160
+ case FFI_TYPE_SINT16:
33161
+ case FFI_TYPE_UINT16:
33162
+#ifndef __LITTLE_ENDIAN__
33163
+ avalue[i] = (char *) pst + 6;
33168
+ case FFI_TYPE_SINT32:
33169
+ case FFI_TYPE_UINT32:
33170
+#ifndef __LITTLE_ENDIAN__
33171
+ avalue[i] = (char *) pst + 4;
33176
+ case FFI_TYPE_SINT64:
33177
+ case FFI_TYPE_UINT64:
33178
+ case FFI_TYPE_POINTER:
33183
+ case FFI_TYPE_STRUCT:
33184
+ if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
33186
+ align = arg_types[i]->alignment;
33190
+ pst = (unsigned long *) ALIGN ((size_t) pst, align);
33193
+#if _CALL_ELF == 2
33194
+ elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
33200
+ unsigned long *ul;
33206
+ /* Repackage the aggregate from its parts. The
33207
+ aggregate size is not greater than the space taken by
33208
+ the registers so store back to the register/parameter
33210
+ if (pfr + elnum <= end_pfr)
33215
+ avalue[i] = to.v;
33217
+ if (elt == FFI_TYPE_FLOAT)
33221
+ if (pfr < end_pfr && i < nfixedargs)
33223
+ *to.f = (float) pfr->d;
33231
+ while (--elnum != 0);
33237
+ if (pfr < end_pfr && i < nfixedargs)
33247
+ while (--elnum != 0);
33252
+#ifndef __LITTLE_ENDIAN__
33253
+ /* Structures with size less than eight bytes are passed
33255
+ if (arg_types[i]->size < 8)
33256
+ avalue[i] = (char *) pst + 8 - arg_types[i]->size;
33261
+ pst += (arg_types[i]->size + 7) / 8;
33264
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
33265
+ case FFI_TYPE_LONGDOUBLE:
33266
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
33268
+ if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
33275
+ if (pfr < end_pfr && i < nfixedargs)
33277
+ /* Passed partly in f13 and partly on the stack.
33278
+ Move it all to the stack. */
33279
+ *pst = *(unsigned long *) pfr;
33287
+ /* Fall through. */
33289
+ case FFI_TYPE_DOUBLE:
33290
+ /* On the outgoing stack all values are aligned to 8 */
33291
+ /* there are 13 64bit floating point registers */
33293
+ if (pfr < end_pfr && i < nfixedargs)
33303
+ case FFI_TYPE_FLOAT:
33304
+ if (pfr < end_pfr && i < nfixedargs)
33306
+ /* Float values are stored as doubles in the
33307
+ ffi_closure_LINUX64 code. Fix them here. */
33308
+ pfr->f = (float) pfr->d;
33325
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
33327
+ /* Tell ffi_closure_LINUX64 how to perform return type promotions. */
33328
+ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
33330
+ if ((cif->flags & FLAG_RETURNS_FP) == 0)
33331
+ return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
33332
+ else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
33333
+ return FFI_V2_TYPE_DOUBLE_HOMOG;
33335
+ return FFI_V2_TYPE_FLOAT_HOMOG;
33337
+ return cif->rtype->type;
33340
--- a/src/libffi/src/types.c
33341
+++ b/src/libffi/src/types.c
33346
+#define FFI_NONCONST_TYPEDEF(name, type, id) \
33347
+struct struct_align_##name { \
33351
+ffi_type ffi_type_##name = { \
33353
+ offsetof(struct struct_align_##name, x), \
33357
/* Size and alignment are fake here. They must not be 0. */
33358
const ffi_type ffi_type_void = {
33359
1, 1, FFI_TYPE_VOID, NULL
33362
const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
33363
#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
33364
+# if HAVE_LONG_DOUBLE_VARIANT
33365
+FFI_NONCONST_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
33367
FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
33370
--- a/src/libffi/src/prep_cif.c
33371
+++ b/src/libffi/src/prep_cif.c
33372
@@ -126,6 +126,10 @@
33376
+#if HAVE_LONG_DOUBLE_VARIANT
33377
+ ffi_prep_types (abi);
33380
/* Initialize the return type if necessary */
33381
if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
33382
return FFI_BAD_TYPEDEF;
33383
--- a/src/libffi/testsuite/Makefile.in
33384
+++ b/src/libffi/testsuite/Makefile.in
33388
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
33389
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
33390
INSTALL = @INSTALL@
33391
INSTALL_DATA = @INSTALL_DATA@
33392
INSTALL_PROGRAM = @INSTALL_PROGRAM@
33393
--- a/src/libffi/testsuite/libffi.call/cls_double_va.c
33394
+++ b/src/libffi/testsuite/libffi.call/cls_double_va.c
33395
@@ -38,26 +38,24 @@
33397
/* This printf call is variadic */
33398
CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint,
33399
- arg_types) == FFI_OK);
33400
+ arg_types) == FFI_OK);
33403
args[1] = &doubleArg;
33406
ffi_call(&cif, FFI_FN(printf), &res, args);
33407
- // { dg-output "7.0" }
33408
+ /* { dg-output "7.0" } */
33409
printf("res: %d\n", (int) res);
33410
- // { dg-output "\nres: 4" }
33411
+ /* { dg-output "\nres: 4" } */
33413
- /* The call to cls_double_va_fn is static, so have to use a normal prep_cif */
33414
- CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, arg_types) == FFI_OK);
33415
+ CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL,
33416
+ code) == FFI_OK);
33418
- CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL, code) == FFI_OK);
33420
- res = ((int(*)(char*, double))(code))(format, doubleArg);
33421
- // { dg-output "\n7.0" }
33422
+ res = ((int(*)(char*, ...))(code))(format, doubleArg);
33423
+ /* { dg-output "\n7.0" } */
33424
printf("res: %d\n", (int) res);
33425
- // { dg-output "\nres: 4" }
33426
+ /* { dg-output "\nres: 4" } */
33430
--- a/src/libffi/testsuite/libffi.call/cls_longdouble_va.c
33431
+++ b/src/libffi/testsuite/libffi.call/cls_longdouble_va.c
33432
@@ -38,27 +38,24 @@
33434
/* This printf call is variadic */
33435
CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint,
33436
- arg_types) == FFI_OK);
33437
+ arg_types) == FFI_OK);
33443
ffi_call(&cif, FFI_FN(printf), &res, args);
33444
- // { dg-output "7.0" }
33445
+ /* { dg-output "7.0" } */
33446
printf("res: %d\n", (int) res);
33447
- // { dg-output "\nres: 4" }
33448
+ /* { dg-output "\nres: 4" } */
33450
- /* The call to cls_longdouble_va_fn is static, so have to use a normal prep_cif */
33451
- CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint,
33452
- arg_types) == FFI_OK);
33453
+ CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL,
33454
+ code) == FFI_OK);
33456
- CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL, code) == FFI_OK);
33458
- res = ((int(*)(char*, long double))(code))(format, ldArg);
33459
- // { dg-output "\n7.0" }
33460
+ res = ((int(*)(char*, ...))(code))(format, ldArg);
33461
+ /* { dg-output "\n7.0" } */
33462
printf("res: %d\n", (int) res);
33463
- // { dg-output "\nres: 4" }
33464
+ /* { dg-output "\nres: 4" } */
33468
--- a/src/libffi/configure.ac
33469
+++ b/src/libffi/configure.ac
33471
AM_CONDITIONAL(TESTSUBDIR, test -d $srcdir/testsuite)
33473
TARGETDIR="unknown"
33474
+HAVE_LONG_DOUBLE_VARIANT=0
33477
TARGET=AARCH64; TARGETDIR=aarch64
33478
@@ -162,6 +163,7 @@
33480
powerpc*-*-linux* | powerpc-*-sysv*)
33481
TARGET=POWERPC; TARGETDIR=powerpc
33482
+ HAVE_LONG_DOUBLE_VARIANT=1
33484
powerpc-*-amigaos*)
33485
TARGET=POWERPC; TARGETDIR=powerpc
33486
@@ -177,6 +179,7 @@
33488
powerpc-*-freebsd* | powerpc-*-openbsd*)
33489
TARGET=POWERPC_FREEBSD; TARGETDIR=powerpc
33490
+ HAVE_LONG_DOUBLE_VARIANT=1
33492
powerpc64-*-freebsd*)
33493
TARGET=POWERPC; TARGETDIR=powerpc
33494
@@ -273,14 +276,20 @@
33495
# Also AC_SUBST this variable for ffi.h.
33496
if test -z "$HAVE_LONG_DOUBLE"; then
33498
- if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
33499
- if test $ac_cv_sizeof_long_double != 0; then
33500
+ if test $ac_cv_sizeof_long_double != 0; then
33501
+ if test $HAVE_LONG_DOUBLE_VARIANT != 0; then
33502
+ AC_DEFINE(HAVE_LONG_DOUBLE_VARIANT, 1, [Define if you support more than one size of the long double type])
33504
- AC_DEFINE(HAVE_LONG_DOUBLE, 1, [Define if you have the long double type and it is bigger than a double])
33506
+ if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
33507
+ HAVE_LONG_DOUBLE=1
33508
+ AC_DEFINE(HAVE_LONG_DOUBLE, 1, [Define if you have the long double type and it is bigger than a double])
33513
AC_SUBST(HAVE_LONG_DOUBLE)
33514
+AC_SUBST(HAVE_LONG_DOUBLE_VARIANT)
33518
--- a/src/libffi/doc/libffi.texi
33519
+++ b/src/libffi/doc/libffi.texi
33520
@@ -184,11 +184,11 @@
33522
@var{rvalue} is a pointer to a chunk of memory that will hold the
33523
result of the function call. This must be large enough to hold the
33524
-result and must be suitably aligned; it is the caller's responsibility
33525
+result, no smaller than the system register size (generally 32 or 64
33526
+bits), and must be suitably aligned; it is the caller's responsibility
33527
to ensure this. If @var{cif} declares that the function returns
33528
@code{void} (using @code{ffi_type_void}), then @var{rvalue} is
33529
-ignored. If @var{rvalue} is @samp{NULL}, then the return value is
33533
@var{avalues} is a vector of @code{void *} pointers that point to the
33534
memory locations holding the argument values for a call. If @var{cif}
33535
@@ -214,7 +214,7 @@
33542
/* Initialize the argument info vectors */
33543
args[0] = &ffi_type_pointer;
33544
@@ -222,7 +222,7 @@
33546
/* Initialize the cif */
33547
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
33548
- &ffi_type_uint, args) == FFI_OK)
33549
+ &ffi_type_sint, args) == FFI_OK)
33551
s = "Hello World!";
33552
ffi_call(&cif, puts, &rc, values);
33553
@@ -360,7 +360,7 @@
33554
new @code{ffi_type} object for it.
33558
+@deftp {Data type} ffi_type
33559
The @code{ffi_type} has the following members:
33562
@@ -414,6 +414,7 @@
33565
tm_type.size = tm_type.alignment = 0;
33566
+ tm_type.type = FFI_TYPE_STRUCT;
33567
tm_type.elements = &tm_type_elements;
33569
for (i = 0; i < 9; i++)
33570
@@ -540,21 +541,23 @@
33573
/* Acts like puts with the file given at time of enclosure. */
33574
-void puts_binding(ffi_cif *cif, unsigned int *ret, void* args[],
33576
+void puts_binding(ffi_cif *cif, void *ret, void* args[],
33579
- *ret = fputs(*(char **)args[0], stream);
33580
+ *(ffi_arg *)ret = fputs(*(char **)args[0], (FILE *)stream);
33583
+typedef int (*puts_t)(char *);
33589
ffi_closure *closure;
33591
- int (*bound_puts)(char *);
33592
+ void *bound_puts;
33596
/* Allocate closure and bound_puts */
33597
closure = ffi_closure_alloc(sizeof(ffi_closure), &bound_puts);
33599
@@ -565,13 +568,13 @@
33601
/* Initialize the cif */
33602
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
33603
- &ffi_type_uint, args) == FFI_OK)
33604
+ &ffi_type_sint, args) == FFI_OK)
33606
/* Initialize the closure, setting stream to stdout */
33607
- if (ffi_prep_closure_loc(closure, &cif, puts_binding,
33608
+ if (ffi_prep_closure_loc(closure, &cif, puts_binding,
33609
stdout, bound_puts) == FFI_OK)
33611
- rc = bound_puts("Hello World!");
33612
+ rc = ((puts_t)bound_puts)("Hello World!");
33613
/* rc now holds the result of the call to fputs */
33616
--- a/src/libffi/Makefile.am
33617
+++ b/src/libffi/Makefile.am
33618
@@ -15,10 +15,12 @@
33619
src/ia64/unix.S src/mips/ffi.c src/mips/n32.S src/mips/o32.S \
33620
src/mips/ffitarget.h src/m32r/ffi.c src/m32r/sysv.S \
33621
src/m32r/ffitarget.h src/m68k/ffi.c src/m68k/sysv.S \
33622
- src/m68k/ffitarget.h src/powerpc/ffi.c src/powerpc/sysv.S \
33623
- src/powerpc/linux64.S src/powerpc/linux64_closure.S \
33624
- src/powerpc/ppc_closure.S src/powerpc/asm.h \
33625
- src/powerpc/aix.S src/powerpc/darwin.S \
33626
+ src/m68k/ffitarget.h \
33627
+ src/powerpc/ffi.c src/powerpc/ffi_powerpc.h \
33628
+ src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c \
33629
+ src/powerpc/sysv.S src/powerpc/linux64.S \
33630
+ src/powerpc/linux64_closure.S src/powerpc/ppc_closure.S \
33631
+ src/powerpc/asm.h src/powerpc/aix.S src/powerpc/darwin.S \
33632
src/powerpc/aix_closure.S src/powerpc/darwin_closure.S \
33633
src/powerpc/ffi_darwin.c src/powerpc/ffitarget.h \
33634
src/s390/ffi.c src/s390/sysv.S src/s390/ffitarget.h \
33635
@@ -179,7 +181,7 @@
33636
nodist_libffi_la_SOURCES += src/m68k/ffi.c src/m68k/sysv.S
33639
-nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
33640
+nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/ffi_linux64.c src/powerpc/sysv.S src/powerpc/ppc_closure.S src/powerpc/linux64.S src/powerpc/linux64_closure.S
33643
nodist_libffi_la_SOURCES += src/powerpc/ffi_darwin.c src/powerpc/aix.S src/powerpc/aix_closure.S
33644
@@ -188,7 +190,7 @@
33645
nodist_libffi_la_SOURCES += src/powerpc/ffi_darwin.c src/powerpc/darwin.S src/powerpc/darwin_closure.S
33648
-nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
33649
+nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/ffi_sysv.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
33652
nodist_libffi_la_SOURCES += src/aarch64/sysv.S src/aarch64/ffi.c
33653
--- a/src/libffi/man/Makefile.in
33654
+++ b/src/libffi/man/Makefile.in
33655
@@ -111,6 +111,7 @@
33658
HAVE_LONG_DOUBLE = @HAVE_LONG_DOUBLE@
33659
+HAVE_LONG_DOUBLE_VARIANT = @HAVE_LONG_DOUBLE_VARIANT@
33660
INSTALL = @INSTALL@
33661
INSTALL_DATA = @INSTALL_DATA@
33662
INSTALL_PROGRAM = @INSTALL_PROGRAM@
33663
--- a/src/libssp/configure
33664
+++ b/src/libssp/configure
33665
@@ -6385,7 +6385,7 @@
33669
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
33670
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
33671
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
33672
# Find out which ABI we are using.
33673
echo 'int i;' > conftest.$ac_ext
33674
@@ -6410,7 +6410,10 @@
33678
- ppc64-*linux*|powerpc64-*linux*)
33679
+ powerpc64le-*linux*)
33680
+ LD="${LD-ld} -m elf32lppclinux"
33682
+ powerpc64-*linux*)
33683
LD="${LD-ld} -m elf32ppclinux"
33686
@@ -6429,7 +6432,10 @@
33688
LD="${LD-ld} -m elf_x86_64"
33690
- ppc*-*linux*|powerpc*-*linux*)
33691
+ powerpcle-*linux*)
33692
+ LD="${LD-ld} -m elf64lppc"
33695
LD="${LD-ld} -m elf64ppc"
33697
s390*-*linux*|s390*-*tpf*)
33698
@@ -10658,7 +10664,7 @@
33699
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
33700
lt_status=$lt_dlunknown
33701
cat > conftest.$ac_ext <<_LT_EOF
33702
-#line 10661 "configure"
33703
+#line 10667 "configure"
33704
#include "confdefs.h"
33707
@@ -10764,7 +10770,7 @@
33708
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
33709
lt_status=$lt_dlunknown
33710
cat > conftest.$ac_ext <<_LT_EOF
33711
-#line 10767 "configure"
33712
+#line 10773 "configure"
33713
#include "confdefs.h"
33716
--- a/src/libcpp/ChangeLog.ibm
33717
+++ b/src/libcpp/ChangeLog.ibm
33719
+2013-11-18 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
33721
+ * lex.c (search_line_fast): Correct for little endian.
33723
--- a/src/libcpp/lex.c
33724
+++ b/src/libcpp/lex.c
33725
@@ -559,8 +559,13 @@
33726
beginning with all ones and shifting in zeros according to the
33727
mis-alignment. The LVSR instruction pulls the exact shift we
33728
want from the address. */
33729
+#ifdef __BIG_ENDIAN__
33730
mask = __builtin_vec_lvsr(0, s);
33731
mask = __builtin_vec_perm(zero, ones, mask);
33733
+ mask = __builtin_vec_lvsl(0, s);
33734
+ mask = __builtin_vec_perm(ones, zero, mask);
33738
/* While altivec loads mask addresses, we still need to align S so
33739
@@ -624,7 +629,11 @@
33740
/* L now contains 0xff in bytes for which we matched one of the
33741
relevant characters. We can find the byte index by finding
33742
its bit index and dividing by 8. */
33743
+#ifdef __BIG_ENDIAN__
33744
l = __builtin_clzl(l) >> 3;
33746
+ l = __builtin_ctzl(l) >> 3;