1
# DP: Changes for the Linaro 4.8-2013.08 release.
3
LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@201477 \
4
svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@201722 \
5
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/libitm/ChangeLog.linaro
8
+++ b/src/libitm/ChangeLog.linaro
10
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
12
+ GCC Linaro 4.8-2013.07-1 released.
14
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
16
+ GCC Linaro 4.8-2013.07 released.
18
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
20
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
22
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
24
+ GCC Linaro 4.8-2013.05 released.
26
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
28
+ * GCC Linaro 4.8-2013.04 released.
29
--- a/src/libgomp/ChangeLog.linaro
30
+++ b/src/libgomp/ChangeLog.linaro
32
+2013-07-22 Yvan Roux <yvan.roux@linaro.org>
34
+ Backport from trunk r200521.
35
+ 2013-06-28 Marcus Shawcroft <marcus.shawcroft@arm.com>
37
+ * testsuite/libgomp.fortran/strassen.f90:
38
+ Add dg-skip-if aarch64_tiny.
40
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
42
+ GCC Linaro 4.8-2013.07-1 released.
44
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
46
+ GCC Linaro 4.8-2013.07 released.
48
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
50
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
52
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
54
+ GCC Linaro 4.8-2013.05 released.
56
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
58
+ * GCC Linaro 4.8-2013.04 released.
59
--- a/src/libgomp/testsuite/libgomp.fortran/strassen.f90
60
+++ b/src/libgomp/testsuite/libgomp.fortran/strassen.f90
62
! { dg-options "-O2" }
63
+! { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} }
65
program strassen_matmul
67
--- a/src/libquadmath/ChangeLog.linaro
68
+++ b/src/libquadmath/ChangeLog.linaro
70
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
72
+ GCC Linaro 4.8-2013.07-1 released.
74
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
76
+ GCC Linaro 4.8-2013.07 released.
78
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
80
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
82
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
84
+ GCC Linaro 4.8-2013.05 released.
86
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
88
+ * GCC Linaro 4.8-2013.04 released.
89
--- a/src/libsanitizer/sanitizer_common/sanitizer_linux.cc
90
+++ b/src/libsanitizer/sanitizer_common/sanitizer_linux.cc
92
CHECK_EQ(*current_++, ' ');
93
while (IsDecimal(*current_))
95
- CHECK_EQ(*current_++, ' ');
96
+ // Qemu may lack the trailing space.
97
+ // http://code.google.com/p/address-sanitizer/issues/detail?id=160
98
+ // CHECK_EQ(*current_++, ' ');
100
while (current_ < next_line && *current_ == ' ')
102
--- a/src/libsanitizer/ChangeLog.linaro
103
+++ b/src/libsanitizer/ChangeLog.linaro
105
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
107
+ GCC Linaro 4.8-2013.07-1 released.
109
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
111
+ GCC Linaro 4.8-2013.07 released.
113
+2013-06-20 Christophe Lyon <christophe.lyon@linaro.org>
115
+ Backport from trunk r198683.
116
+ 2013-05-07 Christophe Lyon <christophe.lyon@linaro.org>
118
+ * configure.tgt: Add ARM pattern.
120
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
122
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
124
+2013-06-04 Christophe Lyon <christophe.lyon@linaro.org>
126
+ Backport from trunk r199606.
127
+ 2013-06-03 Christophe Lyon <christophe.lyon@linaro.org>
129
+ * sanitizer_common/sanitizer_linux.cc (MemoryMappingLayout::Next):
130
+ Cherry pick upstream r182922.
132
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
134
+ GCC Linaro 4.8-2013.05 released.
136
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
138
+ * GCC Linaro 4.8-2013.04 released.
139
--- a/src/libsanitizer/configure.tgt
140
+++ b/src/libsanitizer/configure.tgt
147
x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
150
--- a/src/zlib/ChangeLog.linaro
151
+++ b/src/zlib/ChangeLog.linaro
153
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
155
+ GCC Linaro 4.8-2013.07-1 released.
157
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
159
+ GCC Linaro 4.8-2013.07 released.
161
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
163
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
165
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
167
+ GCC Linaro 4.8-2013.05 released.
169
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
171
+ * GCC Linaro 4.8-2013.04 released.
172
--- a/src/libstdc++-v3/ChangeLog.linaro
173
+++ b/src/libstdc++-v3/ChangeLog.linaro
175
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
177
+ GCC Linaro 4.8-2013.07-1 released.
179
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
181
+ GCC Linaro 4.8-2013.07 released.
183
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
185
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
187
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
189
+ GCC Linaro 4.8-2013.05 released.
191
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
193
+ * GCC Linaro 4.8-2013.04 released.
194
--- a/src/intl/ChangeLog.linaro
195
+++ b/src/intl/ChangeLog.linaro
197
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
199
+ GCC Linaro 4.8-2013.07-1 released.
201
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
203
+ GCC Linaro 4.8-2013.07 released.
205
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
207
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
209
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
211
+ GCC Linaro 4.8-2013.05 released.
213
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
215
+ * GCC Linaro 4.8-2013.04 released.
216
--- a/src/ChangeLog.linaro
217
+++ b/src/ChangeLog.linaro
219
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
221
+ GCC Linaro 4.8-2013.07-1 released.
223
+2013-07-09 Christophe Lyon <christophe.lyon@linaro.org>
226
+ * LINARO-VERSION: Bump version.
228
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
230
+ GCC Linaro 4.8-2013.07 released.
232
+2013-06-18 Rob Savoye <rob.savoye@linaro.org>
235
+ * LINARO-VERSION: Bump version.
237
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
239
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
241
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
243
+ GCC Linaro 4.8-2013.05 released.
245
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
247
+ * GCC Linaro 4.8-2013.04 released.
248
--- a/src/libmudflap/ChangeLog.linaro
249
+++ b/src/libmudflap/ChangeLog.linaro
251
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
253
+ GCC Linaro 4.8-2013.07-1 released.
255
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
257
+ GCC Linaro 4.8-2013.07 released.
259
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
261
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
263
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
265
+ GCC Linaro 4.8-2013.05 released.
267
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
269
+ * GCC Linaro 4.8-2013.04 released.
270
--- a/src/boehm-gc/ChangeLog.linaro
271
+++ b/src/boehm-gc/ChangeLog.linaro
273
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
275
+ GCC Linaro 4.8-2013.07-1 released.
277
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
279
+ GCC Linaro 4.8-2013.07 released.
281
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
283
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
285
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
287
+ GCC Linaro 4.8-2013.05 released.
289
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
291
+ Backport from trunk r197770.
293
+ 2013-03-16 Yvan Roux <yvan.roux@linaro.org>
295
+ * include/private/gcconfig.h (AARCH64): New macro (defined only if
297
+ (mach_type_known): Update comment adding ARM AArch64 target.
298
+ (NOSYS, mach_type_known,CPP_WORDSZ, MACH_TYPE, ALIGNMENT, HBLKSIZE,
299
+ OS_TYPE, LINUX_STACKBOTTOM, USE_GENERIC_PUSH_REGS, DYNAMIC_LOADING,
300
+ DATASTART, DATAEND, STACKBOTTOM): Define for AArch64.
302
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
304
+ * GCC Linaro 4.8-2013.04 released.
305
--- a/src/boehm-gc/include/private/gcconfig.h
306
+++ b/src/boehm-gc/include/private/gcconfig.h
310
/* Determine the machine type: */
311
+#if defined(__aarch64__)
313
+# if !defined(LINUX)
315
+# define mach_type_known
318
# if defined(__arm__) || defined(__thumb__)
320
# if !defined(LINUX) && !defined(NETBSD)
323
# define mach_type_known
325
+# if defined(LINUX) && defined(__aarch64__)
327
+# define mach_type_known
329
# if defined(LINUX) && defined(__arm__)
331
# define mach_type_known
333
/* running Amdahl UTS4 */
334
/* S390 ==> 390-like machine */
336
+ /* AARCH64 ==> ARM AArch64 */
337
/* ARM32 ==> Intel StrongARM */
338
/* IA64 ==> Intel IPF */
340
@@ -1833,6 +1845,32 @@
345
+# define CPP_WORDSZ 64
346
+# define MACH_TYPE "AARCH64"
347
+# define ALIGNMENT 8
349
+# define HBLKSIZE 4096
352
+# define OS_TYPE "LINUX"
353
+# define LINUX_STACKBOTTOM
354
+# define USE_GENERIC_PUSH_REGS
355
+# define DYNAMIC_LOADING
356
+ extern int __data_start[];
357
+# define DATASTART ((ptr_t)__data_start)
358
+ extern char _end[];
359
+# define DATAEND ((ptr_t)(&_end))
362
+ /* __data_start is usually defined in the target linker script. */
363
+ extern int __data_start[];
364
+# define DATASTART ((ptr_t)__data_start)
365
+ extern void *__stack_base__;
366
+# define STACKBOTTOM ((ptr_t)__stack_base__)
371
# define CPP_WORDSZ 32
372
# define MACH_TYPE "ARM32"
373
--- a/src/include/ChangeLog.linaro
374
+++ b/src/include/ChangeLog.linaro
376
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
378
+ GCC Linaro 4.8-2013.07-1 released.
380
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
382
+ GCC Linaro 4.8-2013.07 released.
384
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
386
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
388
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
390
+ GCC Linaro 4.8-2013.05 released.
392
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
394
+ * GCC Linaro 4.8-2013.04 released.
395
--- a/src/libiberty/ChangeLog.linaro
396
+++ b/src/libiberty/ChangeLog.linaro
398
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
400
+ GCC Linaro 4.8-2013.07-1 released.
402
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
404
+ GCC Linaro 4.8-2013.07 released.
406
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
408
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
410
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
412
+ GCC Linaro 4.8-2013.05 released.
414
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
416
+ * GCC Linaro 4.8-2013.04 released.
417
--- a/src/lto-plugin/ChangeLog.linaro
418
+++ b/src/lto-plugin/ChangeLog.linaro
420
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
422
+ GCC Linaro 4.8-2013.07-1 released.
424
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
426
+ GCC Linaro 4.8-2013.07 released.
428
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
430
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
432
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
434
+ GCC Linaro 4.8-2013.05 released.
436
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
438
+ * GCC Linaro 4.8-2013.04 released.
439
--- a/src/contrib/regression/ChangeLog.linaro
440
+++ b/src/contrib/regression/ChangeLog.linaro
442
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
444
+ GCC Linaro 4.8-2013.07-1 released.
446
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
448
+ GCC Linaro 4.8-2013.07 released.
450
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
452
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
454
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
456
+ GCC Linaro 4.8-2013.05 released.
458
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
460
+ * GCC Linaro 4.8-2013.04 released.
461
--- a/src/contrib/config-list.mk
462
+++ b/src/contrib/config-list.mk
464
# nohup nice make -j25 -l36 -f ../gcc/contrib/config-list.mk > make.out 2>&1 &
466
# v850e1-elf is rejected by config.sub
467
-LIST = alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
468
+LIST = aarch64-elf aarch64-linux-gnu \
469
+ alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
470
alpha64-dec-vms alpha-dec-vms am33_2.0-linux \
471
arm-wrs-vxworks arm-netbsdelf \
472
arm-linux-androideabi arm-uclinux_eabi arm-eabi \
473
--- a/src/contrib/ChangeLog.linaro
474
+++ b/src/contrib/ChangeLog.linaro
476
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
478
+ GCC Linaro 4.8-2013.07-1 released.
480
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
482
+ GCC Linaro 4.8-2013.07 released.
484
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
486
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
488
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
490
+ GCC Linaro 4.8-2013.05 released.
492
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
494
+ Backport from trunk r198443.
495
+ 2013-04-22 Sofiane Naci <sofiane.naci@arm.com>
497
+ * config-list.mk (LIST): Add aarch64-elf and aarch64-linux-gnu.
499
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
501
+ * GCC Linaro 4.8-2013.04 released.
502
--- a/src/contrib/reghunt/ChangeLog.linaro
503
+++ b/src/contrib/reghunt/ChangeLog.linaro
505
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
507
+ GCC Linaro 4.8-2013.07-1 released.
509
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
511
+ GCC Linaro 4.8-2013.07 released.
513
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
515
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
517
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
519
+ GCC Linaro 4.8-2013.05 released.
521
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
523
+ * GCC Linaro 4.8-2013.04 released.
524
--- a/src/libatomic/ChangeLog.linaro
525
+++ b/src/libatomic/ChangeLog.linaro
527
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
529
+ GCC Linaro 4.8-2013.07-1 released.
531
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
533
+ GCC Linaro 4.8-2013.07 released.
535
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
537
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
539
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
541
+ GCC Linaro 4.8-2013.05 released.
543
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
545
+ * GCC Linaro 4.8-2013.04 released.
546
--- a/src/config/ChangeLog.linaro
547
+++ b/src/config/ChangeLog.linaro
549
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
551
+ GCC Linaro 4.8-2013.07-1 released.
553
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
555
+ GCC Linaro 4.8-2013.07 released.
557
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
559
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
561
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
563
+ GCC Linaro 4.8-2013.05 released.
565
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
567
+ * GCC Linaro 4.8-2013.04 released.
568
--- a/src/libbacktrace/ChangeLog.linaro
569
+++ b/src/libbacktrace/ChangeLog.linaro
571
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
573
+ GCC Linaro 4.8-2013.07-1 released.
575
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
577
+ GCC Linaro 4.8-2013.07 released.
579
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
581
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
583
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
585
+ GCC Linaro 4.8-2013.05 released.
587
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
589
+ * GCC Linaro 4.8-2013.04 released.
590
--- a/src/libjava/libltdl/ChangeLog.linaro
591
+++ b/src/libjava/libltdl/ChangeLog.linaro
593
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
595
+ GCC Linaro 4.8-2013.07-1 released.
597
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
599
+ GCC Linaro 4.8-2013.07 released.
601
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
603
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
605
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
607
+ GCC Linaro 4.8-2013.05 released.
609
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
611
+ * GCC Linaro 4.8-2013.04 released.
612
--- a/src/libjava/ChangeLog.linaro
613
+++ b/src/libjava/ChangeLog.linaro
615
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
617
+ GCC Linaro 4.8-2013.07-1 released.
619
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
621
+ GCC Linaro 4.8-2013.07 released.
623
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
625
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
627
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
629
+ GCC Linaro 4.8-2013.05 released.
631
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
633
+ * GCC Linaro 4.8-2013.04 released.
634
--- a/src/libjava/classpath/ChangeLog.linaro
635
+++ b/src/libjava/classpath/ChangeLog.linaro
637
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
639
+ GCC Linaro 4.8-2013.07-1 released.
641
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
643
+ GCC Linaro 4.8-2013.07 released.
645
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
647
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
649
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
651
+ GCC Linaro 4.8-2013.05 released.
653
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
655
+ * GCC Linaro 4.8-2013.04 released.
656
--- a/src/gnattools/ChangeLog.linaro
657
+++ b/src/gnattools/ChangeLog.linaro
659
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
661
+ GCC Linaro 4.8-2013.07-1 released.
663
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
665
+ GCC Linaro 4.8-2013.07 released.
667
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
669
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
671
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
673
+ GCC Linaro 4.8-2013.05 released.
675
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
677
+ * GCC Linaro 4.8-2013.04 released.
678
--- a/src/maintainer-scripts/ChangeLog.linaro
679
+++ b/src/maintainer-scripts/ChangeLog.linaro
681
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
683
+ GCC Linaro 4.8-2013.07-1 released.
685
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
687
+ GCC Linaro 4.8-2013.07 released.
689
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
691
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
693
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
695
+ GCC Linaro 4.8-2013.05 released.
697
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
699
+ * GCC Linaro 4.8-2013.04 released.
700
--- a/src/libgcc/ChangeLog.linaro
701
+++ b/src/libgcc/ChangeLog.linaro
703
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
705
+ GCC Linaro 4.8-2013.07-1 released.
707
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
709
+ GCC Linaro 4.8-2013.07 released.
711
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
713
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
715
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
717
+ GCC Linaro 4.8-2013.05 released.
719
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
721
+ Backport from trunk r198090.
722
+ 2013-04-19 Yufeng Zhang <yufeng.zhang@arm.com>
724
+ * config/aarch64/sfp-machine.h (_FP_W_TYPE): Change to define
725
+ as 'unsigned long long' instead of 'unsigned long'.
726
+ (_FP_WS_TYPE): Change to define as 'signed long long' instead of
729
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
731
+ * GCC Linaro 4.8-2013.04 released.
732
--- a/src/libgcc/config/aarch64/sfp-machine.h
733
+++ b/src/libgcc/config/aarch64/sfp-machine.h
735
<http://www.gnu.org/licenses/>. */
737
#define _FP_W_TYPE_SIZE 64
738
-#define _FP_W_TYPE unsigned long
739
-#define _FP_WS_TYPE signed long
740
+#define _FP_W_TYPE unsigned long long
741
+#define _FP_WS_TYPE signed long long
742
#define _FP_I_TYPE int
744
typedef int TItype __attribute__ ((mode (TI)));
745
--- a/src/libgcc/config/libbid/ChangeLog.linaro
746
+++ b/src/libgcc/config/libbid/ChangeLog.linaro
748
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
750
+ GCC Linaro 4.8-2013.07-1 released.
752
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
754
+ GCC Linaro 4.8-2013.07 released.
756
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
758
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
760
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
762
+ GCC Linaro 4.8-2013.05 released.
764
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
766
+ * GCC Linaro 4.8-2013.04 released.
767
--- a/src/libdecnumber/ChangeLog.linaro
768
+++ b/src/libdecnumber/ChangeLog.linaro
770
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
772
+ GCC Linaro 4.8-2013.07-1 released.
774
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
776
+ GCC Linaro 4.8-2013.07 released.
778
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
780
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
782
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
784
+ GCC Linaro 4.8-2013.05 released.
786
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
788
+ * GCC Linaro 4.8-2013.04 released.
789
--- a/src/gcc/LINARO-VERSION
790
+++ b/src/gcc/LINARO-VERSION
793
--- a/src/gcc/hooks.c
794
+++ b/src/gcc/hooks.c
799
+/* Generic hook that takes (gimple_stmt_iterator *) and returns
802
+hook_bool_gsiptr_false (gimple_stmt_iterator *a ATTRIBUTE_UNUSED)
807
/* Used for the TARGET_ASM_CAN_OUTPUT_MI_THUNK hook. */
809
hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree a ATTRIBUTE_UNUSED,
810
--- a/src/gcc/hooks.h
811
+++ b/src/gcc/hooks.h
813
extern bool hook_bool_const_tree_false (const_tree);
814
extern bool hook_bool_tree_true (tree);
815
extern bool hook_bool_const_tree_true (const_tree);
816
+extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *);
817
extern bool hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree,
820
--- a/src/gcc/c-family/ChangeLog.linaro
821
+++ b/src/gcc/c-family/ChangeLog.linaro
823
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
825
+ GCC Linaro 4.8-2013.07-1 released.
827
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
829
+ GCC Linaro 4.8-2013.07 released.
831
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
833
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
835
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
837
+ GCC Linaro 4.8-2013.05 released.
839
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
841
+ * GCC Linaro 4.8-2013.04 released.
842
--- a/src/gcc/java/ChangeLog.linaro
843
+++ b/src/gcc/java/ChangeLog.linaro
845
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
847
+ GCC Linaro 4.8-2013.07-1 released.
849
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
851
+ GCC Linaro 4.8-2013.07 released.
853
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
855
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
857
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
859
+ GCC Linaro 4.8-2013.05 released.
861
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
863
+ * GCC Linaro 4.8-2013.04 released.
864
--- a/src/gcc/c/ChangeLog.linaro
865
+++ b/src/gcc/c/ChangeLog.linaro
867
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
869
+ GCC Linaro 4.8-2013.07-1 released.
871
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
873
+ GCC Linaro 4.8-2013.07 released.
875
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
877
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
879
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
881
+ GCC Linaro 4.8-2013.05 released.
883
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
885
+ * GCC Linaro 4.8-2013.04 released.
886
--- a/src/gcc/target.def
887
+++ b/src/gcc/target.def
888
@@ -1289,13 +1289,24 @@
890
tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL)
892
-/* Fold a target-specific builtin. */
893
+/* Fold a target-specific builtin to a tree valid for both GIMPLE
898
tree, (tree fndecl, int n_args, tree *argp, bool ignore),
899
hook_tree_tree_int_treep_bool_null)
901
+/* Fold a target-specific builtin to a valid GIMPLE tree. */
903
+(gimple_fold_builtin,
904
+ "Fold a call to a machine specific built-in function that was set up\n\
905
+by @samp{TARGET_INIT_BUILTINS}. @var{gsi} points to the gimple\n\
906
+statement holding the function call. Returns true if any change\n\
907
+was made to the GIMPLE stream.",
908
+ bool, (gimple_stmt_iterator *gsi),
909
+ hook_bool_gsiptr_false)
911
/* Target hook is used to compare the target attributes in two functions to
912
determine which function's features get higher priority. This is used
913
during function multi-versioning to figure out the order in which two
914
--- a/src/gcc/rtlanal.c
915
+++ b/src/gcc/rtlanal.c
916
@@ -1199,6 +1199,10 @@
917
if (find_reg_note (insn, REG_EQUAL, NULL_RTX))
920
+ /* Check the code to be executed for COND_EXEC. */
921
+ if (GET_CODE (pat) == COND_EXEC)
922
+ pat = COND_EXEC_CODE (pat);
924
if (GET_CODE (pat) == SET && set_noop_p (pat))
927
--- a/src/gcc/configure
928
+++ b/src/gcc/configure
929
@@ -1658,7 +1658,8 @@
930
use sysroot as the system root during the build
931
--with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
932
--with-specs=SPECS add SPECS to driver command-line processing
933
- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
934
+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
935
+ GCC `cat $srcdir/LINARO-VERSION`"
936
--with-bugurl=URL Direct users to URL to report a bug
937
--with-multilib-list select multilibs (SH and x86-64 only)
938
--with-gnu-ld assume the C compiler uses GNU ld default=no
939
@@ -7327,7 +7328,7 @@
940
*) PKGVERSION="($withval) " ;;
943
- PKGVERSION="(GCC) "
944
+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
948
--- a/src/gcc/gensupport.c
949
+++ b/src/gcc/gensupport.c
950
@@ -1717,6 +1717,21 @@
951
XVECEXP (insn, 1, 0) = pattern;
954
+ if (XVEC (ce_elem->data, 3) != NULL)
956
+ rtvec attributes = rtvec_alloc (XVECLEN (insn, 4)
957
+ + XVECLEN (ce_elem->data, 3));
960
+ for (i = 0; i < XVECLEN (insn, 4); i++)
961
+ RTVEC_ELT (attributes, i) = XVECEXP (insn, 4, i);
963
+ for (j = 0; j < XVECLEN (ce_elem->data, 3); j++, i++)
964
+ RTVEC_ELT (attributes, i) = XVECEXP (ce_elem->data, 3, j);
966
+ XVEC (insn, 4) = attributes;
969
XSTR (insn, 2) = alter_test_for_insn (ce_elem, insn_elem);
970
XTMPL (insn, 3) = alter_output_for_insn (ce_elem, insn_elem,
971
alternatives, max_operand);
972
--- a/src/gcc/fold-const.c
973
+++ b/src/gcc/fold-const.c
974
@@ -2457,9 +2457,13 @@
977
if (TREE_CODE (arg0) != TREE_CODE (arg1)
978
- /* This is needed for conversions and for COMPONENT_REF.
979
- Might as well play it safe and always test this. */
980
- || TREE_CODE (TREE_TYPE (arg0)) == ERROR_MARK
981
+ /* NOP_EXPR and CONVERT_EXPR are considered equal. */
982
+ && !(CONVERT_EXPR_P (arg0) && CONVERT_EXPR_P (arg1)))
985
+ /* This is needed for conversions and for COMPONENT_REF.
986
+ Might as well play it safe and always test this. */
987
+ if (TREE_CODE (TREE_TYPE (arg0)) == ERROR_MARK
988
|| TREE_CODE (TREE_TYPE (arg1)) == ERROR_MARK
989
|| TYPE_MODE (TREE_TYPE (arg0)) != TYPE_MODE (TREE_TYPE (arg1)))
991
--- a/src/gcc/objc/ChangeLog.linaro
992
+++ b/src/gcc/objc/ChangeLog.linaro
994
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
996
+ GCC Linaro 4.8-2013.07-1 released.
998
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
1000
+ GCC Linaro 4.8-2013.07 released.
1002
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
1004
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
1006
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1008
+ GCC Linaro 4.8-2013.05 released.
1010
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1012
+ * GCC Linaro 4.8-2013.04 released.
1013
--- a/src/gcc/ChangeLog.linaro
1014
+++ b/src/gcc/ChangeLog.linaro
1016
+2013-08-08 Christophe Lyon <christophe.lyon@linaro.org>
1018
+ Backport from trunk
1019
+ r198489,200167,200199,200510,200513,200515,200576.
1020
+ 2013-05-01 Greta Yorsh <Greta.Yorsh@arm.com>
1022
+ * config/arm/thumb2.md (thumb2_smaxsi3,thumb2_sminsi3): Convert
1023
+ define_insn to define_insn_and_split.
1024
+ (thumb32_umaxsi3,thumb2_uminsi3): Likewise.
1025
+ (thumb2_negdi2,thumb2_abssi2,thumb2_neg_abssi2): Likewise.
1026
+ (thumb2_mov_scc,thumb2_mov_negscc,thumb2_mov_notscc): Likewise.
1027
+ (thumb2_movsicc_insn,thumb2_and_scc,thumb2_ior_scc): Likewise.
1028
+ (thumb2_negscc): Likewise.
1030
+ 2013-06-18 Sofiane Naci <sofiane.naci@arm.com>
1032
+ * config/arm/arm.md (attribute "insn"): Move multiplication and division
1034
+ (attribute "type"): ... here. Remove mult.
1035
+ (attribute "mul32"): New attribute.
1036
+ (attribute "mul64"): Add umaal.
1037
+ (*arm_mulsi3): Update attributes.
1038
+ (*arm_mulsi3_v6): Likewise.
1039
+ (*thumb_mulsi3): Likewise.
1040
+ (*thumb_mulsi3_v6): Likewise.
1041
+ (*mulsi3_compare0): Likewise.
1042
+ (*mulsi3_compare0_v6): Likewise.
1043
+ (*mulsi_compare0_scratch): Likewise.
1044
+ (*mulsi_compare0_scratch_v6): Likewise.
1045
+ (*mulsi3addsi): Likewise.
1046
+ (*mulsi3addsi_v6): Likewise.
1047
+ (*mulsi3addsi_compare0): Likewise.
1048
+ (*mulsi3addsi_compare0_v6): Likewise.
1049
+ (*mulsi3addsi_compare0_scratch): Likewise.
1050
+ (*mulsi3addsi_compare0_scratch_v6): Likewise.
1051
+ (*mulsi3subsi): Likewise.
1052
+ (*mulsidi3adddi): Likewise.
1053
+ (*mulsi3addsi_v6): Likewise.
1054
+ (*mulsidi3adddi_v6): Likewise.
1055
+ (*mulsidi3_nov6): Likewise.
1056
+ (*mulsidi3_v6): Likewise.
1057
+ (*umulsidi3_nov6): Likewise.
1058
+ (*umulsidi3_v6): Likewise.
1059
+ (*umulsidi3adddi): Likewise.
1060
+ (*umulsidi3adddi_v6): Likewise.
1061
+ (*smulsi3_highpart_nov6): Likewise.
1062
+ (*smulsi3_highpart_v6): Likewise.
1063
+ (*umulsi3_highpart_nov6): Likewise.
1064
+ (*umulsi3_highpart_v6): Likewise.
1065
+ (mulhisi3): Likewise.
1066
+ (*mulhisi3tb): Likewise.
1067
+ (*mulhisi3bt): Likewise.
1068
+ (*mulhisi3tt): Likewise.
1069
+ (maddhisi4): Likewise.
1070
+ (*maddhisi4tb): Likewise.
1071
+ (*maddhisi4tt): Likewise.
1072
+ (maddhidi4): Likewise.
1073
+ (*maddhidi4tb): Likewise.
1074
+ (*maddhidi4tt): Likewise.
1075
+ (divsi3): Likewise.
1076
+ (udivsi3): Likewise.
1077
+ * config/arm/thumb2.md (thumb2_mulsi_short): Update attributes.
1078
+ (thumb2_mulsi_short_compare0): Likewise.
1079
+ (thumb2_mulsi_short_compare0_scratch): Likewise.
1080
+ * config/arm/arm1020e.md (1020mult1): Update attribute change.
1081
+ (1020mult2): Likewise.
1082
+ (1020mult3): Likewise.
1083
+ (1020mult4): Likewise.
1084
+ (1020mult5): Likewise.
1085
+ (1020mult6): Likewise.
1086
+ * config/arm/cortex-a15.md (cortex_a15_mult32): Update attribute change.
1087
+ (cortex_a15_mult64): Likewise.
1088
+ (cortex_a15_sdiv): Likewise.
1089
+ (cortex_a15_udiv): Likewise.
1090
+ * config/arm/arm1026ejs.md (mult1): Update attribute change.
1091
+ (mult2): Likewise.
1092
+ (mult3): Likewise.
1093
+ (mult4): Likewise.
1094
+ (mult5): Likewise.
1095
+ (mult6): Likewise.
1096
+ * config/arm/marvell-pj4.md (pj4_ir_mul): Update attribute change.
1097
+ (pj4_ir_div): Likewise.
1098
+ * config/arm/arm1136jfs.md (11_mult1): Update attribute change.
1099
+ (11_mult2): Likewise.
1100
+ (11_mult3): Likewise.
1101
+ (11_mult4): Likewise.
1102
+ (11_mult5): Likewise.
1103
+ (11_mult6): Likewise.
1104
+ (11_mult7): Likewise.
1105
+ * config/arm/cortex-a8.md (cortex_a8_mul): Update attribute change.
1106
+ (cortex_a8_mla): Likewise.
1107
+ (cortex_a8_mull): Likewise.
1108
+ (cortex_a8_smulwy): Likewise.
1109
+ (cortex_a8_smlald): Likewise.
1110
+ * config/arm/cortex-m4.md (cortex_m4_alu): Update attribute change.
1111
+ * config/arm/cortex-r4.md (cortex_r4_mul_4): Update attribute change.
1112
+ (cortex_r4_mul_3): Likewise.
1113
+ (cortex_r4_mla_4): Likewise.
1114
+ (cortex_r4_mla_3): Likewise.
1115
+ (cortex_r4_smlald): Likewise.
1116
+ (cortex_r4_mull): Likewise.
1117
+ (cortex_r4_sdiv): Likewise.
1118
+ (cortex_r4_udiv): Likewise.
1119
+ * config/arm/cortex-a7.md (cortex_a7_mul): Update attribute change.
1120
+ (cortex_a7_idiv): Likewise.
1121
+ * config/arm/arm926ejs.md (9_mult1): Update attribute change.
1122
+ (9_mult2): Likewise.
1123
+ (9_mult3): Likewise.
1124
+ (9_mult4): Likewise.
1125
+ (9_mult5): Likewise.
1126
+ (9_mult6): Likewise.
1127
+ * config/arm/cortex-a53.md (cortex_a53_mul): Update attribute change.
1128
+ (cortex_a53_sdiv): Likewise.
1129
+ (cortex_a53_udiv): Likewise.
1130
+ * config/arm/fa726te.md (726te_mult_op): Update attribute change.
1131
+ * config/arm/fmp626.md (mp626_mult1): Update attribute change.
1132
+ (mp626_mult2): Likewise.
1133
+ (mp626_mult3): Likewise.
1134
+ (mp626_mult4): Likewise.
1135
+ * config/arm/fa526.md (526_mult1): Update attribute change.
1136
+ (526_mult2): Likewise.
1137
+ * config/arm/arm-generic.md (mult): Update attribute change.
1138
+ (mult_ldsched_strongarm): Likewise.
1139
+ (mult_ldsched): Likewise.
1140
+ (multi_cycle): Likewise.
1141
+ * config/arm/cortex-a5.md (cortex_a5_mul): Update attribute change.
1142
+ * config/arm/fa606te.md (606te_mult1): Update attribute change.
1143
+ (606te_mult2): Likewise.
1144
+ (606te_mult3): Likewise.
1145
+ (606te_mult4): Likewise.
1146
+ * config/arm/cortex-a9.md (cortex_a9_mult16): Update attribute change.
1147
+ (cortex_a9_mac16): Likewise.
1148
+ (cortex_a9_multiply): Likewise.
1149
+ (cortex_a9_mac): Likewise.
1150
+ (cortex_a9_multiply_long): Likewise.
1151
+ * config/arm/fa626te.md (626te_mult1): Update attribute change.
1152
+ (626te_mult2): Likewise.
1153
+ (626te_mult3): Likewise.
1154
+ (626te_mult4): Likewise.
1156
+ 2013-06-19 Sofiane Naci <sofiane.naci@arm.com>
1158
+ * config/arm/vfp.md: Move VFP instruction classification documentation
1160
+ * config/arm/arm.md: ... here. Update instruction classification
1163
+ 2013-06-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1165
+ * config/arm/predicates.md (arm_cond_move_operator): New predicate.
1166
+ * config/arm/arm.md (movsfcc): Use arm_cond_move_operator predicate.
1167
+ (movdfcc): Likewise.
1168
+ * config/arm/vfp.md (*thumb2_movsf_vfp):
1169
+ Disable predication for arm_restrict_it.
1170
+ (*thumb2_movsfcc_vfp): Disable for arm_restrict_it.
1171
+ (*thumb2_movdfcc_vfp): Likewise.
1172
+ (*abssf2_vfp, *absdf2_vfp, *negsf2_vfp, *negdf2_vfp,*addsf3_vfp,
1173
+ *adddf3_vfp, *subsf3_vfp, *subdf3_vfpc, *divsf3_vfp,*divdf3_vfp,
1174
+ *mulsf3_vfp, *muldf3_vfp, *mulsf3negsf_vfp, *muldf3negdf_vfp,
1175
+ *mulsf3addsf_vfp, *muldf3adddf_vfp, *mulsf3subsf_vfp,
1176
+ *muldf3subdf_vfp, *mulsf3negsfaddsf_vfp, *fmuldf3negdfadddf_vfp,
1177
+ *mulsf3negsfsubsf_vfp, *muldf3negdfsubdf_vfp, *fma<SDF:mode>4,
1178
+ *fmsub<SDF:mode>4, *fnmsub<SDF:mode>4, *fnmadd<SDF:mode>4,
1179
+ *extendsfdf2_vfp, *truncdfsf2_vfp, *extendhfsf2, *truncsfhf2,
1180
+ *truncsisf2_vfp, *truncsidf2_vfp, fixuns_truncsfsi2, fixuns_truncdfsi2,
1181
+ *floatsisf2_vfp, *floatsidf2_vfp, floatunssisf2, floatunssidf2,
1182
+ *sqrtsf2_vfp, *sqrtdf2_vfp, *cmpsf_vfp, *cmpsf_trap_vfp, *cmpdf_vfp,
1183
+ *cmpdf_trap_vfp, <vrint_pattern><SDF:mode>2):
1184
+ Disable predication for arm_restrict_it.
1186
+ 2013-06-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1188
+ * config/arm/arm.md (arm_mulsi3_v6): Add alternative for 16-bit
1190
+ (mulsi3addsi_v6): Disable predicable variant for arm_restrict_it.
1191
+ (mulsi3subsi): Likewise.
1192
+ (mulsidi3adddi): Likewise.
1193
+ (mulsidi3_v6): Likewise.
1194
+ (umulsidi3_v6): Likewise.
1195
+ (umulsidi3adddi_v6): Likewise.
1196
+ (smulsi3_highpart_v6): Likewise.
1197
+ (umulsi3_highpart_v6): Likewise.
1198
+ (mulhisi3tb): Likewise.
1199
+ (mulhisi3bt): Likewise.
1200
+ (mulhisi3tt): Likewise.
1201
+ (maddhisi4): Likewise.
1202
+ (maddhisi4tb): Likewise.
1203
+ (maddhisi4tt): Likewise.
1204
+ (maddhidi4): Likewise.
1205
+ (maddhidi4tb): Likewise.
1206
+ (maddhidi4tt): Likewise.
1207
+ (zeroextractsi_compare0_scratch): Likewise.
1208
+ (insv_zero): Likewise.
1209
+ (insv_t2): Likewise.
1210
+ (anddi_notzesidi_di): Likewise.
1211
+ (anddi_notsesidi_di): Likewise.
1212
+ (andsi_notsi_si): Likewise.
1213
+ (iordi_zesidi_di): Likewise.
1214
+ (xordi_zesidi_di): Likewise.
1215
+ (andsi_iorsi3_notsi): Likewise.
1216
+ (smax_0): Likewise.
1217
+ (smax_m1): Likewise.
1218
+ (smin_0): Likewise.
1219
+ (not_shiftsi): Likewise.
1220
+ (unaligned_loadsi): Likewise.
1221
+ (unaligned_loadhis): Likewise.
1222
+ (unaligned_loadhiu): Likewise.
1223
+ (unaligned_storesi): Likewise.
1224
+ (unaligned_storehi): Likewise.
1225
+ (extv_reg): Likewise.
1226
+ (extzv_t2): Likewise.
1227
+ (divsi3): Likewise.
1228
+ (udivsi3): Likewise.
1229
+ (arm_zero_extendhisi2addsi): Likewise.
1230
+ (arm_zero_extendqisi2addsi): Likewise.
1231
+ (compareqi_eq0): Likewise.
1232
+ (arm_extendhisi2_v6): Likewise.
1233
+ (arm_extendqisi2addsi): Likewise.
1234
+ (arm_movt): Likewise.
1235
+ (thumb2_ldrd): Likewise.
1236
+ (thumb2_ldrd_base): Likewise.
1237
+ (thumb2_ldrd_base_neg): Likewise.
1238
+ (thumb2_strd): Likewise.
1239
+ (thumb2_strd_base): Likewise.
1240
+ (thumb2_strd_base_neg): Likewise.
1241
+ (arm_negsi2): Add alternative for 16-bit encoding.
1242
+ (arm_one_cmplsi2): Likewise.
1244
+ 2013-06-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1246
+ * config/arm/constraints.md (Ts): New constraint.
1247
+ * config/arm/arm.md (arm_movqi_insn): Add alternatives for
1249
+ (compare_scc): Use "Ts" constraint for operand 0.
1250
+ (ior_scc_scc): Likewise.
1251
+ (and_scc_scc): Likewise.
1252
+ (and_scc_scc_nodom): Likewise.
1253
+ (ior_scc_scc_cmp): Likewise for operand 7.
1254
+ (and_scc_scc_cmp): Likewise.
1255
+ * config/arm/thumb2.md (thumb2_movsi_insn):
1256
+ Add alternatives for 16-bit encodings.
1257
+ (thumb2_movhi_insn): Likewise.
1258
+ (thumb2_movsicc_insn): Likewise.
1259
+ (thumb2_and_scc): Take 'and' outside cond_exec. Use "Ts" constraint.
1260
+ (thumb2_negscc): Use "Ts" constraint.
1261
+ Move mvn instruction outside cond_exec block.
1262
+ * config/arm/vfp.md (thumb2_movsi_vfp): Add alternatives
1263
+ for 16-bit encodings.
1265
+ 2013-07-01 Sofiane Naci <sofiane.naci@arm.com>
1267
+ * arm.md (attribute "wtype"): Delete. Move attribute values from here
1269
+ (attribute "type"): ... here, and prefix with "wmmx_".
1270
+ (attribute "core_cycles"): Update for attribute changes.
1271
+ * iwmmxt.md (tbcstv8qi): Update for attribute changes.
1272
+ (tbcstv4hi): Likewise.
1273
+ (tbcstv2si): Likewise.
1274
+ (iwmmxt_iordi3): Likewise.
1275
+ (iwmmxt_xordi3): Likewise.
1276
+ (iwmmxt_anddi3): Likewise.
1277
+ (iwmmxt_nanddi3): Likewise.
1278
+ (iwmmxt_arm_movdi): Likewise.
1279
+ (iwmmxt_movsi_insn): Likewise.
1280
+ (mov<mode>_internal): Likewise.
1281
+ (and<mode>3_iwmmxt): Likewise.
1282
+ (ior<mode>3_iwmmxt): Likewise.
1283
+ (xor<mode>3_iwmmxt): Likewise.
1284
+ (add<mode>3_iwmmxt): Likewise.
1285
+ (ssaddv8qi3): Likewise.
1286
+ (ssaddv4hi3): Likewise.
1287
+ (ssaddv2si3): Likewise.
1288
+ (usaddv8qi3): Likewise.
1289
+ (usaddv4hi3): Likewise.
1290
+ (usaddv2si3): Likewise.
1291
+ (sub<mode>3_iwmmxt): Likewise.
1292
+ (sssubv8qi3): Likewise.
1293
+ (sssubv4hi3): Likewise.
1294
+ (sssubv2si3): Likewise.
1295
+ (ussubv8qi3): Likewise.
1296
+ (ussubv4hi3): Likewise.
1297
+ (ussubv2si3): Likewise.
1298
+ (mulv4hi3_iwmmxt): Likewise.
1299
+ (smulv4hi3_highpart): Likewise.
1300
+ (umulv4hi3_highpart): Likewise.
1301
+ (iwmmxt_wmacs): Likewise.
1302
+ (iwmmxt_wmacsz): Likewise.
1303
+ (iwmmxt_wmacu): Likewise.
1304
+ (iwmmxt_wmacuz): Likewise.
1305
+ (iwmmxt_clrdi): Likewise.
1306
+ (iwmmxt_clrv8qi): Likewise.
1307
+ (iwmmxt_clr4hi): Likewise.
1308
+ (iwmmxt_clr2si): Likewise.
1309
+ (iwmmxt_uavgrndv8qi3): Likewise.
1310
+ (iwmmxt_uavgrndv4hi3): Likewise.
1311
+ (iwmmxt_uavgv8qi3): Likewise.
1312
+ (iwmmxt_uavgv4hi3): Likewise.
1313
+ (iwmmxt_tinsrb): Likewise.
1314
+ (iwmmxt_tinsrh): Likewise.
1315
+ (iwmmxt_tinsrw): Likewise.
1316
+ (iwmmxt_textrmub): Likewise.
1317
+ (iwmmxt_textrmsb): Likewise.
1318
+ (iwmmxt_textrmuh): Likewise.
1319
+ (iwmmxt_textrmsh): Likewise.
1320
+ (iwmmxt_textrmw): Likewise.
1321
+ (iwmxxt_wshufh): Likewise.
1322
+ (eqv8qi3): Likewise.
1323
+ (eqv4hi3): Likewise.
1324
+ (eqv2si3): Likewise.
1325
+ (gtuv8qi3): Likewise.
1326
+ (gtuv4hi3): Likewise.
1327
+ (gtuv2si3): Likewise.
1328
+ (gtv8qi3): Likewise.
1329
+ (gtv4hi3): Likewise.
1330
+ (gtv2si3): Likewise.
1331
+ (smax<mode>3_iwmmxt): Likewise.
1332
+ (umax<mode>3_iwmmxt): Likewise.
1333
+ (smin<mode>3_iwmmxt): Likewise.
1334
+ (umin<mode>3_iwmmxt): Likewise.
1335
+ (iwmmxt_wpackhss): Likewise.
1336
+ (iwmmxt_wpackwss): Likewise.
1337
+ (iwmmxt_wpackdss): Likewise.
1338
+ (iwmmxt_wpackhus): Likewise.
1339
+ (iwmmxt_wpackwus): Likewise.
1340
+ (iwmmxt_wpackdus): Likewise.
1341
+ (iwmmxt_wunpckihb): Likewise.
1342
+ (iwmmxt_wunpckihh): Likewise.
1343
+ (iwmmxt_wunpckihw): Likewise.
1344
+ (iwmmxt_wunpckilb): Likewise.
1345
+ (iwmmxt_wunpckilh): Likewise.
1346
+ (iwmmxt_wunpckilw): Likewise.
1347
+ (iwmmxt_wunpckehub): Likewise.
1348
+ (iwmmxt_wunpckehuh): Likewise.
1349
+ (iwmmxt_wunpckehuw): Likewise.
1350
+ (iwmmxt_wunpckehsb): Likewise.
1351
+ (iwmmxt_wunpckehsh): Likewise.
1352
+ (iwmmxt_wunpckehsw): Likewise.
1353
+ (iwmmxt_wunpckelub): Likewise.
1354
+ (iwmmxt_wunpckeluh): Likewise.
1355
+ (iwmmxt_wunpckeluw): Likewise.
1356
+ (iwmmxt_wunpckelsb): Likewise.
1357
+ (iwmmxt_wunpckelsh): Likewise.
1358
+ (iwmmxt_wunpckelsw): Likewise.
1359
+ (ror<mode>3): Likewise.
1360
+ (ashr<mode>3_iwmmxt): Likewise.
1361
+ (lshr<mode>3_iwmmxt): Likewise.
1362
+ (ashl<mode>3_iwmmxt): Likewise.
1363
+ (ror<mode>3_di): Likewise.
1364
+ (ashr<mode>3_di): Likewise.
1365
+ (lshr<mode>3_di): Likewise.
1366
+ (ashl<mode>3_di): Likewise.
1367
+ (iwmmxt_wmadds): Likewise.
1368
+ (iwmmxt_wmaddu): Likewise.
1369
+ (iwmmxt_tmia): Likewise.
1370
+ (iwmmxt_tmiaph): Likewise.
1371
+ (iwmmxt_tmiabb): Likewise.
1372
+ (iwmmxt_tmiatb): Likewise.
1373
+ (iwmmxt_tmiabt): Likewise.
1374
+ (iwmmxt_tmiatt): Likewise.
1375
+ (iwmmxt_tmovmskb): Likewise.
1376
+ (iwmmxt_tmovmskh): Likewise.
1377
+ (iwmmxt_tmovmskw): Likewise.
1378
+ (iwmmxt_waccb): Likewise.
1379
+ (iwmmxt_wacch): Likewise.
1380
+ (iwmmxt_waccw): Likewise.
1381
+ (iwmmxt_waligni): Likewise.
1382
+ (iwmmxt_walignr): Likewise.
1383
+ (iwmmxt_walignr0): Likewise.
1384
+ (iwmmxt_walignr1): Likewise.
1385
+ (iwmmxt_walignr2): Likewise.
1386
+ (iwmmxt_walignr3): Likewise.
1387
+ (iwmmxt_wsadb): Likewise.
1388
+ (iwmmxt_wsadh): Likewise.
1389
+ (iwmmxt_wsadbz): Likewise.
1390
+ (iwmmxt_wsadhz): Likewise.
1391
+ * iwmmxt2.md (iwmmxt_wabs<mode>3): Update for attribute changes.
1392
+ (iwmmxt_wabsdiffb): Likewise.
1393
+ (iwmmxt_wabsdiffh): Likewise.
1394
+ (iwmmxt_wabsdiffw): Likewise.
1395
+ (iwmmxt_waddsubhx): Likewise.
1396
+ (iwmmxt_wsubaddhx): Likewise.
1397
+ (addc<mode>3): Likewise.
1398
+ (iwmmxt_avg4): Likewise.
1399
+ (iwmmxt_avg4r): Likewise.
1400
+ (iwmmxt_wmaddsx): Likewise.
1401
+ (iwmmxt_wmaddux): Likewise.
1402
+ (iwmmxt_wmaddsn): Likewise.
1403
+ (iwmmxt_wmaddun): Likewise.
1404
+ (iwmmxt_wmulwsm): Likewise.
1405
+ (iwmmxt_wmulwum): Likewise.
1406
+ (iwmmxt_wmulsmr): Likewise.
1407
+ (iwmmxt_wmulumr): Likewise.
1408
+ (iwmmxt_wmulwsmr): Likewise.
1409
+ (iwmmxt_wmulwumr): Likewise.
1410
+ (iwmmxt_wmulwl): Likewise.
1411
+ (iwmmxt_wqmulm): Likewise.
1412
+ (iwmmxt_wqmulwm): Likewise.
1413
+ (iwmmxt_wqmulmr): Likewise.
1414
+ (iwmmxt_wqmulwmr): Likewise.
1415
+ (iwmmxt_waddbhusm): Likewise.
1416
+ (iwmmxt_waddbhusl): Likewise.
1417
+ (iwmmxt_wqmiabb): Likewise.
1418
+ (iwmmxt_wqmiabt): Likewise.
1419
+ (iwmmxt_wqmiatb): Likewise.
1420
+ (iwmmxt_wqmiatt): Likewise.
1421
+ (iwmmxt_wqmiabbn): Likewise.
1422
+ (iwmmxt_wqmiabtn): Likewise.
1423
+ (iwmmxt_wqmiatbn): Likewise.
1424
+ (iwmmxt_wqmiattn): Likewise.
1425
+ (iwmmxt_wmiabb): Likewise.
1426
+ (iwmmxt_wmiabt): Likewise.
1427
+ (iwmmxt_wmiatb): Likewise.
1428
+ (iwmmxt_wmiatt): Likewise.
1429
+ (iwmmxt_wmiabbn): Likewise.
1430
+ (iwmmxt_wmiabtn): Likewise.
1431
+ (iwmmxt_wmiatbn): Likewise.
1432
+ (iwmmxt_wmiattn): Likewise.
1433
+ (iwmmxt_wmiawbb): Likewise.
1434
+ (iwmmxt_wmiawbt): Likewise.
1435
+ (iwmmxt_wmiawtb): Likewise.
1436
+ (iwmmxt_wmiawtt): Likewise.
1437
+ (iwmmxt_wmiawbbn): Likewise.
1438
+ (iwmmxt_wmiawbtn): Likewise.
1439
+ (iwmmxt_wmiawtbn): Likewise.
1440
+ (iwmmxt_wmiawttn): Likewise.
1441
+ (iwmmxt_wmerge): Likewise.
1442
+ (iwmmxt_tandc<mode>3): Likewise.
1443
+ (iwmmxt_torc<mode>3): Likewise.
1444
+ (iwmmxt_torvsc<mode>3): Likewise.
1445
+ (iwmmxt_textrc<mode>3): Likewise.
1446
+ * marvell-f-iwmmxt.md (wmmxt_shift): Update for attribute changes.
1447
+ (wmmxt_pack): Likewise.
1448
+ (wmmxt_mult_c1): Likewise.
1449
+ (wmmxt_mult_c2): Likewise.
1450
+ (wmmxt_alu_c1): Likewise.
1451
+ (wmmxt_alu_c2): Likewise.
1452
+ (wmmxt_alu_c3): Likewise.
1453
+ (wmmxt_transfer_c1): Likewise.
1454
+ (wmmxt_transfer_c2): Likewise.
1455
+ (wmmxt_transfer_c3): Likewise.
1456
+ (marvell_f_iwmmxt_wstr): Likewise.
1457
+ (marvell_f_iwmmxt_wldr): Likewise.
1459
+2013-08-07 Christophe Lyon <christophe.lyon@linaro.org>
1461
+ Backport from trunk r201237.
1462
+ 2013-07-25 Terry Guo <terry.guo@arm.com>
1464
+ * config/arm/arm.c (thumb1_size_rtx_costs): Assign proper cost for
1465
+ shift_add/shift_sub0/shift_sub1 RTXs.
1467
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
1469
+ Backport from trunk r200596,201067,201083.
1470
+ 2013-07-02 Ian Bolton <ian.bolton@arm.com>
1472
+ * config/aarch64/aarch64-simd.md (absdi2): Support abs for
1475
+ 2013-07-19 Ian Bolton <ian.bolton@arm.com>
1477
+ * config/aarch64/arm_neon.h (vabs_s64): New function.
1479
+ 2013-07-20 James Greenhalgh <james.greenhalgh@arm.com>
1481
+ * config/aarch64/aarch64-builtins.c
1482
+ (aarch64_fold_builtin): Fold abs in all modes.
1483
+ * config/aarch64/aarch64-simd-builtins.def
1484
+ (abs): Enable for all modes.
1485
+ * config/aarch64/arm_neon.h
1486
+ (vabs<q>_s<8,16,32,64): Rewrite using builtins.
1487
+ (vabs_f64): Add missing intrinsic.
1489
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
1491
+ Backport from trunk r198735,198831,199959.
1492
+ 2013-05-09 Sofiane Naci <sofiane.naci@arm.com>
1494
+ * config/aarch64/aarch64.md: New movtf split.
1495
+ (*movtf_aarch64): Update.
1496
+ (aarch64_movdi_tilow): Handle TF modes and rename to
1497
+ aarch64_movdi_<mode>low.
1498
+ (aarch64_movdi_tihigh): Handle TF modes and rename to
1499
+ aarch64_movdi_<mode>high
1500
+ (aarch64_movtihigh_di): Handle TF modes and rename to
1501
+ aarch64_mov<mode>high_di
1502
+ (aarch64_movtilow_di): Handle TF modes and rename to
1503
+ aarch64_mov<mode>low_di
1504
+ (aarch64_movtilow_tilow): Remove spurious whitespace.
1505
+ * config/aarch64/aarch64.c (aarch64_split_128bit_move): Handle TFmode
1507
+ (aarch64_print_operand): Update.
1509
+ 2013-05-13 Sofiane Naci <sofiane.naci@arm.com>
1511
+ * config/aarch64/aarch64-simd.md (aarch64_simd_mov<mode>): Group
1512
+ similar switch cases.
1513
+ (aarch64_simd_mov): Rename to aarch64_split_simd_mov. Update.
1514
+ (aarch64_simd_mov_to_<mode>low): Delete.
1515
+ (aarch64_simd_mov_to_<mode>high): Delete.
1516
+ (move_lo_quad_<mode>): Add w<-r alternative.
1517
+ (aarch64_simd_move_hi_quad_<mode>): Likewise.
1518
+ (aarch64_simd_mov_from_*): Update type attribute.
1519
+ * config/aarch64/aarch64.c (aarch64_split_simd_move): Refactor switch
1522
+ 2013-06-11 Sofiane Naci <sofiane.naci@arm.com>
1524
+ * config/aarch64/aarch64-simd.md (move_lo_quad_<mode>): Update.
1526
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
1528
+ Backport from trunk r199438,199439,201326.
1530
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
1532
+ * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added.
1533
+ (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes.
1534
+ (arm_emit_vfp_multi_reg_pop): Likewise.
1535
+ (thumb2_emit_ldrd_pop): Likewise.
1536
+ (arm_expand_epilogue): Add misc REG_CFA notes.
1537
+ (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE.
1539
+ 2013-05-30 Bernd Schmidt <bernds@codesourcery.com>
1540
+ Zhenqiang Chen <zhenqiang.chen@linaro.org>
1542
+ * config/arm/arm-protos.h: Add and update function protos.
1543
+ * config/arm/arm.c (use_simple_return_p): New added.
1544
+ (thumb2_expand_return): Check simple_return flag.
1545
+ * config/arm/arm.md: Add simple_return and conditional simple_return.
1546
+ * config/arm/iterators.md: Add iterator for return and simple_return.
1548
+ 2013-07-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
1550
+ PR rtl-optimization/57637
1551
+ * function.c (move_insn_for_shrink_wrap): Also check the
1552
+ GEN set of the LIVE problem for the liveness analysis
1553
+ if it exists, otherwise give up.
1555
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
1557
+ Backport from trunk r198928,198973,199203,201240,201241,201307.
1558
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1561
+ * config/arm/predicates.md (call_insn_operand): New predicate.
1562
+ * config/arm/constraints.md ("Cs", "Ss"): New constraints.
1563
+ * config/arm/arm.md (*call_insn, *call_value_insn): Match only
1564
+ if insn is not a tail call.
1565
+ (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through
1567
+ * config/arm/arm.h (enum reg_class): New caller save register class.
1568
+ (REG_CLASS_NAMES): Likewise.
1569
+ (REG_CLASS_CONTENTS): Likewise.
1570
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling
1573
+ 2013-05-16 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1576
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Add check
1579
+ 2013-05-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1583
+ * config/arm/arm.c (any_sibcall_uses_r3): Rename to ..
1584
+ (any_sibcall_could_use_r3): this and handle indirect calls.
1585
+ (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3.
1587
+ 2013-07-25 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1592
+ * config/arm/arm.md ("*sibcall_value_insn): Replace use of
1593
+ Ss with US. Adjust output for v5 and v4t.
1594
+ (*sibcall_value_insn): Likewise and loosen predicate on
1596
+ * config/arm/constraints.md ("Ss"): Rename to US.
1598
+ 2013-07-25 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1600
+ * config/arm/arm.md (*sibcall_insn): Remove unnecessary space.
1602
+ 2013-07-29 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1603
+ Fix incorrect changelog entry.
1610
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1612
+ Backport from trunk r200922.
1613
+ 2013-07-12 Tejas Belagod <tejas.belagod@arm.com>
1615
+ * config/aarch64/aarch64-protos.h
1616
+ (aarch64_simd_immediate_valid_for_move): Remove.
1617
+ * config/aarch64/aarch64.c (simd_immediate_info): New member.
1618
+ (aarch64_simd_valid_immediate): Recognize idioms for shifting ones
1620
+ (aarch64_output_simd_mov_immediate): Print the correct shift specifier.
1622
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1624
+ Backport from trunk r200670.
1625
+ 2013-07-04 Tejas Belagod <tejas.belagod@arm.com>
1627
+ * config/aarch64/aarch64-protos.h (cpu_vector_cost): New.
1628
+ (tune_params): New member 'const vec_costs'.
1629
+ * config/aarch64/aarch64.c (generic_vector_cost): New.
1630
+ (generic_tunings): New member 'generic_vector_cost'.
1631
+ (aarch64_builtin_vectorization_cost): New.
1632
+ (aarch64_add_stmt_cost): New.
1633
+ (TARGET_VECTORIZE_ADD_STMT_COST): New.
1634
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
1636
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1638
+ Backport from trunk r200637.
1639
+ 2013-07-03 Yufeng Zhang <yufeng.zhang@arm.com>
1641
+ * config/aarch64/aarch64.h (enum arm_abi_type): Remove.
1642
+ (ARM_ABI_AAPCS64): Ditto.
1644
+ (ARM_DEFAULT_ABI): Ditto.
1646
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1648
+ Backport from trunk r200532, r200565.
1649
+ 2013-06-28 Marcus Shawcroft <marcus.shawcroft@arm.com>
1651
+ * config/aarch64/aarch64.c (aarch64_cannot_force_const_mem): Adjust
1654
+ 2013-06-29 Yufeng Zhang <yufeng.zhang@arm.com>
1656
+ * config/aarch64/aarch64.c: Remove junk from the beginning of the
1659
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1661
+ Backport from trunk r200531.
1662
+ 2013-06-28 Marcus Shawcroft <marcus.shawcroft@arm.com>
1664
+ * config/aarch64/aarch64-protos.h (aarch64_symbol_type):
1665
+ Update comment w.r.t SYMBOL_TINY_ABSOLUTE.
1667
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1669
+ Backport from trunk r200519.
1670
+ 2013-06-28 Marcus Shawcroft <marcus.shawcroft@arm.com>
1672
+ * config/aarch64/aarch64-protos.h
1673
+ aarch64_classify_symbol_expression): Define.
1674
+ (aarch64_symbolic_constant_p): Remove.
1675
+ * config/aarch64/aarch64.c (aarch64_classify_symbol_expression): Remove
1676
+ static. Fix line length and white space.
1677
+ (aarch64_symbolic_constant_p): Remove.
1678
+ * config/aarch64/predicates.md (aarch64_valid_symref):
1679
+ Use aarch64_classify_symbol_expression.
1681
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1683
+ Backport from trunk r200466, r200467.
1684
+ 2013-06-27 Yufeng Zhang <yufeng.zhang@arm.com>
1686
+ * config/aarch64/aarch64.c (aarch64_force_temporary): Add an extra
1687
+ parameter 'mode' of type 'enum machine_mode mode'; change to pass
1688
+ 'mode' to force_reg.
1689
+ (aarch64_add_offset): Update calls to aarch64_force_temporary.
1690
+ (aarch64_expand_mov_immediate): Likewise.
1692
+ 2013-06-27 Yufeng Zhang <yufeng.zhang@arm.com>
1694
+ * config/aarch64/aarch64.c (aarch64_add_offset): Change to pass
1695
+ 'mode' to aarch64_plus_immediate and gen_rtx_PLUS.
1697
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
1699
+ Backport from trunk r200419.
1700
+ 2013-06-26 Greta Yorsh <Greta.Yorsh@arm.com>
1702
+ * config/arm/arm.h (MAX_CONDITIONAL_EXECUTE): Define macro.
1703
+ * config/arm/arm-protos.h (arm_max_conditional_execute): New
1705
+ (tune_params): Update comment.
1706
+ * config/arm/arm.c (arm_cortex_a15_tune): Set max_cond_insns to 2.
1707
+ (arm_max_conditional_execute): New function.
1708
+ (thumb2_final_prescan_insn): Use max_insn_skipped and
1709
+ MAX_INSN_PER_IT_BLOCK to compute maximum instructions in a block.
1711
+2013-07-24 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1713
+ * LINARO-VERSION: Bump version.
1715
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1717
+ GCC Linaro 4.8-2013.07-1 released.
1719
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1721
+ Backport from trunk r201005.
1722
+ 2013-07-17 Yvan Roux <yvan.roux@linaro.org>
1725
+ * config/arm/arm.c (gen_movmem_ldrd_strd): Fix unaligned load/store
1728
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
1730
+ GCC Linaro 4.8-2013.07 released.
1732
+2013-07-03 Christophe Lyon <christophe.lyon@linaro.org>
1734
+ Revert backport from trunk r198928,198973,199203.
1735
+ 2013-05-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1739
+ * config/arm/arm.c (any_sibcall_uses_r3): Rename to ..
1740
+ (any_sibcall_could_use_r3): this and handle indirect calls.
1741
+ (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3.
1743
+ 2013-05-16 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1746
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Add check
1749
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1752
+ * config/arm/predicates.md (call_insn_operand): New predicate.
1753
+ * config/arm/constraints.md ("Cs", "Ss"): New constraints.
1754
+ * config/arm/arm.md (*call_insn, *call_value_insn): Match only
1755
+ if insn is not a tail call.
1756
+ (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through
1758
+ * config/arm/arm.h (enum reg_class): New caller save register class.
1759
+ (REG_CLASS_NAMES): Likewise.
1760
+ (REG_CLASS_CONTENTS): Likewise.
1761
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling
1764
+2013-07-03 Christophe Lyon <christophe.lyon@linaro.org>
1766
+ Revert backport from mainline (r199438, r199439)
1767
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
1769
+ * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added.
1770
+ (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes.
1771
+ (arm_emit_vfp_multi_reg_pop): Likewise.
1772
+ (thumb2_emit_ldrd_pop): Likewise.
1773
+ (arm_expand_epilogue): Add misc REG_CFA notes.
1774
+ (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE.
1776
+ 2013-05-30 Bernd Schmidt <bernds@codesourcery.com>
1777
+ Zhenqiang Chen <zhenqiang.chen@linaro.org>
1779
+ * config/arm/arm-protos.h: Add and update function protos.
1780
+ * config/arm/arm.c (use_simple_return_p): New added.
1781
+ (thumb2_expand_return): Check simple_return flag.
1782
+ * config/arm/arm.md: Add simple_return and conditional simple_return.
1783
+ * config/arm/iterators.md: Add iterator for return and simple_return.
1784
+ * gcc.dg/shrink-wrap-alloca.c: New added.
1785
+ * gcc.dg/shrink-wrap-pretend.c: New added.
1786
+ * gcc.dg/shrink-wrap-sibcall.c: New added.
1788
+2013-07-03 Christophe Lyon <christophe.lyon@linaro.org>
1790
+ Backport from trunk r199640, 199705, 199733, 199734, 199739.
1791
+ 2013-06-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1793
+ * rtl.def: Add extra fourth optional field to define_cond_exec.
1794
+ * gensupport.c (process_one_cond_exec): Process attributes from
1796
+ * doc/md.texi: Document fourth field in define_cond_exec.
1798
+ 2013-06-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1800
+ * config/arm/arm.md (enabled_for_depr_it): New attribute.
1801
+ (predicable_short_it): Likewise.
1802
+ (predicated): Likewise.
1803
+ (enabled): Handle above.
1804
+ (define_cond_exec): Set predicated attribute to yes.
1806
+ 2013-06-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1808
+ * config/arm/sync.md (atomic_loaddi_1):
1809
+ Disable predication for arm_restrict_it.
1810
+ (arm_load_exclusive<mode>): Likewise.
1811
+ (arm_load_exclusivesi): Likewise.
1812
+ (arm_load_exclusivedi): Likewise.
1813
+ (arm_load_acquire_exclusive<mode>): Likewise.
1814
+ (arm_load_acquire_exclusivesi): Likewise.
1815
+ (arm_load_acquire_exclusivedi): Likewise.
1816
+ (arm_store_exclusive<mode>): Likewise.
1817
+ (arm_store_exclusive<mode>): Likewise.
1818
+ (arm_store_release_exclusivedi): Likewise.
1819
+ (arm_store_release_exclusive<mode>): Likewise.
1821
+ 2013-06-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1823
+ * config/arm/arm-ldmstm.ml: Set "predicable_short_it" to "no"
1824
+ where appropriate.
1825
+ * config/arm/ldmstm.md: Regenerate.
1827
+ 2013-06-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1829
+ * config/arm/arm-fixed.md (add<mode>3,usadd<mode>3,ssadd<mode>3,
1830
+ sub<mode>3, ussub<mode>3, sssub<mode>3, arm_ssatsihi_shift,
1831
+ arm_usatsihi): Adjust alternatives for arm_restrict_it.
1833
+2013-07-02 Rob Savoye <rob.savoye@linaro.org>
1835
+ Backport from trunk 200096
1837
+ 2013-06-14 Vidya Praveen <vidyapraveen@arm.com>
1839
+ * config/aarch64/aarch64-simd.md (aarch64_<su>mlal_lo<mode>):
1841
+ (aarch64_<su>mlal_hi<mode>, aarch64_<su>mlsl_lo<mode>): Likewise.
1842
+ (aarch64_<su>mlsl_hi<mode>, aarch64_<su>mlal<mode>): Likewise.
1843
+ (aarch64_<su>mlsl<mode>): Likewise.
1845
+2013-07-02 Rob Savoye <rob.savoye@linaro.org>
1847
+ Backport from trunk 200062
1849
+ 2013-06-13 Bin Cheng <bin.cheng@arm.com>
1850
+ * fold-const.c (operand_equal_p): Consider NOP_EXPR and
1851
+ CONVERT_EXPR as equal nodes.
1853
+2013-07-02 Rob Savoye <rob.savoye@linaro.org>
1854
+ Backport from trunk 199810
1856
+ 2013-06-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1858
+ * config/arm/arm.md (anddi3_insn): Remove duplicate alternatives.
1859
+ Clean up alternatives.
1861
+2013-06-20 Rob Savoye <rob.savoye@linaro.org>
1863
+ Backport from trunk 200152
1864
+ 2013-06-17 Sofiane Naci <sofiane.naci@arm.com>
1866
+ * config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>): Add r<-w
1867
+ alternative and update.
1868
+ (aarch64_dup_lanedi): Delete.
1869
+ * config/aarch64/arm_neon.h (vdup<bhsd>_lane_*): Update.
1870
+ * config/aarch64/aarch64-simd-builtins.def: Update.
1872
+2013-06-20 Rob Savoye <rob.savoye@linaro.org>
1874
+ Backport from trunk 200061
1875
+ 2013-06-13 Bin Cheng <bin.cheng@arm.com>
1877
+ * rtlanal.c (noop_move_p): Check the code to be executed for
1880
+2013-06-20 Rob Savoye <rob.savoye@linaro.org>
1882
+ Backport from trunk 199694
1883
+ 2013-06-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1885
+ * config/arm/arm.c (MAX_INSN_PER_IT_BLOCK): New macro.
1886
+ (arm_option_override): Override arm_restrict_it where appropriate.
1887
+ (thumb2_final_prescan_insn): Use MAX_INSN_PER_IT_BLOCK.
1888
+ * config/arm/arm.opt (mrestrict-it): New command-line option.
1889
+ * doc/invoke.texi: Document -mrestrict-it.
1891
+2013-06-20 Christophe Lyon <christophe.lyon@linaro.org>
1893
+ Backport from trunk r198683.
1894
+ 2013-05-07 Christophe Lyon <christophe.lyon@linaro.org>
1896
+ * config/arm/arm.c (arm_asan_shadow_offset): New function.
1897
+ (TARGET_ASAN_SHADOW_OFFSET): Define.
1898
+ * config/arm/linux-eabi.h (ASAN_CC1_SPEC): Define.
1899
+ (LINUX_OR_ANDROID_CC): Add ASAN_CC1_SPEC.
1901
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
1903
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
1905
+2013-06-06 Zhenqiang Chen <zhenqiang.chen@linaro.org>
1907
+ Backport from mainline (r199438, r199439)
1908
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
1910
+ * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added.
1911
+ (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes.
1912
+ (arm_emit_vfp_multi_reg_pop): Likewise.
1913
+ (thumb2_emit_ldrd_pop): Likewise.
1914
+ (arm_expand_epilogue): Add misc REG_CFA notes.
1915
+ (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE.
1917
+ 2013-05-30 Bernd Schmidt <bernds@codesourcery.com>
1918
+ Zhenqiang Chen <zhenqiang.chen@linaro.org>
1920
+ * config/arm/arm-protos.h: Add and update function protos.
1921
+ * config/arm/arm.c (use_simple_return_p): New added.
1922
+ (thumb2_expand_return): Check simple_return flag.
1923
+ * config/arm/arm.md: Add simple_return and conditional simple_return.
1924
+ * config/arm/iterators.md: Add iterator for return and simple_return.
1925
+ * gcc.dg/shrink-wrap-alloca.c: New added.
1926
+ * gcc.dg/shrink-wrap-pretend.c: New added.
1927
+ * gcc.dg/shrink-wrap-sibcall.c: New added.
1929
+2013-06-06 Kugan Vivekanandarajah <kuganv@linaro.org>
1931
+ Backport from mainline r198879:
1933
+ 2013-05-14 Chung-Lin Tang <cltang@codesourcery.com>
1935
+ * config/arm/arm.h (EPILOGUE_USES): Only return true
1936
+ for LR_REGNUM after epilogue_completed.
1938
+2013-06-05 Christophe Lyon <christophe.lyon@linaro.org>
1940
+ Backport from trunk r199652,199653,199656,199657,199658.
1942
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
1944
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Call
1945
+ into function to generate MOVI instruction.
1946
+ * config/aarch64/aarch64.c (aarch64_simd_container_mode):
1948
+ (aarch64_preferred_simd_mode): Turn into wrapper.
1949
+ (aarch64_output_scalar_simd_mov_immediate): New function.
1950
+ * config/aarch64/aarch64-protos.h: Add prototype for above.
1952
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
1954
+ * config/aarch64/aarch64.c (simd_immediate_info): Remove
1955
+ element_char member.
1956
+ (sizetochar): Return signed char.
1957
+ (aarch64_simd_valid_immediate): Remove elchar and other
1958
+ unnecessary variables.
1959
+ (aarch64_output_simd_mov_immediate): Take rtx instead of &rtx.
1960
+ Calculate element_char as required.
1961
+ * config/aarch64/aarch64-protos.h: Update and move prototype
1962
+ for aarch64_output_simd_mov_immediate.
1963
+ * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>):
1966
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
1968
+ * config/aarch64/aarch64.c (simd_immediate_info): Struct to hold
1969
+ information completed by aarch64_simd_valid_immediate.
1970
+ (aarch64_legitimate_constant_p): Update arguments.
1971
+ (aarch64_simd_valid_immediate): Work with struct rather than many
1973
+ (aarch64_simd_scalar_immediate_valid_for_move): Update arguments.
1974
+ (aarch64_simd_make_constant): Update arguments.
1975
+ (aarch64_output_simd_mov_immediate): Work with struct rather than
1976
+ many pointers. Output immediate directly rather than as operand.
1977
+ * config/aarch64/aarch64-protos.h (aarch64_simd_valid_immediate):
1979
+ * config/aarch64/constraints.md (Dn): Update arguments.
1981
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
1983
+ * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): No
1985
+ (aarch64_simd_immediate_valid_for_move): Remove.
1986
+ (aarch64_simd_scalar_immediate_valid_for_move): Update call.
1987
+ (aarch64_simd_make_constant): Update call.
1988
+ (aarch64_output_simd_mov_immediate): Update call.
1989
+ * config/aarch64/aarch64-protos.h (aarch64_simd_valid_immediate):
1991
+ * config/aarch64/constraints.md (Dn): Update call.
1993
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
1995
+ * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Change
1996
+ return type to bool for prototype.
1997
+ (aarch64_legitimate_constant_p): Check for true instead of not -1.
1998
+ (aarch64_simd_valid_immediate): Fix up each return to return a bool.
1999
+ (aarch64_simd_immediate_valid_for_move): Update retval for bool.
2001
+2013-06-04 Christophe Lyon <christophe.lyon@linaro.org>
2003
+ Backport from trunk r199261.
2004
+ 2013-05-23 Christian Bruel <christian.bruel@st.com>
2007
+ * config/arm/arm.c (arm_dwarf_register_span): Do not use dbx number.
2009
+2013-06-03 Christophe Lyon <christophe.lyon@linaro.org>
2011
+ Backport from trunk
2012
+ r198890,199254,199259,199260,199293,199407,199408,199454,199544,199545.
2014
+ 2013-05-31 Marcus Shawcroft <marcus.shawcroft@arm.com>
2016
+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
2017
+ Remove un-necessary braces.
2019
+ 2013-05-31 Marcus Shawcroft <marcus.shawcroft@arm.com>
2021
+ * config/aarch64/aarch64.c (aarch64_classify_symbol):
2022
+ Use SYMBOL_TINY_ABSOLUTE for AARCH64_CMODEL_TINY_PIC.
2024
+ 2013-05-30 Ian Bolton <ian.bolton@arm.com>
2026
+ * config/aarch64/aarch64.md (insv<mode>): New define_expand.
2027
+ (*insv_reg<mode>): New define_insn.
2029
+ 2012-05-29 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
2030
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
2032
+ * config/aarch64/aarch64-protos.h (aarch64_symbol_type): Define
2033
+ SYMBOL_TINY_ABSOLUTE.
2034
+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Handle
2035
+ SYMBOL_TINY_ABSOLUTE.
2036
+ (aarch64_expand_mov_immediate): Likewise.
2037
+ (aarch64_classify_symbol): Likewise.
2038
+ (aarch64_mov_operand_p): Remove ATTRIBUTE_UNUSED.
2039
+ Permit SYMBOL_TINY_ABSOLUTE.
2040
+ * config/aarch64/predicates.md (aarch64_mov_operand): Permit CONST.
2042
+ 2013-05-29 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
2043
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
2045
+ * config/aarch64/aarch64.c (aarch64_classify_symbol): Remove comment.
2046
+ Refactor if/switch. Replace gcc_assert with if.
2048
+ 2013-05-24 Ian Bolton <ian.bolton@arm.com>
2050
+ * config/aarch64/aarch64.c (aarch64_print_operand): Change the
2051
+ X format specifier to only display bottom 16 bits.
2052
+ * config/aarch64/aarch64.md (insv_imm<mode>): Allow any size of
2053
+ immediate to match for operand 2, since it will be masked.
2055
+ 2013-05-23 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
2056
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
2058
+ * config/aarch64/aarch64.md (*movdi_aarch64): Replace Usa with S.
2059
+ * config/aarch64/constraints.md (Usa): Remove.
2060
+ * doc/md.texi (AArch64 Usa): Remove.
2062
+ 2013-05-23 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
2063
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
2065
+ * config/aarch64/aarch64-protos.h (aarch64_mov_operand_p): Define.
2066
+ * config/aarch64/aarch64.c (aarch64_mov_operand_p): Define.
2067
+ * config/aarch64/predicates.md (aarch64_const_address): Remove.
2068
+ (aarch64_mov_operand): Use aarch64_mov_operand_p.
2070
+ 2013-05-23 Vidya Praveen <vidyapraveen@arm.com>
2072
+ * config/aarch64/aarch64-simd.md (clzv4si2): Support for CLZ
2073
+ instruction (AdvSIMD).
2074
+ * config/aarch64/aarch64-builtins.c
2075
+ (aarch64_builtin_vectorized_function): Handler for BUILT_IN_CLZ.
2076
+ * config/aarch64/aarch-simd-builtins.def: Entry for CLZ.
2078
+ 2013-05-14 James Greenhalgh <james.greenhalgh@arm.com>
2080
+ * config/aarch64/aarch64-simd.md
2081
+ (aarch64_vcond_internal<mode>): Rename to...
2082
+ (aarch64_vcond_internal<mode><mode>): ...This, for integer modes.
2083
+ (aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): ...This for
2084
+ float modes. Clarify all iterator modes.
2085
+ (vcond<mode><mode>): Use new name for vcond expanders.
2086
+ (vcond<v_cmp_result><mode>): Likewise.
2087
+ (vcondu<mode><mode>): Likewise.
2088
+ * config/aarch64/iterators.md (VDQF_COND): New.
2090
+2013-05-29 Christophe Lyon <christophe.lyon@linaro.org>
2092
+ Backport from trunk r198928,198973,199203.
2093
+ 2013-05-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
2097
+ * config/arm/arm.c (any_sibcall_uses_r3): Rename to ..
2098
+ (any_sibcall_could_use_r3): this and handle indirect calls.
2099
+ (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3.
2101
+ 2013-05-16 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
2104
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Add check
2107
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
2110
+ * config/arm/predicates.md (call_insn_operand): New predicate.
2111
+ * config/arm/constraints.md ("Cs", "Ss"): New constraints.
2112
+ * config/arm/arm.md (*call_insn, *call_value_insn): Match only
2113
+ if insn is not a tail call.
2114
+ (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through
2116
+ * config/arm/arm.h (enum reg_class): New caller save register class.
2117
+ (REG_CLASS_NAMES): Likewise.
2118
+ (REG_CLASS_CONTENTS): Likewise.
2119
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling
2122
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
2124
+ Backport from trunk r198680.
2125
+ 2013-05-07 Sofiane Naci <sofiane.naci@arm.com>
2127
+ * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): call splitter.
2128
+ (aarch64_simd_mov<mode>): New expander.
2129
+ (aarch64_simd_mov_to_<mode>low): New instruction pattern.
2130
+ (aarch64_simd_mov_to_<mode>high): Likewise.
2131
+ (aarch64_simd_mov_from_<mode>low): Likewise.
2132
+ (aarch64_simd_mov_from_<mode>high): Likewise.
2133
+ (aarch64_dup_lane<mode>): Update.
2134
+ (aarch64_dup_lanedi): New instruction pattern.
2135
+ * config/aarch64/aarch64-protos.h (aarch64_split_simd_move): New prototype.
2136
+ * config/aarch64/aarch64.c (aarch64_split_simd_move): New function.
2138
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
2140
+ Backport from trunk r198497-198500.
2141
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2143
+ * config/aarch64/aarch64-builtins.c
2144
+ (aarch64_gimple_fold_builtin.c): Fold more modes for reduc_splus_.
2145
+ * config/aarch64/aarch64-simd-builtins.def
2146
+ (reduc_splus_): Add new modes.
2147
+ (reduc_uplus_): New.
2148
+ * config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove.
2149
+ (reduc_uplus_v4sf): Likewise.
2150
+ (reduc_splus_v4sf): Likewise.
2151
+ (aarch64_addv<mode>): Likewise.
2152
+ (reduc_uplus_<mode>): Likewise.
2153
+ (reduc_splus_<mode>): Likewise.
2154
+ (aarch64_addvv2di): Likewise.
2155
+ (reduc_uplus_v2di): Likewise.
2156
+ (reduc_splus_v2di): Likewise.
2157
+ (aarch64_addvv2si): Likewise.
2158
+ (reduc_uplus_v2si): Likewise.
2159
+ (reduc_splus_v2si): Likewise.
2160
+ (reduc_<sur>plus_<mode>): New.
2161
+ (reduc_<sur>plus_v2di): Likewise.
2162
+ (reduc_<sur>plus_v2si): Likewise.
2163
+ (reduc_<sur>plus_v4sf): Likewise.
2164
+ (aarch64_addpv4sf): Likewise.
2165
+ * config/aarch64/arm_neon.h
2166
+ (vaddv<q>_<s,u,f><8, 16, 32, 64): Rewrite using builtins.
2167
+ * config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV,
2168
+ add UNSPEC_SADDV, UNSPEC_UADDV.
2170
+ (sur): Add UNSPEC_SADDV, UNSPEC_UADDV.
2172
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2174
+ * config/aarch64/arm_neon.h
2175
+ (v<max,min><nm><q><v>_<sfu><8, 16, 32, 64>): Rewrite using builtins.
2177
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2179
+ * config/aarch64/aarch64-builtins
2180
+ (aarch64_gimple_fold_builtin): Fold reduc_<su><maxmin>_ builtins.
2182
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2184
+ * config/aarch64/aarch64-simd-builtins.def
2185
+ (reduc_smax_): New.
2186
+ (reduc_smin_): Likewise.
2187
+ (reduc_umax_): Likewise.
2188
+ (reduc_umin_): Likewise.
2189
+ (reduc_smax_nan_): Likewise.
2190
+ (reduc_smin_nan_): Likewise.
2193
+ (smax): Update for V2SF, V4SF and V2DF modes.
2196
+ (smin_nan): Likewise.
2197
+ * config/aarch64/aarch64-simd.md (<maxmin><mode>3): Rename to...
2198
+ (<su><maxmin><mode>3): ...This, refactor.
2199
+ (s<maxmin><mode>3): New.
2200
+ (<maxmin_uns><mode>3): Likewise.
2201
+ (reduc_<maxmin_uns>_<mode>): Refactor.
2202
+ (reduc_<maxmin_uns>_v4sf): Likewise.
2203
+ (reduc_<maxmin_uns>_v2si): Likewise.
2204
+ (aarch64_<fmaxmin><mode>): Remove.
2205
+ * config/aarch64/arm_neon.h (vmax<q>_f<32,64>): Rewrite to use
2206
+ new builtin names.
2207
+ (vmin<q>_f<32,64>): Likewise.
2208
+ * config/iterators.md (unspec): Add UNSPEC_FMAXNMV, UNSPEC_FMINNMV.
2210
+ (su): Add mappings for smax, smin, umax, umin.
2212
+ (FMAXMINV): Add UNSPEC_FMAXNMV, UNSPEC_FMINNMV.
2213
+ (FMAXMIN): Rename as...
2214
+ (FMAXMIN_UNS): ...This.
2215
+ (maxminv): Remove.
2216
+ (fmaxminv): Likewise.
2217
+ (fmaxmin): Likewise.
2218
+ (maxmin_uns): New.
2219
+ (maxmin_uns_op): Likewise.
2221
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
2223
+ Backport from trunk r199241.
2224
+ 2013-05-23 James Greenhalgh <james.greenhalgh@arm.com>
2226
+ * config/aarch64/aarch64-simd.md
2227
+ (aarch64_cm<optab>di): Add clobber of CC_REGNUM to unsplit pattern.
2229
+2013-05-23 Christophe Lyon <christophe.lyon@linaro.org>
2231
+ Backport from trunk r198970.
2232
+ 2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
2234
+ * config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
2235
+ * config/arm/arm.c (next_consecutive_mem): New function.
2236
+ (gen_movmem_ldrd_strd): Likewise.
2237
+ * config/arm/arm.md (movmemqi): Update condition and code.
2238
+ (unaligned_loaddi, unaligned_storedi): New patterns.
2240
+2013-05-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2242
+ * LINARO-VERSION: Bump version number.
2244
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2246
+ GCC Linaro 4.8-2013.05 released.
2248
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2250
+ Backport from trunk r198677.
2251
+ 2013-05-07 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2253
+ * config/aarch64/aarch64.md
2254
+ (cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): Restrict the
2255
+ shift value between 0-4.
2257
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2259
+ Backport from trunk r198574-198575.
2260
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
2262
+ * config/aarch64/aarch64-simd.md (simd_fabd): Correct the description.
2264
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
2266
+ * config/aarch64/aarch64-simd.md (*fabd_scalar<mode>3): Support
2267
+ scalar form of FABD instruction.
2269
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2271
+ Backport from trunk r198490-198496
2272
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2274
+ * config/aarch64/arm_neon.h
2275
+ (vac<ge, gt><sd>_f<32, 64>): Rename to...
2276
+ (vca<ge, gt><sd>_f<32, 64>): ...this, reimplement in C.
2277
+ (vca<ge, gt, lt, le><q>_f<32, 64>): Reimplement in C.
2279
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2281
+ * config/aarch64/aarch64-simd.md (*aarch64_fac<optab><mode>): New.
2282
+ * config/aarch64/iterators.md (FAC_COMPARISONS): New.
2284
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2286
+ * config/aarch64/aarch64-simd.md
2287
+ (vcond<mode>_internal): Handle special cases for constant masks.
2288
+ (vcond<mode><mode>): Allow nonmemory_operands for outcome vectors.
2289
+ (vcondu<mode><mode>): Likewise.
2290
+ (vcond<v_cmp_result><mode>): New.
2292
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2294
+ * config/aarch64/aarch64-builtins.c (BUILTIN_VALLDI): Define.
2295
+ (aarch64_fold_builtin): Add folding for cm<eq,ge,gt,tst>.
2296
+ * config/aarch64/aarch64-simd-builtins.def
2297
+ (cmeq): Update to BUILTIN_VALLDI.
2302
+ * config/aarch64/arm_neon.h
2303
+ (vc<eq, lt, le, gt, ge, tst><z><qsd>_<fpsu><8,16,32,64>): Remap
2304
+ to builtins or C as appropriate.
2306
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
2308
+ * config/aarch64/aarch64-simd-builtins.def (cmhs): Rename to...
2310
+ (cmhi): Rename to...
2312
+ * config/aarch64/aarch64-simd.md
2313
+ (simd_mode): Add SF.
2314
+ (aarch64_vcond_internal): Use new names for unsigned comparison insns.
2315
+ (aarch64_cm<optab><mode>): Rewrite to not use UNSPECs.
2316
+ * config/aarch64/aarch64.md (*cstore<mode>_neg): Rename to...
2317
+ (cstore<mode>_neg): ...This.
2318
+ * config/aarch64/iterators.md
2320
+ (unspec): Remove UNSPEC_CM<EQ, LE, LT, GE, GT, HS, HI, TST>.
2321
+ (COMPARISONS): New.
2322
+ (UCOMPARISONS): Likewise.
2323
+ (optab): Add missing comparisons.
2325
+ (cmp_1): Likewise.
2326
+ (cmp_2): Likewise.
2329
+ (VCMP_S): Likewise.
2330
+ (VCMP_U): Likewise.
2331
+ (V_cmp_result): Add DF, SF modes.
2332
+ (v_cmp_result): Likewise.
2334
+ (vmtype): Likewise.
2335
+ * config/aarch64/predicates.md (aarch64_reg_or_fp_zero): New.
2337
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2339
+ Backport from trunk r198191.
2340
+ 2013-04-23 Sofiane Naci <sofiane.naci@arm.com>
2342
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add simd attribute.
2344
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2346
+ Backport from trunk r197838.
2347
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2349
+ * config/aarch64/aarch64.c (aarch64_select_cc_mode): Allow NEG
2350
+ code in CC_NZ mode.
2351
+ * config/aarch64/aarch64.md (*neg_<shift><mode>3_compare0): New
2354
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2356
+ Backport from trunk r198019.
2357
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2359
+ * config/aarch64/aarch64.md (*adds_mul_imm_<mode>): New pattern.
2360
+ (*subs_mul_imm_<mode>): New pattern.
2362
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2364
+ Backport from trunk r198424-198425.
2365
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
2367
+ * config/aarch64/aarch64.md (movsi_aarch64): Support LDR/STR
2368
+ from/to S register.
2369
+ (movdi_aarch64): Support LDR/STR from/to D register.
2371
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
2373
+ * common/config/aarch64/aarch64-common.c: Enable REE pass at O2
2374
+ or higher by default.
2376
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2378
+ Backport from trunk r198412.
2379
+ 2013-04-29 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2381
+ * config/arm/arm.md (store_minmaxsi): Use only when
2382
+ optimize_insn_for_size_p.
2384
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2386
+ Backport from trunk 198394,198396-198400,198402-198404.
2387
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
2389
+ * config/aarch64/arm_neon.h
2390
+ (vcvt<sd>_f<32,64>_s<32,64>): Rewrite in C.
2391
+ (vcvt<q>_f<32,64>_s<32,64>): Rewrite using builtins.
2392
+ (vcvt_<high_>_f<32,64>_f<32,64>): Likewise.
2393
+ (vcvt<qsd>_<su><32,64>_f<32,64>): Likewise.
2394
+ (vcvta<qsd>_<su><32,64>_f<32,64>): Likewise.
2395
+ (vcvtm<qsd>_<su><32,64>_f<32,64>): Likewise.
2396
+ (vcvtn<qsd>_<su><32,64>_f<32,64>): Likewise.
2397
+ (vcvtp<qsd>_<su><32,64>_f<32,64>): Likewise.
2399
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
2401
+ * config/aarch64/aarch64-simd.md
2402
+ (<optab><VDQF:mode><fcvt_target>2): New, maps to fix, fixuns.
2403
+ (<fix_trunc_optab><VDQF:mode><fcvt_target>2): New, maps to
2404
+ fix_trunc, fixuns_trunc.
2405
+ (ftrunc<VDQF:mode>2): New.
2406
+ * config/aarch64/iterators.md (optab): Add fix, fixuns.
2407
+ (fix_trunc_optab): New.
2409
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
2411
+ * config/aarch64/aarch64-builtins.c
2412
+ (aarch64_builtin_vectorized_function): Vectorize over ifloorf,
2413
+ iceilf, lround, iroundf.
2415
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
2417
+ * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
2418
+ (float_truncate_hi_): Likewise.
2419
+ (float_extend_lo_): Likewise.
2420
+ (float_truncate_lo_): Likewise.
2421
+ * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New.
2422
+ (aarch64_float_extend_lo_v2df): Likewise.
2423
+ (vec_unpacks_hi_v4sf): Likewise.
2424
+ (aarch64_float_truncate_lo_v2sf): Likewise.
2425
+ (aarch64_float_truncate_hi_v4sf): Likewise.
2426
+ (vec_pack_trunc_v2df): Likewise.
2427
+ (vec_pack_trunc_df): Likewise.
2429
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
2431
+ * config/aarch64/aarch64-builtins.c
2432
+ (aarch64_fold_builtin): Fold float conversions.
2433
+ * config/aarch64/aarch64-simd-builtins.def
2434
+ (floatv2si, floatv4si, floatv2di): New.
2435
+ (floatunsv2si, floatunsv4si, floatunsv2di): Likewise.
2436
+ * config/aarch64/aarch64-simd.md
2437
+ (<optab><fcvt_target><VDQF:mode>2): New, expands to float and floatuns.
2438
+ * config/aarch64/iterators.md (FLOATUORS): New.
2439
+ (optab): Add float, floatuns.
2440
+ (su_optab): Likewise.
2442
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
2444
+ * config/aarch64/aarch64-builtins.c
2445
+ (aarch64_builtin_vectorized_function): Fold to standard pattern names.
2446
+ * config/aarch64/aarch64-simd-builtins.def (frintn): New.
2447
+ (frintz): Rename to...
2448
+ (btrunc): ...this.
2449
+ (frintp): Rename to...
2451
+ (frintm): Rename to...
2453
+ (frinti): Rename to...
2454
+ (nearbyint): ...this.
2455
+ (frintx): Rename to...
2457
+ (frinta): Rename to...
2459
+ * config/aarch64/aarch64-simd.md
2460
+ (aarch64_frint<frint_suffix><mode>): Delete.
2461
+ (<frint_pattern><mode>2): Convert to insn.
2462
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRINTN.
2463
+ * config/aarch64/iterators.md (FRINT): Add UNSPEC_FRINTN.
2464
+ (frint_pattern): Likewise.
2465
+ (frint_suffix): Likewise.
2467
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2469
+ Backport from trunk r198302-198306,198316.
2470
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
2472
+ * config/aarch64/aarch64-simd.md
2473
+ (aarch64_simd_bsl<mode>_internal): Rewrite RTL to not use UNSPEC_BSL.
2474
+ (aarch64_simd_bsl<mode>): Likewise.
2475
+ * config/aarch64/iterators.md (unspec): Remove UNSPEC_BSL.
2477
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
2479
+ * config/aarch64/aarch64-simd.md (neg<mode>2): Use VDQ iterator.
2481
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
2483
+ * config/aarch64/aarch64-builtins.c
2484
+ (aarch64_fold_builtin): New.
2485
+ * config/aarch64/aarch64-protos.h (aarch64_fold_builtin): New.
2486
+ * config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
2487
+ * config/aarch64/aarch64-simd-builtins.def (abs): New.
2488
+ * config/aarch64/arm_neon.h
2489
+ (vabs<q>_<f32, 64>): Implement using __builtin_aarch64_fabs.
2491
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
2492
+ Tejas Belagod <tejas.belagod@arm.com>
2494
+ * config/aarch64/aarch64-builtins.c
2495
+ (aarch64_gimple_fold_builtin): New.
2496
+ * config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
2497
+ * config/aarch64/aarch64-simd-builtins.def (addv): New.
2498
+ * config/aarch64/aarch64-simd.md (addpv4sf): New.
2499
+ (addvv4sf): Update.
2500
+ * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.
2502
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2504
+ * config/aarch64/aarch64.md
2505
+ (*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): New pattern.
2507
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2509
+ * config/aarch64/aarch64.md (*ngc<mode>): New pattern.
2510
+ (*ngcsi_uxtw): New pattern.
2512
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2514
+ Backport from trunk 198298.
2515
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2516
+ Julian Brown <julian@codesourcery.com>
2518
+ * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
2519
+ (TB_DREG): Add T_V4HF.
2520
+ (v4hf_UP): New macro.
2521
+ (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
2522
+ (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
2523
+ NEON_FLOAT_NARROW.
2524
+ Handle initialisation of V4HF. Adjust initialisation of reinterpret
2526
+ (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
2527
+ NEON_FLOAT_NARROW.
2528
+ (arm_vector_mode_supported_p): Handle V4HF.
2529
+ (arm_mangle_map): Handle V4HFmode.
2530
+ * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
2531
+ * config/arm/arm_neon_builtins.def: Add entries for
2532
+ vcvtv4hfv4sf, vcvtv4sfv4hf.
2533
+ * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
2534
+ (neon_vcvtv4hfv4sf): Likewise.
2535
+ * config/arm/neon-gen.ml: Handle half-precision floating point
2537
+ * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
2538
+ * config/arm/arm_neon.h: Regenerate.
2539
+ * config/arm/neon.ml (type elts): Add F16.
2540
+ (type vectype): Add T_float16x4, T_floatHF.
2541
+ (type vecmode): Add V4HF.
2542
+ (type features): Add Requires_FP_bit feature.
2543
+ (elt_width): Handle F16.
2544
+ (elt_class): Likewise.
2545
+ (elt_of_class_width): Likewise.
2546
+ (mode_of_elt): Refactor.
2547
+ (type_for_elt): Handle F16, fix error messages.
2548
+ (vectype_size): Handle T_float16x4.
2549
+ (vcvt_sh): New function.
2550
+ (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
2551
+ (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
2552
+ (string_of_mode): Handle V4HF.
2553
+ * doc/arm-neon-intrinsics.texi: Regenerate.
2555
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2557
+ Backport from trunk r198136-198137,198142,198176.
2558
+ 2013-04-23 Andreas Schwab <schwab@linux-m68k.org>
2560
+ * coretypes.h (gimple_stmt_iterator): Add struct to make
2561
+ compatible with C.
2563
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
2565
+ * coretypes.h (gimple_stmt_iterator_d): Forward declare.
2566
+ (gimple_stmt_iterator): New typedef.
2567
+ * gimple.h (gimple_stmt_iterator): Rename to...
2568
+ (gimple_stmt_iterator_d): ... This.
2569
+ * doc/tm.texi.in (TARGET_FOLD_BUILTIN): Detail restriction that
2570
+ trees be valid for GIMPLE and GENERIC.
2571
+ (TARGET_GIMPLE_FOLD_BUILTIN): New.
2572
+ * gimple-fold.c (gimple_fold_call): Call target hook
2573
+ gimple_fold_builtin.
2574
+ * hooks.c (hook_bool_gsiptr_false): New.
2575
+ * hooks.h (hook_bool_gsiptr_false): New.
2576
+ * target.def (fold_stmt): New.
2577
+ * doc/tm.texi: Regenerate.
2579
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
2581
+ * config/aarch64/aarch64-builtins.c
2583
+ (CF0, CF1, CF2, CF3, CF4, CF10): New.
2584
+ (VAR<1-12>): Add MAP parameter.
2585
+ (BUILTIN_*): Likewise.
2586
+ * config/aarch64/aarch64-simd-builtins.def: Set MAP parameter.
2587
+ * config/aarch64/aarch64-simd.md (aarch64_sshl_n<mode>): Remove.
2588
+ (aarch64_ushl_n<mode>): Likewise.
2589
+ (aarch64_sshr_n<mode>): Likewise.
2590
+ (aarch64_ushr_n<mode>): Likewise.
2591
+ (aarch64_<maxmin><mode>): Likewise.
2592
+ (aarch64_sqrt<mode>): Likewise.
2593
+ * config/aarch64/arm_neon.h (vshl<q>_n_*): Use new builtin names.
2594
+ (vshr<q>_n_*): Likewise.
2596
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
2598
+ * config/aarch64/aarch64-builtins.c
2599
+ (aarch64_simd_builtin_type_mode): Handle SF types.
2601
+ (BUILTIN_GPF): Define.
2602
+ (aarch64_init_simd_builtins): Handle SF types.
2603
+ * config/aarch64/aarch64-simd-builtins.def (frecpe): Add support.
2604
+ (frecps): Likewise.
2605
+ (frecpx): Likewise.
2606
+ * config/aarch64/aarch64-simd.md
2607
+ (simd_types): Update simd_frcp<esx> to simd_frecp<esx>.
2608
+ (aarch64_frecpe<mode>): New.
2609
+ (aarch64_frecps<mode>): Likewise.
2610
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP<ESX>.
2611
+ (v8type): Add frecp<esx>.
2612
+ (aarch64_frecp<FRECP:frecp_suffix><mode>): New.
2613
+ (aarch64_frecps<mode>): Likewise.
2614
+ * config/aarch64/iterators.md (FRECP): New.
2615
+ (frecp_suffix): Likewise.
2616
+ * config/aarch64/arm_neon.h
2617
+ (vrecp<esx><qsd>_<fd><32, 64>): Convert to using builtins.
2619
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2621
+ Backport from trunk r198030.
2622
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
2624
+ * config/arm/arm.md (movsicc_insn): Convert define_insn into
2625
+ define_insn_and_split.
2626
+ (and_scc,ior_scc,negscc): Likewise.
2627
+ (cmpsi2_addneg, subsi3_compare): Convert to named patterns.
2629
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2631
+ Backport from trunk r198020.
2632
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2634
+ * config/aarch64/aarch64.md (*adds_<optab><mode>_multp2):
2636
+ (*subs_<optab><mode>_multp2): New pattern.
2637
+ (*adds_<optab><ALLX:mode>_<GPI:mode>): New pattern.
2638
+ (*subs_<optab><ALLX:mode>_<GPI:mode>): New pattern.
2640
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2642
+ Backport from trunk r198004,198029.
2643
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
2645
+ * config/arm/arm.c (use_return_insn): Return 0 for targets that
2646
+ can benefit from using a sequence of LDRD instructions in epilogue
2647
+ instead of a single LDM instruction.
2649
+ 2013-04-16 Greta Yorsh <Greta.Yorsh at arm.com>
2651
+ * config/arm/arm.c (emit_multi_reg_push): New declaration
2652
+ for an existing function.
2653
+ (arm_emit_strd_push): New function.
2654
+ (arm_expand_prologue): Used here.
2655
+ (arm_emit_ldrd_pop): New function.
2656
+ (arm_expand_epilogue): Used here.
2657
+ (arm_get_frame_offsets): Update condition.
2658
+ (arm_emit_multi_reg_pop): Add a special case for load of a single
2659
+ register with writeback.
2661
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2663
+ Backport from trunk r197965.
2664
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2666
+ * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case.
2667
+ * config/arm/arm.md (*anddi3_insn): Change to insn_and_split.
2668
+ * config/arm/constraints.md (De): New constraint.
2669
+ * config/arm/neon.md (anddi3_neon): Delete.
2670
+ (neon_vand<mode>): Expand to standard anddi3 pattern.
2671
+ * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
2672
+ Move earlier in the file.
2673
+ (neon_inv_logic_op2): Likewise.
2674
+ (arm_anddi_operand_neon): New predicate.
2676
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2678
+ Backport from trunk r197925.
2679
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
2681
+ * config/arm/arm.md (mov_scc,mov_negscc,mov_notscc): Convert
2682
+ define_insn into define_insn_and_split and emit movsicc patterns.
2684
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2686
+ Backport from trunk r197807.
2687
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2689
+ * config/aarch64/aarch64.h (REVERSIBLE_CC_MODE): Define.
2691
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2693
+ Backport from trunk r197642.
2694
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2696
+ * config/arm/arm.md (minmax_arithsi_non_canon): New pattern.
2698
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2700
+ Backport from trunk r197530,197921.
2701
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
2703
+ * config/arm/arm.c (gen_operands_ldrd_strd): Initialize "base".
2705
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2707
+ * config/arm/constraints.md (q): New constraint.
2708
+ * config/arm/ldrdstrd.md: New file.
2709
+ * config/arm/arm.md (ldrdstrd.md) New include.
2710
+ (arm_movdi): Use "q" instead of "r" constraint
2711
+ for double-word memory access.
2712
+ (movdf_soft_insn): Likewise.
2713
+ * config/arm/vfp.md (movdi_vfp): Likewise.
2714
+ * config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md.
2715
+ * config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration.
2716
+ * config/arm/arm.c (gen_operands_ldrd_strd): New function.
2717
+ (mem_ok_for_ldrd_strd): Likewise.
2718
+ (output_move_double): Update assertion.
2720
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2722
+ Backport of trunk r197518-197522,197526-197528.
2723
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2725
+ * config/arm/arm.md (arm_smax_insn): Convert define_insn into
2726
+ define_insn_and_split.
2727
+ (arm_smin_insn,arm_umaxsi3,arm_uminsi3): Likewise.
2729
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2731
+ * config/arm/arm.md (arm_ashldi3_1bit): Convert define_insn into
2732
+ define_insn_and_split.
2733
+ (arm_ashrdi3_1bit,arm_lshrdi3_1bit): Likewise.
2734
+ (shiftsi3_compare): New pattern.
2735
+ (rrx): New pattern.
2736
+ * config/arm/unspecs.md (UNSPEC_RRX): New.
2738
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2740
+ * config/arm/arm.md (negdi_extendsidi): New pattern.
2741
+ (negdi_zero_extendsidi): Likewise.
2743
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2745
+ * config/arm/arm.md (andsi_iorsi3_notsi): Convert define_insn into
2746
+ define_insn_and_split.
2747
+ (arm_negdi2,arm_abssi2,arm_neg_abssi2): Likewise.
2748
+ (arm_cmpdi_insn,arm_cmpdi_unsigned): Likewise.
2750
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2752
+ * config/arm/arm.md (arm_subdi3): Convert define_insn into
2753
+ define_insn_and_split.
2754
+ (subdi_di_zesidi,subdi_di_sesidi): Likewise.
2755
+ (subdi_zesidi_di,subdi_sesidi_di,subdi_zesidi_zesidi): Likewise.
2757
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2759
+ * config/arm/arm.md (subsi3_carryin): New pattern.
2760
+ (subsi3_carryin_const): Likewise.
2761
+ (subsi3_carryin_compare,subsi3_carryin_compare_const): Likewise.
2762
+ (subsi3_carryin_shift,rsbsi3_carryin_shift): Likewise.
2764
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2766
+ * config/arm/arm.md (incscc,arm_incscc,decscc,arm_decscc): Delete.
2768
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
2770
+ * config/arm/arm.md (addsi3_carryin_<optab>): Set attribute predicable.
2771
+ (addsi3_carryin_alt2_<optab>,addsi3_carryin_shift_<optab>): Likewise.
2773
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2775
+ Backport of trunk r197517.
2776
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2778
+ * config/arm/arm.c (arm_expand_builtin): Change fcode
2779
+ type to unsigned int.
2781
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2783
+ Backport of trunk r197513.
2784
+ 2013-04-05 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
2786
+ * doc/invoke.texi (ARM Options): Document cortex-a53 support.
2788
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2790
+ Backport of trunk r197489-197491.
2791
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2793
+ * config/arm/arm-protos.h (arm_builtin_vectorized_function):
2794
+ New function prototype.
2795
+ * config/arm/arm.c (TARGET_VECTORIZE_BUILTINS): Define.
2796
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
2797
+ (arm_builtin_vectorized_function): New function.
2799
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2801
+ * config/arm/arm_neon_builtins.def: New file.
2802
+ * config/arm/arm.c (neon_builtin_data): Move contents to
2803
+ arm_neon_builtins.def.
2804
+ (enum arm_builtins): Include neon builtin definitions.
2805
+ (ARM_BUILTIN_NEON_BASE): Move from enum to macro.
2806
+ * config/arm/t-arm (arm.o): Add dependency on
2807
+ arm_neon_builtins.def.
2809
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2811
+ Backport of trunk 196795-196797,196957
2812
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
2814
+ * config/aarch64/aarch64.md (*sub<mode>3_carryin): New pattern.
2815
+ (*subsi3_carryin_uxtw): Likewise.
2817
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
2819
+ * config/aarch64/aarch64.md (*ror<mode>3_insn): New pattern.
2820
+ (*rorsi3_insn_uxtw): Likewise.
2822
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
2824
+ * config/aarch64/aarch64.md (*extr<mode>5_insn): New pattern.
2825
+ (*extrsi5_insn_uxtw): Likewise.
2827
+2013-04-10 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2829
+ * LINARO-VERSION: Bump version number.
2831
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2833
+ * GCC Linaro 4.8-2013.04 released.
2835
+ * LINARO-VERSION: New file.
2836
+ * configure.ac: Add Linaro version string.
2837
+ * configure: Regenerate.
2839
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2841
+ Backport of trunk r197346.
2842
+ 2013-04-02 Ian Caulfield <ian.caulfield@arm.com>
2843
+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
2845
+ * config/arm/arm-arches.def (armv8-a): Default to cortex-a53.
2846
+ * config/arm/t-arm (MD_INCLUDES): Depend on cortex-a53.md.
2847
+ * config/arm/cortex-a53.md: New file.
2848
+ * config/arm/bpabi.h (BE8_LINK_SPEC): Handle cortex-a53.
2849
+ * config/arm/arm.md (generic_sched, generic_vfp): Handle cortex-a53.
2850
+ * config/arm/arm.c (arm_issue_rate): Likewise.
2851
+ * config/arm/arm-tune.md: Regenerate
2852
+ * config/arm/arm-tables.opt: Regenerate.
2853
+ * config/arm/arm-cores.def: Add cortex-a53.
2855
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2857
+ Backport of trunk r197342.
2858
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
2860
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add variants for
2861
+ scalar load/store operations using B/H registers.
2862
+ (*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Likewise.
2864
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2866
+ Backport of trunk r197341.
2867
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
2869
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add alternatives for
2871
+ * config/aarch64/aarch64.c
2872
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
2873
+ * config/aarch64/aarch64-protos.h
2874
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
2875
+ * config/aarch64/constraints.md (Dh, Dq): New.
2876
+ * config/aarch64/iterators.md (hq): New.
2878
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2880
+ Backport from trunk r197207.
2881
+ 2013-03-28 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2883
+ * config/aarch64/aarch64.md (*and<mode>3_compare0): New pattern.
2884
+ (*andsi3_compare0_uxtw): New pattern.
2885
+ (*and_<SHIFT:optab><mode>3_compare0): New pattern.
2886
+ (*and_<SHIFT:optab>si3_compare0_uxtw): New pattern.
2888
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2890
+ Backport from trunk r197153.
2891
+ 2013-03-27 Terry Guo <terry.guo@arm.com>
2893
+ * config/arm/arm-cores.def: Added core cortex-r7.
2894
+ * config/arm/arm-tune.md: Regenerated.
2895
+ * config/arm/arm-tables.opt: Regenerated.
2896
+ * doc/invoke.texi: Added entry for core cortex-r7.
2898
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2900
+ Backport from trunk r197052.
2901
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2903
+ * config/arm/arm.md (f_sels, f_seld): New types.
2904
+ (*cmov<mode>): New pattern.
2905
+ * config/arm/predicates.md (arm_vsel_comparison_operator): New
2908
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2910
+ Backport from trunk r197046.
2911
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
2913
+ * config/arm/arm.c (arm_emit_load_exclusive): Add acq parameter.
2914
+ Emit load-acquire versions when acq is true.
2915
+ (arm_emit_store_exclusive): Add rel parameter.
2916
+ Emit store-release versions when rel is true.
2917
+ (arm_split_compare_and_swap): Use acquire-release instructions
2919
+ of barriers when appropriate.
2920
+ (arm_split_atomic_op): Likewise.
2921
+ * config/arm/arm.h (TARGET_HAVE_LDACQ): New macro.
2922
+ * config/arm/unspecs.md (VUNSPEC_LAX): New unspec.
2923
+ (VUNSPEC_SLX): Likewise.
2924
+ (VUNSPEC_LDA): Likewise.
2925
+ (VUNSPEC_STL): Likewise.
2926
+ * config/arm/sync.md (atomic_load<mode>): New pattern.
2927
+ (atomic_store<mode>): Likewise.
2928
+ (arm_load_acquire_exclusive<mode>): Likewise.
2929
+ (arm_load_acquire_exclusivesi): Likewise.
2930
+ (arm_load_acquire_exclusivedi): Likewise.
2931
+ (arm_store_release_exclusive<mode>): Likewise.
2933
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2935
+ Backport from trunk r196876.
2936
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
2938
+ * config/arm/arm-protos.h (tune_params): Add
2939
+ prefer_neon_for_64bits field.
2940
+ * config/arm/arm.c (prefer_neon_for_64bits): New variable.
2941
+ (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
2942
+ (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
2943
+ (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
2944
+ (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
2945
+ (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
2946
+ (arm_option_override): Handle -mneon-for-64bits new option.
2947
+ * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
2948
+ (prefer_neon_for_64bits): Declare new variable.
2949
+ * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
2950
+ avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
2952
+ (arch_enabled): Handle new arch types. Remove support for onlya8
2954
+ (one_cmpldi2): Use new arch names.
2955
+ * config/arm/arm.opt (mneon-for-64bits): Add option.
2956
+ * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
2957
+ (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
2958
+ neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
2960
+ * doc/invoke.texi (-mneon-for-64bits): Document.
2962
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2964
+ Backport from trunk r196858.
2965
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2967
+ * config/aarch64/aarch64-simd.md (simd_fabd): New Attribute.
2968
+ (abd<mode>_3): New pattern.
2969
+ (aba<mode>_3): New pattern.
2970
+ (fabd<mode>_3): New pattern.
2972
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
2974
+ Backport from trunk r196856.
2975
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
2977
+ * config/aarch64/aarch64-elf.h (REGISTER_PREFIX): Remove.
2978
+ * config/aarch64/aarch64.c (aarch64_print_operand): Remove all
2979
+ occurrence of REGISTER_PREFIX as its empty string.
2980
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
2981
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
2983
+/* { dg-do compile } */
2984
+/* { dg-require-effective-target arm_v8_neon_ok } */
2985
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
2986
+/* { dg-add-options arm_v8_neon } */
2991
+foo (float *output, float *input)
2994
+ /* Vectorizable. */
2995
+ for (i = 0; i < N; i++)
2996
+ output[i] = __builtin_floorf (input[i]);
2999
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_floorf } } } */
3000
+/* { dg-final { cleanup-tree-dump "vect" } } */
3001
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
3002
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
3004
+/* Test the `vcvtf32_f16' ARM Neon intrinsic. */
3005
+/* This file was autogenerated by neon-testgen. */
3007
+/* { dg-do assemble } */
3008
+/* { dg-require-effective-target arm_neon_fp16_ok } */
3009
+/* { dg-options "-save-temps -O0" } */
3010
+/* { dg-add-options arm_neon_fp16 } */
3012
+#include "arm_neon.h"
3014
+void test_vcvtf32_f16 (void)
3016
+ float32x4_t out_float32x4_t;
3017
+ float16x4_t arg0_float16x4_t;
3019
+ out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
3022
+/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
3023
+/* { dg-final { cleanup-saved-temps } } */
3024
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
3025
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
3027
+/* Test the `vcvtf16_f32' ARM Neon intrinsic. */
3028
+/* This file was autogenerated by neon-testgen. */
3030
+/* { dg-do assemble } */
3031
+/* { dg-require-effective-target arm_neon_fp16_ok } */
3032
+/* { dg-options "-save-temps -O0" } */
3033
+/* { dg-add-options arm_neon_fp16 } */
3035
+#include "arm_neon.h"
3037
+void test_vcvtf16_f32 (void)
3039
+ float16x4_t out_float16x4_t;
3040
+ float32x4_t arg0_float32x4_t;
3042
+ out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
3045
+/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
3046
+/* { dg-final { cleanup-saved-temps } } */
3047
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
3048
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
3050
+/* { dg-do compile } */
3051
+/* { dg-options "-O1" } */
3054
+muld (unsigned long long X, unsigned long long Y)
3056
+ unsigned long long mask = 0xffffffffull;
3057
+ return (X & mask) * (Y & mask);
3060
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
3061
--- a/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
3062
+++ b/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
3064
+/* { dg-do compile } */
3065
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
3066
+/* { dg-options "-O2" } */
3067
+int foo(int a, int b, int* p, int *q)
3074
+/* { dg-final { scan-assembler "ldrd" } } */
3075
--- a/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
3076
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
3078
+/* { dg-do compile } */
3079
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3080
+/* { dg-options "-O2" } */
3081
+/* { dg-add-options arm_v8_vfp } */
3084
+foo (double x, double y)
3086
+ volatile int i = 0;
3087
+ return i > 0 ? x : y;
3090
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
3091
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
3092
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
3094
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3095
+/* { dg-do compile } */
3096
+/* { dg-options "-O2" } */
3097
+/* { dg-add-options arm_arch_v8a } */
3099
+#include "../aarch64/atomic-op-relaxed.x"
3101
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3102
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3103
+/* { dg-final { scan-assembler-not "dmb" } } */
3104
--- a/src/gcc/testsuite/gcc.target/arm/vselgesf.c
3105
+++ b/src/gcc/testsuite/gcc.target/arm/vselgesf.c
3107
+/* { dg-do compile } */
3108
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3109
+/* { dg-options "-O2" } */
3110
+/* { dg-add-options arm_v8_vfp } */
3113
+foo (float x, float y)
3115
+ volatile int i = 0;
3116
+ return i >= 0 ? x : y;
3119
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
3120
--- a/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
3121
+++ b/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
3123
+/* { dg-do compile } */
3124
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
3125
+/* { dg-options "-O2" } */
3126
+void foo(int a, int b, int* p)
3131
+/* { dg-final { scan-assembler "strd" } } */
3132
--- a/src/gcc/testsuite/gcc.target/arm/negdi-1.c
3133
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-1.c
3135
+/* { dg-do compile } */
3136
+/* { dg-require-effective-target arm32 } */
3137
+/* { dg-options "-O2" } */
3139
+signed long long extendsidi_negsi (signed int x)
3147
+ mov r1, r0, asr #31
3149
+/* { dg-final { scan-assembler-times "rsb" 1 { target { arm_nothumb } } } } */
3150
+/* { dg-final { scan-assembler-times "negs\\t" 1 { target { ! { arm_nothumb } } } } } */
3151
+/* { dg-final { scan-assembler-times "asr" 1 } } */
3152
--- a/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
3153
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
3155
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3156
+/* { dg-do compile } */
3157
+/* { dg-options "-O2" } */
3158
+/* { dg-add-options arm_arch_v8a } */
3160
+#include "../aarch64/atomic-comp-swap-release-acquire.x"
3162
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 4 } } */
3163
+/* { dg-final { scan-assembler-times "stlex" 4 } } */
3164
+/* { dg-final { scan-assembler-not "dmb" } } */
3165
--- a/src/gcc/testsuite/gcc.target/arm/pr19599.c
3166
+++ b/src/gcc/testsuite/gcc.target/arm/pr19599.c
3168
+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" } { "" } } */
3169
+/* { dg-options "-O2 -march=armv5te -marm" } */
3170
+/* { dg-final { scan-assembler "bx" } } */
3172
+int (*indirect_func)();
3174
+int indirect_call()
3176
+ return indirect_func();
3178
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
3179
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
3181
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3182
+/* { dg-do compile } */
3183
+/* { dg-options "-O2" } */
3184
+/* { dg-add-options arm_arch_v8a } */
3186
+#include "../aarch64/atomic-op-seq_cst.x"
3188
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3189
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3190
+/* { dg-final { scan-assembler-not "dmb" } } */
3191
--- a/src/gcc/testsuite/gcc.target/arm/vselgedf.c
3192
+++ b/src/gcc/testsuite/gcc.target/arm/vselgedf.c
3194
+/* { dg-do compile } */
3195
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3196
+/* { dg-options "-O2" } */
3197
+/* { dg-add-options arm_v8_vfp } */
3200
+foo (double x, double y)
3202
+ volatile int i = 0;
3203
+ return i >= 0 ? x : y;
3206
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
3207
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
3208
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
3210
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3211
+/* { dg-do compile } */
3212
+/* { dg-options "-O2" } */
3213
+/* { dg-add-options arm_arch_v8a } */
3215
+#include "../aarch64/atomic-op-consume.x"
3217
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3218
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3219
+/* { dg-final { scan-assembler-not "dmb" } } */
3220
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
3221
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
3223
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3224
+/* { dg-do compile } */
3225
+/* { dg-options "-O2" } */
3226
+/* { dg-add-options arm_arch_v8a } */
3228
+#include "../aarch64/atomic-op-char.x"
3230
+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3231
+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3232
+/* { dg-final { scan-assembler-not "dmb" } } */
3233
--- a/src/gcc/testsuite/gcc.target/arm/vselnesf.c
3234
+++ b/src/gcc/testsuite/gcc.target/arm/vselnesf.c
3236
+/* { dg-do compile } */
3237
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3238
+/* { dg-options "-O2" } */
3239
+/* { dg-add-options arm_v8_vfp } */
3242
+foo (float x, float y)
3244
+ volatile int i = 0;
3245
+ return i != 0 ? x : y;
3248
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
3249
--- a/src/gcc/testsuite/gcc.target/arm/negdi-2.c
3250
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-2.c
3252
+/* { dg-do compile } */
3253
+/* { dg-require-effective-target arm32 } */
3254
+/* { dg-options "-O2" } */
3256
+signed long long zero_extendsidi_negsi (unsigned int x)
3265
+/* { dg-final { scan-assembler-times "rsb\\tr0, r0, #0" 1 { target { arm_nothumb } } } } */
3266
+/* { dg-final { scan-assembler-times "negs\\tr0, r0" 1 { target { ! arm_nothumb } } } } */
3267
+/* { dg-final { scan-assembler-times "mov" 1 } } */
3268
--- a/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
3269
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
3271
+/* { dg-do compile } */
3272
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3273
+/* { dg-options "-O2" } */
3274
+/* { dg-add-options arm_v8_vfp } */
3277
+foo (float x, float y)
3279
+ return !__builtin_isunordered (x, y) ? x : y;
3282
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
3283
--- a/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
3284
+++ b/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
3286
+/* { dg-do compile } */
3287
+/* { dg-options "-O2" } */
3289
+#define MAX(a, b) (a > b ? a : b)
3291
+foo (int a, int b, int c)
3293
+ return c - MAX (a, b);
3296
+/* { dg-final { scan-assembler "rsbge" } } */
3297
+/* { dg-final { scan-assembler "rsblt" } } */
3298
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
3299
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
3301
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3302
+/* { dg-do compile } */
3303
+/* { dg-options "-O2" } */
3304
+/* { dg-add-options arm_arch_v8a } */
3306
+#include "../aarch64/atomic-op-release.x"
3308
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3309
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3310
+/* { dg-final { scan-assembler-not "dmb" } } */
3311
--- a/src/gcc/testsuite/gcc.target/arm/vselvssf.c
3312
+++ b/src/gcc/testsuite/gcc.target/arm/vselvssf.c
3314
+/* { dg-do compile } */
3315
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3316
+/* { dg-options "-O2" } */
3317
+/* { dg-add-options arm_v8_vfp } */
3320
+foo (float x, float y)
3322
+ return __builtin_isunordered (x, y) ? x : y;
3325
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
3326
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
3327
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
3329
+/* { dg-do compile } */
3330
+/* { dg-require-effective-target arm_v8_neon_ok } */
3331
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
3332
+/* { dg-add-options arm_v8_neon } */
3337
+foo (float *output, float *input)
3340
+ /* Vectorizable. */
3341
+ for (i = 0; i < N; i++)
3342
+ output[i] = __builtin_roundf (input[i]);
3345
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_roundf } } } */
3346
+/* { dg-final { cleanup-tree-dump "vect" } } */
3347
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
3348
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
3350
+/* Check that Neon is *not* used by default to handle 64-bits scalar
3353
+/* { dg-do compile } */
3354
+/* { dg-require-effective-target arm_neon_ok } */
3355
+/* { dg-options "-O2" } */
3356
+/* { dg-add-options arm_neon } */
3358
+typedef long long i64;
3359
+typedef unsigned long long u64;
3360
+typedef unsigned int u32;
3363
+/* Unary operators */
3364
+#define UNARY_OP(name, op) \
3365
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
3367
+/* Binary operators */
3368
+#define BINARY_OP(name, op) \
3369
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
3371
+/* Unsigned shift */
3372
+#define SHIFT_U(name, op, amount) \
3373
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
3376
+#define SHIFT_S(name, op, amount) \
3377
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
3387
+SHIFT_U(right1, >>, 1)
3388
+SHIFT_U(right2, >>, 2)
3389
+SHIFT_U(right5, >>, 5)
3390
+SHIFT_U(rightn, >>, c)
3392
+SHIFT_S(right1, >>, 1)
3393
+SHIFT_S(right2, >>, 2)
3394
+SHIFT_S(right5, >>, 5)
3395
+SHIFT_S(rightn, >>, c)
3397
+/* { dg-final {scan-assembler-times "vmvn" 0} } */
3398
+/* { dg-final {scan-assembler-times "vadd" 0} } */
3399
+/* { dg-final {scan-assembler-times "vsub" 0} } */
3400
+/* { dg-final {scan-assembler-times "vand" 0} } */
3401
+/* { dg-final {scan-assembler-times "vorr" 0} } */
3402
+/* { dg-final {scan-assembler-times "veor" 0} } */
3403
+/* { dg-final {scan-assembler-times "vshr" 0} } */
3404
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
3405
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
3411
+char dest[16] = { 0 };
3413
void aligned_dest (char *src)
3416
/* Expect a multi-word store for the main part of the copy, but subword
3417
loads/stores for the remainder. */
3419
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
3420
+/* { dg-final { scan-assembler-times "ldmia" 0 } } */
3421
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
3422
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
3423
+/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */
3424
/* { dg-final { scan-assembler-times "ldrh" 1 } } */
3425
/* { dg-final { scan-assembler-times "strh" 1 } } */
3426
/* { dg-final { scan-assembler-times "ldrb" 1 } } */
3427
--- a/src/gcc/testsuite/gcc.target/arm/negdi-3.c
3428
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-3.c
3430
+/* { dg-do compile } */
3431
+/* { dg-require-effective-target arm32 } */
3432
+/* { dg-options "-O2" } */
3434
+signed long long negdi_zero_extendsidi (unsigned int x)
3436
+ return -((signed long long) x);
3443
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
3444
+/* { dg-final { scan-assembler-times "sbc" 1 } } */
3445
+/* { dg-final { scan-assembler-times "mov" 0 } } */
3446
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
3447
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
3448
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
3450
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3451
+/* { dg-do compile } */
3452
+/* { dg-options "-O2" } */
3453
+/* { dg-add-options arm_arch_v8a } */
3455
+#include "../aarch64/atomic-op-acq_rel.x"
3457
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3458
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3459
+/* { dg-final { scan-assembler-not "dmb" } } */
3460
--- a/src/gcc/testsuite/gcc.target/arm/vselltsf.c
3461
+++ b/src/gcc/testsuite/gcc.target/arm/vselltsf.c
3463
+/* { dg-do compile } */
3464
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3465
+/* { dg-options "-O2" } */
3466
+/* { dg-add-options arm_v8_vfp } */
3469
+foo (float x, float y)
3471
+ volatile int i = 0;
3472
+ return i < 0 ? x : y;
3475
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
3476
--- a/src/gcc/testsuite/gcc.target/arm/vselnedf.c
3477
+++ b/src/gcc/testsuite/gcc.target/arm/vselnedf.c
3479
+/* { dg-do compile } */
3480
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3481
+/* { dg-options "-O2" } */
3482
+/* { dg-add-options arm_v8_vfp } */
3485
+foo (double x, double y)
3487
+ volatile int i = 0;
3488
+ return i != 0 ? x : y;
3491
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
3492
--- a/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
3493
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
3495
+/* { dg-do compile } */
3496
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3497
+/* { dg-options "-O2" } */
3498
+/* { dg-add-options arm_v8_vfp } */
3501
+foo (double x, double y)
3503
+ return !__builtin_isunordered (x, y) ? x : y;
3506
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
3507
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
3508
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
3510
+/* { dg-do compile } */
3511
+/* { dg-require-effective-target arm_v8_neon_ok } */
3512
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
3513
+/* { dg-add-options arm_v8_neon } */
3518
+foo (float *output, float *input)
3521
+ /* Vectorizable. */
3522
+ for (i = 0; i < N; i++)
3523
+ output[i] = __builtin_truncf (input[i]);
3526
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_btruncf } } } */
3527
+/* { dg-final { cleanup-tree-dump "vect" } } */
3528
--- a/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
3529
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
3531
+/* { dg-do compile } */
3532
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3533
+/* { dg-options "-O2" } */
3534
+/* { dg-add-options arm_v8_vfp } */
3537
+foo (float x, float y)
3539
+ volatile int i = 0;
3540
+ return i == 0 ? x : y;
3543
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
3544
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
3545
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
3547
+/* Check that Neon is used to handle 64-bits scalar operations. */
3549
+/* { dg-do compile } */
3550
+/* { dg-require-effective-target arm_neon_ok } */
3551
+/* { dg-options "-O2 -mneon-for-64bits" } */
3552
+/* { dg-add-options arm_neon } */
3554
+typedef long long i64;
3555
+typedef unsigned long long u64;
3556
+typedef unsigned int u32;
3559
+/* Unary operators */
3560
+#define UNARY_OP(name, op) \
3561
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
3563
+/* Binary operators */
3564
+#define BINARY_OP(name, op) \
3565
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
3567
+/* Unsigned shift */
3568
+#define SHIFT_U(name, op, amount) \
3569
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
3572
+#define SHIFT_S(name, op, amount) \
3573
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
3583
+SHIFT_U(right1, >>, 1)
3584
+SHIFT_U(right2, >>, 2)
3585
+SHIFT_U(right5, >>, 5)
3586
+SHIFT_U(rightn, >>, c)
3588
+SHIFT_S(right1, >>, 1)
3589
+SHIFT_S(right2, >>, 2)
3590
+SHIFT_S(right5, >>, 5)
3591
+SHIFT_S(rightn, >>, c)
3593
+/* { dg-final {scan-assembler-times "vmvn" 1} } */
3594
+/* Two vadd: 1 in unary_not, 1 in binary_add */
3595
+/* { dg-final {scan-assembler-times "vadd" 2} } */
3596
+/* { dg-final {scan-assembler-times "vsub" 1} } */
3597
+/* { dg-final {scan-assembler-times "vand" 1} } */
3598
+/* { dg-final {scan-assembler-times "vorr" 1} } */
3599
+/* { dg-final {scan-assembler-times "veor" 1} } */
3600
+/* 6 vshr for right shifts by constant, and variable right shift uses
3601
+ vshl with a negative amount in register. */
3602
+/* { dg-final {scan-assembler-times "vshr" 6} } */
3603
+/* { dg-final {scan-assembler-times "vshl" 2} } */
3604
--- a/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
3605
+++ b/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
3607
+/* { dg-do compile } */
3608
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3609
+/* { dg-options "-O2" } */
3610
+/* { dg-add-options arm_v8_vfp } */
3613
+foo (double x, double y)
3615
+ return __builtin_isunordered (x, y) ? x : y;
3618
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
3619
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
3620
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
3626
+char src[16] = {0};
3628
void aligned_src (char *dest)
3631
/* Expect a multi-word load for the main part of the copy, but subword
3632
loads/stores for the remainder. */
3634
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
3635
-/* { dg-final { scan-assembler-times "ldrh" 1 } } */
3636
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
3637
+/* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */
3638
+/* { dg-final { scan-assembler-times "strd" 0 } } */
3639
+/* { dg-final { scan-assembler-times "stm" 0 } } */
3640
+/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
3641
/* { dg-final { scan-assembler-times "strh" 1 } } */
3642
-/* { dg-final { scan-assembler-times "ldrb" 1 } } */
3643
+/* { dg-final { scan-assembler-times "ldrb" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
3644
/* { dg-final { scan-assembler-times "strb" 1 } } */
3645
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
3646
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
3648
+/* { dg-do compile } */
3649
+/* { dg-options "-O1" } */
3651
+long long muld(long long X, long long Y)
3656
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
3657
--- a/src/gcc/testsuite/gcc.target/arm/negdi-4.c
3658
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-4.c
3660
+/* { dg-do compile } */
3661
+/* { dg-require-effective-target arm32 } */
3662
+/* { dg-options "-O2" } */
3664
+signed long long negdi_extendsidi (signed int x)
3666
+ return -((signed long long) x);
3671
+ mov r1, r0, asr #31
3673
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
3674
+/* { dg-final { scan-assembler-times "asr" 1 } } */
3675
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
3676
--- a/src/gcc/testsuite/gcc.target/arm/vselltdf.c
3677
+++ b/src/gcc/testsuite/gcc.target/arm/vselltdf.c
3679
+/* { dg-do compile } */
3680
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3681
+/* { dg-options "-O2" } */
3682
+/* { dg-add-options arm_v8_vfp } */
3685
+foo (double x, double y)
3687
+ volatile int i = 0;
3688
+ return i < 0 ? x : y;
3691
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
3692
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
3693
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
3700
+char src[16] = { 0 };
3701
+char dest[16] = { 0 };
3703
void aligned_both (void)
3707
/* We know both src and dest to be aligned: expect multiword loads/stores. */
3709
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
3710
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
3711
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
3712
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
3713
+/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */
3714
+/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */
3715
+/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */
3716
+/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */
3717
--- a/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
3718
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
3720
+/* { dg-do compile } */
3721
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3722
+/* { dg-options "-O2" } */
3723
+/* { dg-add-options arm_v8_vfp } */
3726
+foo (double x, double y)
3728
+ volatile int i = 0;
3729
+ return i == 0 ? x : y;
3732
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
3733
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
3734
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
3736
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3737
+/* { dg-do compile } */
3738
+/* { dg-options "-O2" } */
3739
+/* { dg-add-options arm_arch_v8a } */
3741
+#include "../aarch64/atomic-op-acquire.x"
3743
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3744
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3745
+/* { dg-final { scan-assembler-not "dmb" } } */
3746
--- a/src/gcc/testsuite/gcc.target/arm/vsellesf.c
3747
+++ b/src/gcc/testsuite/gcc.target/arm/vsellesf.c
3749
+/* { dg-do compile } */
3750
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3751
+/* { dg-options "-O2" } */
3752
+/* { dg-add-options arm_v8_vfp } */
3755
+foo (float x, float y)
3757
+ volatile int i = 0;
3758
+ return i <= 0 ? x : y;
3761
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
3762
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
3763
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
3765
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3766
+/* { dg-do compile } */
3767
+/* { dg-options "-O2" } */
3768
+/* { dg-add-options arm_arch_v8a } */
3770
+#include "../aarch64/atomic-op-int.x"
3772
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3773
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3774
+/* { dg-final { scan-assembler-not "dmb" } } */
3775
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
3776
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
3778
+/* { dg-require-effective-target arm_arch_v8a_ok } */
3779
+/* { dg-do compile } */
3780
+/* { dg-options "-O2" } */
3781
+/* { dg-add-options arm_arch_v8a } */
3783
+#include "../aarch64/atomic-op-short.x"
3785
+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3786
+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
3787
+/* { dg-final { scan-assembler-not "dmb" } } */
3788
--- a/src/gcc/testsuite/gcc.target/arm/pr40887.c
3789
+++ b/src/gcc/testsuite/gcc.target/arm/pr40887.c
3791
/* { dg-options "-O2 -march=armv5te" } */
3792
/* { dg-final { scan-assembler "blx" } } */
3794
-int (*indirect_func)();
3795
+int (*indirect_func)(int x);
3799
- return indirect_func();
3800
+ return indirect_func(20) + indirect_func (40);
3802
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
3803
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
3805
+/* { dg-do compile } */
3806
+/* { dg-require-effective-target arm_v8_neon_ok } */
3807
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
3808
+/* { dg-add-options arm_v8_neon } */
3813
+foo (float *output, float *input)
3816
+ /* Vectorizable. */
3817
+ for (i = 0; i < N; i++)
3818
+ output[i] = __builtin_ceilf (input[i]);
3821
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_ceilf } } } */
3822
+/* { dg-final { cleanup-tree-dump "vect" } } */
3823
--- a/src/gcc/testsuite/gcc.target/arm/vselledf.c
3824
+++ b/src/gcc/testsuite/gcc.target/arm/vselledf.c
3826
+/* { dg-do compile } */
3827
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3828
+/* { dg-options "-O2" } */
3829
+/* { dg-add-options arm_v8_vfp } */
3832
+foo (double x, double y)
3834
+ volatile int i = 0;
3835
+ return i <= 0 ? x : y;
3838
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
3839
--- a/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
3840
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
3842
+/* { dg-do compile } */
3843
+/* { dg-require-effective-target arm_v8_vfp_ok } */
3844
+/* { dg-options "-O2" } */
3845
+/* { dg-add-options arm_v8_vfp } */
3848
+foo (float x, float y)
3850
+ volatile int i = 0;
3851
+ return i > 0 ? x : y;
3854
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
3855
--- a/src/gcc/testsuite/gcc.target/arm/pr57637.c
3856
+++ b/src/gcc/testsuite/gcc.target/arm/pr57637.c
3858
+/* { dg-do run } */
3859
+/* { dg-options "-O2 -fno-inline" } */
3861
+typedef struct _GtkCssStyleProperty GtkCssStyleProperty;
3863
+struct _GtkCssStyleProperty
3865
+ int *initial_value;
3867
+ unsigned int inherit :1;
3868
+ unsigned int animated :1;
3869
+ unsigned int affects_size :1;
3870
+ unsigned int affects_font :1;
3872
+ int * parse_value;
3873
+ int * query_value;
3874
+ int * assign_value;
3878
+g_assertion_message_expr (const char *domain,
3882
+ const char *expr) __attribute__((__noreturn__));
3885
+g_assertion_message_expr (const char *domain,
3891
+ __builtin_abort ();
3894
+get_id (GtkCssStyleProperty *property)
3899
+_gtk_css_style_property_get_type ()
3904
+GtkCssStyleProperty *
3905
+g_object_new (int object_type,
3906
+ const char *first_property_name,
3909
+ return (GtkCssStyleProperty *) __builtin_malloc (sizeof (GtkCssStyleProperty));
3913
+ INHERIT = (1 << 0),
3914
+ ANIMATED = (1 << 1),
3915
+ RESIZE = (1 << 2),
3917
+} GtkStylePropertyFlags;
3921
+gtk_css_style_property_register (const char * name,
3927
+ int *assign_value,
3928
+ int *initial_value)
3930
+ GtkCssStyleProperty *node;
3934
+ if (__builtin_expect (__extension__ (
3936
+ int _g_boolean_var_;
3937
+ if (initial_value != ((void *)0))
3938
+ _g_boolean_var_ = 1;
3940
+ _g_boolean_var_ = 0;
3946
+ g_assertion_message_expr ("Gtk",
3947
+ "gtkcssstylepropertyimpl.c",
3949
+ ((const char*) (__PRETTY_FUNCTION__)),
3950
+ "initial_value != NULL");
3955
+ if (__builtin_expect (__extension__ (
3957
+ int _g_boolean_var_;
3958
+ if (parse_value != ((void *)0))
3959
+ _g_boolean_var_ = 1;
3961
+ _g_boolean_var_ = 0;
3967
+ g_assertion_message_expr ("Gtk",
3968
+ "gtkcssstylepropertyimpl.c",
3970
+ ((const char*) (__PRETTY_FUNCTION__)),
3971
+ "parse_value != NULL");
3976
+ if (__builtin_expect (__extension__ (
3978
+ int _g_boolean_var_;
3979
+ if (value_type == ((int) ((1) << (2)))
3980
+ || query_value != ((void *)0))
3981
+ _g_boolean_var_ = 1;
3983
+ _g_boolean_var_ = 0;
3989
+ g_assertion_message_expr ("Gtk",
3990
+ "gtkcssstylepropertyimpl.c",
3991
+ 87, ((const char*) (__PRETTY_FUNCTION__)),
3992
+ "value_type == NONE || query_value != NULL");
3995
+ /* FLAGS is changed in a cond_exec instruction with pr57637. */
4001
+ if (__builtin_expect (__extension__ (
4003
+ int _g_boolean_var_;
4004
+ if (value_type == ((1) << (2))
4005
+ || assign_value != ((void *)0))
4006
+ _g_boolean_var_ = 1;
4008
+ _g_boolean_var_ = 0;
4014
+ g_assertion_message_expr ("Gtk",
4015
+ "gtkcssstylepropertyimpl.c",
4016
+ 88, ((const char*) (__PRETTY_FUNCTION__)),
4017
+ "value_type == NONE || assign_value != NULL");
4020
+ node = g_object_new ((_gtk_css_style_property_get_type ()),
4021
+ "value-type", value_type,
4022
+ "affects-size", (flags & RESIZE) ? (0) : (!(0)),
4023
+ "affects-font", (flags & FONT) ? (!(0)) : (0),
4024
+ "animated", (flags & ANIMATED) ? (!(0)) : (0),
4025
+ "inherit", (flags & INHERIT) ? (!(0)) : (0),
4026
+ "initial-value", initial_value,
4030
+ node->parse_value = parse_value;
4031
+ node->query_value = query_value;
4032
+ node->assign_value = assign_value;
4036
+ if (__builtin_expect (__extension__ (
4038
+ int _g_boolean_var_;
4039
+ if (get_id (node) == expected_id)
4040
+ _g_boolean_var_ = 1;
4042
+ _g_boolean_var_ = 0;
4048
+ g_assertion_message_expr ("Gtk",
4049
+ "gtkcssstylepropertyimpl.c",
4051
+ ((const char*) (__PRETTY_FUNCTION__)),
4052
+ "get_id (node) == expected_id");
4058
+ gtk_css_style_property_register ("test", 1, 4, 15, &t, &t, &t, &t);
4061
+ __builtin_abort ();
4064
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
4065
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
4067
+/* { dg-do run } */
4068
+/* { dg-options "-O3 --save-temps" } */
4070
+#include <arm_neon.h>
4072
+#include <stdlib.h>
4075
+test_frecps_float32_t (void)
4078
+ float32_t value = 0.2;
4079
+ float32_t reciprocal = 5.0;
4080
+ float32_t step = vrecpes_f32 (value);
4081
+ /* 3 steps should give us within ~0.001 accuracy. */
4082
+ for (i = 0; i < 3; i++)
4083
+ step = step * vrecpss_f32 (step, value);
4085
+ return fabs (step - reciprocal) < 0.001;
4088
+/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
4089
+/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
4092
+test_frecps_float32x2_t (void)
4097
+ const float32_t value_pool[] = {0.2, 0.4};
4098
+ const float32_t reciprocal_pool[] = {5.0, 2.5};
4099
+ float32x2_t value = vld1_f32 (value_pool);
4100
+ float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
4102
+ float32x2_t step = vrecpe_f32 (value);
4103
+ /* 3 steps should give us within ~0.001 accuracy. */
4104
+ for (i = 0; i < 3; i++)
4105
+ step = step * vrecps_f32 (step, value);
4107
+ ret &= fabs (vget_lane_f32 (step, 0)
4108
+ - vget_lane_f32 (reciprocal, 0)) < 0.001;
4109
+ ret &= fabs (vget_lane_f32 (step, 1)
4110
+ - vget_lane_f32 (reciprocal, 1)) < 0.001;
4115
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
4116
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
4119
+test_frecps_float32x4_t (void)
4124
+ const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
4125
+ const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
4126
+ float32x4_t value = vld1q_f32 (value_pool);
4127
+ float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
4129
+ float32x4_t step = vrecpeq_f32 (value);
4130
+ /* 3 steps should give us within ~0.001 accuracy. */
4131
+ for (i = 0; i < 3; i++)
4132
+ step = step * vrecpsq_f32 (step, value);
4134
+ ret &= fabs (vgetq_lane_f32 (step, 0)
4135
+ - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
4136
+ ret &= fabs (vgetq_lane_f32 (step, 1)
4137
+ - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
4138
+ ret &= fabs (vgetq_lane_f32 (step, 2)
4139
+ - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
4140
+ ret &= fabs (vgetq_lane_f32 (step, 3)
4141
+ - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
4146
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
4147
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
4150
+test_frecps_float64_t (void)
4153
+ float64_t value = 0.2;
4154
+ float64_t reciprocal = 5.0;
4155
+ float64_t step = vrecped_f64 (value);
4156
+ /* 3 steps should give us within ~0.001 accuracy. */
4157
+ for (i = 0; i < 3; i++)
4158
+ step = step * vrecpsd_f64 (step, value);
4160
+ return fabs (step - reciprocal) < 0.001;
4163
+/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
4164
+/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
4167
+test_frecps_float64x2_t (void)
4172
+ const float64_t value_pool[] = {0.2, 0.4};
4173
+ const float64_t reciprocal_pool[] = {5.0, 2.5};
4174
+ float64x2_t value = vld1q_f64 (value_pool);
4175
+ float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
4177
+ float64x2_t step = vrecpeq_f64 (value);
4178
+ /* 3 steps should give us within ~0.001 accuracy. */
4179
+ for (i = 0; i < 3; i++)
4180
+ step = step * vrecpsq_f64 (step, value);
4182
+ ret &= fabs (vgetq_lane_f64 (step, 0)
4183
+ - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
4184
+ ret &= fabs (vgetq_lane_f64 (step, 1)
4185
+ - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
4190
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
4191
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
4194
+main (int argc, char **argv)
4196
+ if (!test_frecps_float32_t ())
4198
+ if (!test_frecps_float32x2_t ())
4200
+ if (!test_frecps_float32x4_t ())
4202
+ if (!test_frecps_float64_t ())
4204
+ if (!test_frecps_float64x2_t ())
4210
+/* { dg-final { cleanup-saved-temps } } */
4211
--- a/src/gcc/testsuite/gcc.target/aarch64/ands_2.c
4212
+++ b/src/gcc/testsuite/gcc.target/aarch64/ands_2.c
4214
+/* { dg-do run } */
4215
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4217
+extern void abort (void);
4220
+ands_si_test1 (int a, int b, int c)
4224
+ /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4225
+ /* { dg-final { scan-assembler-times "and\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 2 } } */
4233
+ands_si_test2 (int a, int b, int c)
4235
+ int d = a & 0x99999999;
4237
+ /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, -1717986919" } } */
4238
+ /* { dg-final { scan-assembler "and\tw\[0-9\]+, w\[0-9\]+, -1717986919" } } */
4246
+ands_si_test3 (int a, int b, int c)
4248
+ int d = a & (b << 3);
4250
+ /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4251
+ /* { dg-final { scan-assembler "and\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4258
+typedef long long s64;
4261
+ands_di_test1 (s64 a, s64 b, s64 c)
4265
+ /* { dg-final { scan-assembler-not "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4266
+ /* { dg-final { scan-assembler-times "and\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } */
4274
+ands_di_test2 (s64 a, s64 b, s64 c)
4276
+ s64 d = a & 0xaaaaaaaaaaaaaaaall;
4278
+ /* { dg-final { scan-assembler-not "ands\tx\[0-9\]+, x\[0-9\]+, -6148914691236517206" } } */
4279
+ /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, -6148914691236517206" } } */
4287
+ands_di_test3 (s64 a, s64 b, s64 c)
4289
+ s64 d = a & (b << 3);
4291
+ /* { dg-final { scan-assembler-not "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4292
+ /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4305
+ x = ands_si_test1 (29, 4, 5);
4309
+ x = ands_si_test1 (5, 2, 20);
4313
+ x = ands_si_test2 (29, 4, 5);
4317
+ x = ands_si_test2 (1024, 2, 20);
4321
+ x = ands_si_test3 (35, 4, 5);
4325
+ x = ands_si_test3 (5, 2, 20);
4329
+ y = ands_di_test1 (0x130000029ll,
4333
+ if (y != ((0x130000029ll & 0x320000004ll) + 0x320000004ll + 0x505050505ll))
4336
+ y = ands_di_test1 (0x5000500050005ll,
4337
+ 0x2111211121112ll,
4338
+ 0x0000000002020ll);
4339
+ if (y != 0x5000500052025ll)
4342
+ y = ands_di_test2 (0x130000029ll,
4345
+ if (y != ((0x130000029ll & 0xaaaaaaaaaaaaaaaall) + 0x320000004ll + 0x505050505ll))
4348
+ y = ands_di_test2 (0x540004100ll,
4351
+ if (y != (0x540004100ll + 0x805050205ll))
4354
+ y = ands_di_test3 (0x130000029ll,
4357
+ if (y != ((0x130000029ll & (0x064000008ll << 3))
4358
+ + 0x064000008ll + 0x505050505ll))
4361
+ y = ands_di_test3 (0x130002900ll,
4364
+ if (y != (0x130002900ll + 0x505050505ll))
4370
+/* { dg-final { cleanup-saved-temps } } */
4371
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c
4372
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c
4374
+/* { dg-do run } */
4375
+/* { dg-options "-O3 --save-temps" } */
4377
+#include <arm_neon.h>
4379
+extern void abort (void);
4380
+extern float fabsf (float);
4381
+extern double fabs (double);
4383
+#define NUM_TESTS 8
4385
+float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
4386
+float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f};
4387
+double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
4388
+double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5};
4390
+#define TEST(TEST, CMP, SUFFIX, WIDTH, F) \
4392
+test_fca##TEST##SUFFIX##_float##WIDTH##_t (void) \
4396
+ uint##WIDTH##_t output[NUM_TESTS]; \
4398
+ for (i = 0; i < NUM_TESTS; i++) \
4400
+ float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \
4401
+ float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \
4402
+ /* Inhibit optimization of our linear test loop. */ \
4403
+ asm volatile ("" : : : "memory"); \
4404
+ output[i] = f1 CMP f2 ? -1 : 0; \
4407
+ for (i = 0; i < NUM_TESTS; i++) \
4409
+ output[i] = vca##TEST##SUFFIX##_f##WIDTH (input_##SUFFIX##1[i], \
4410
+ input_##SUFFIX##2[i]) \
4412
+ /* Inhibit autovectorization of our scalar test loop. */ \
4413
+ asm volatile ("" : : : "memory"); \
4416
+ for (i = 0; i < NUM_TESTS; i++) \
4417
+ ret |= output[i]; \
4422
+TEST (ge, >=, s, 32, f)
4423
+/* { dg-final { scan-assembler "facge\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
4424
+TEST (ge, >=, d, 64, )
4425
+/* { dg-final { scan-assembler "facge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
4426
+TEST (gt, >, s, 32, f)
4427
+/* { dg-final { scan-assembler "facgt\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
4428
+TEST (gt, >, d, 64, )
4429
+/* { dg-final { scan-assembler "facgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
4432
+main (int argc, char **argv)
4434
+ if (test_fcages_float32_t ())
4436
+ if (test_fcaged_float64_t ())
4438
+ if (test_fcagts_float32_t ())
4440
+ if (test_fcagtd_float64_t ())
4445
+/* { dg-final { cleanup-saved-temps } } */
4446
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
4447
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
4452
+atomic_fetch_add_ACQ_REL (int a)
4454
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
4458
+atomic_fetch_sub_ACQ_REL (int a)
4460
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
4464
+atomic_fetch_and_ACQ_REL (int a)
4466
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
4470
+atomic_fetch_nand_ACQ_REL (int a)
4472
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
4476
+atomic_fetch_xor_ACQ_REL (int a)
4478
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
4482
+atomic_fetch_or_ACQ_REL (int a)
4484
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
4486
--- a/src/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c
4487
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c
4489
+/* { dg-do run } */
4490
+/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model" } */
4492
+typedef signed char S8_t;
4493
+typedef signed short S16_t;
4494
+typedef signed int S32_t;
4495
+typedef signed long S64_t;
4496
+typedef signed char *__restrict__ pS8_t;
4497
+typedef signed short *__restrict__ pS16_t;
4498
+typedef signed int *__restrict__ pS32_t;
4499
+typedef signed long *__restrict__ pS64_t;
4500
+typedef unsigned char U8_t;
4501
+typedef unsigned short U16_t;
4502
+typedef unsigned int U32_t;
4503
+typedef unsigned long U64_t;
4504
+typedef unsigned char *__restrict__ pU8_t;
4505
+typedef unsigned short *__restrict__ pU16_t;
4506
+typedef unsigned int *__restrict__ pU32_t;
4507
+typedef unsigned long *__restrict__ pU64_t;
4509
+extern void abort ();
4512
+test_addS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c)
4515
+ for (i = 0; i < 4; i++)
4516
+ a[i] += (S64_t) b[i] * (S64_t) c[i];
4519
+/* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.2d" } } */
4520
+/* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.2d" } } */
4523
+test_addS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c)
4526
+ for (i = 0; i < 8; i++)
4527
+ a[i] += (S32_t) b[i] * (S32_t) c[i];
4530
+/* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.4s" } } */
4531
+/* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.4s" } } */
4534
+test_addS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c)
4537
+ for (i = 0; i < 16; i++)
4538
+ a[i] += (S16_t) b[i] * (S16_t) c[i];
4542
+test_addS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c)
4545
+ for (i = 0; i < 16; i++)
4546
+ a[i] += (S16_t) -b[i] * (S16_t) -c[i];
4550
+test_addS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c)
4553
+ for (i = 0; i < 16; i++)
4554
+ a[i] -= (S16_t) b[i] * (S16_t) -c[i];
4558
+test_addS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c)
4561
+ for (i = 0; i < 16; i++)
4562
+ a[i] -= (S16_t) -b[i] * (S16_t) c[i];
4565
+/* { dg-final { scan-assembler-times "smlal\tv\[0-9\]+\.8h" 4 } } */
4566
+/* { dg-final { scan-assembler-times "smlal2\tv\[0-9\]+\.8h" 4 } } */
4569
+test_subS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c)
4572
+ for (i = 0; i < 4; i++)
4573
+ a[i] -= (S64_t) b[i] * (S64_t) c[i];
4576
+/* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.2d" } } */
4577
+/* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.2d" } } */
4580
+test_subS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c)
4583
+ for (i = 0; i < 8; i++)
4584
+ a[i] -= (S32_t) b[i] * (S32_t) c[i];
4587
+/* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.4s" } } */
4588
+/* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.4s" } } */
4591
+test_subS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c)
4594
+ for (i = 0; i < 16; i++)
4595
+ a[i] -= (S16_t) b[i] * (S16_t) c[i];
4599
+test_subS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c)
4602
+ for (i = 0; i < 16; i++)
4603
+ a[i] += (S16_t) -b[i] * (S16_t) c[i];
4607
+test_subS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c)
4610
+ for (i = 0; i < 16; i++)
4611
+ a[i] += (S16_t) b[i] * (S16_t) -c[i];
4615
+test_subS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c)
4618
+ for (i = 0; i < 16; i++)
4619
+ a[i] += -((S16_t) b[i] * (S16_t) c[i]);
4623
+test_subS16_tS8_t16_neg3 (pS16_t a, pS8_t b, pS8_t c)
4626
+ for (i = 0; i < 16; i++)
4627
+ a[i] -= (S16_t) -b[i] * (S16_t) -c[i];
4630
+/* { dg-final { scan-assembler-times "smlsl\tv\[0-9\]+\.8h" 5 } } */
4631
+/* { dg-final { scan-assembler-times "smlsl2\tv\[0-9\]+\.8h" 5 } } */
4634
+test_addU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c)
4637
+ for (i = 0; i < 4; i++)
4638
+ a[i] += (U64_t) b[i] * (U64_t) c[i];
4641
+/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.2d" } } */
4642
+/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.2d" } } */
4645
+test_addU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c)
4648
+ for (i = 0; i < 8; i++)
4649
+ a[i] += (U32_t) b[i] * (U32_t) c[i];
4652
+/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.4s" } } */
4653
+/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.4s" } } */
4656
+test_addU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c)
4659
+ for (i = 0; i < 16; i++)
4660
+ a[i] += (U16_t) b[i] * (U16_t) c[i];
4663
+/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.8h" } } */
4664
+/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.8h" } } */
4667
+test_subU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c)
4670
+ for (i = 0; i < 4; i++)
4671
+ a[i] -= (U64_t) b[i] * (U64_t) c[i];
4674
+/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.2d" } } */
4675
+/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.2d" } } */
4678
+test_subU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c)
4681
+ for (i = 0; i < 8; i++)
4682
+ a[i] -= (U32_t) b[i] * (U32_t) c[i];
4685
+/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.4s" } } */
4686
+/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.4s" } } */
4689
+test_subU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c)
4692
+ for (i = 0; i < 16; i++)
4693
+ a[i] -= (U16_t) b[i] * (U16_t) c[i];
4696
+/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.8h" } } */
4697
+/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.8h" } } */
4700
+S64_t add_rS64[4] = { 6, 7, -4, -3 };
4701
+S32_t add_rS32[8] = { 6, 7, -4, -3, 10, 11, 0, 1 };
4702
+S16_t add_rS16[16] =
4703
+ { 6, 7, -4, -3, 10, 11, 0, 1, 14, 15, 4, 5, 18, 19, 8, 9 };
4705
+S64_t sub_rS64[4] = { 0, 1, 2, 3 };
4706
+S32_t sub_rS32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
4707
+S16_t sub_rS16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4709
+U64_t add_rU64[4] = { 0x6, 0x7, 0x2fffffffc, 0x2fffffffd };
4711
+U32_t add_rU32[8] =
4713
+ 0x6, 0x7, 0x2fffc, 0x2fffd,
4714
+ 0xa, 0xb, 0x30000, 0x30001
4717
+U16_t add_rU16[16] =
4719
+ 0x6, 0x7, 0x2fc, 0x2fd, 0xa, 0xb, 0x300, 0x301,
4720
+ 0xe, 0xf, 0x304, 0x305, 0x12, 0x13, 0x308, 0x309
4723
+U64_t sub_rU64[4] = { 0, 1, 2, 3 };
4724
+U32_t sub_rU32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
4725
+U16_t sub_rU16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4727
+S8_t neg_r[16] = { -6, -5, 8, 9, -2, -1, 12, 13, 2, 3, 16, 17, 6, 7, 20, 21 };
4729
+S64_t S64_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4730
+S32_t S32_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
4731
+S32_t S32_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
4733
+S32_t S32_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4734
+S16_t S16_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
4735
+S16_t S16_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
4737
+S16_t S16_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4738
+S8_t S8_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
4739
+S8_t S8_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
4742
+#define CHECK(T,N,AS,US) \
4745
+ for (i = 0; i < N; i++) \
4746
+ if (S##T##_ta[i] != AS##_r##US##T[i]) \
4751
+#define SCHECK(T,N,AS) CHECK(T,N,AS,S)
4752
+#define UCHECK(T,N,AS) CHECK(T,N,AS,U)
4754
+#define NCHECK(RES) \
4757
+ for (i = 0; i < 16; i++) \
4758
+ if (S16_ta[i] != RES[i]) \
4769
+ test_addS64_tS32_t4 (S64_ta, S32_tb, S32_tc);
4770
+ SCHECK (64, 4, add);
4771
+ test_addS32_tS16_t8 (S32_ta, S16_tb, S16_tc);
4772
+ SCHECK (32, 8, add);
4773
+ test_addS16_tS8_t16 (S16_ta, S8_tb, S8_tc);
4774
+ SCHECK (16, 16, add);
4775
+ test_subS64_tS32_t4 (S64_ta, S32_tb, S32_tc);
4776
+ SCHECK (64, 4, sub);
4777
+ test_subS32_tS16_t8 (S32_ta, S16_tb, S16_tc);
4778
+ SCHECK (32, 8, sub);
4779
+ test_subS16_tS8_t16 (S16_ta, S8_tb, S8_tc);
4780
+ SCHECK (16, 16, sub);
4782
+ test_addU64_tU32_t4 (S64_ta, S32_tb, S32_tc);
4783
+ UCHECK (64, 4, add);
4784
+ test_addU32_tU16_t8 (S32_ta, S16_tb, S16_tc);
4785
+ UCHECK (32, 8, add);
4786
+ test_addU16_tU8_t16 (S16_ta, S8_tb, S8_tc);
4787
+ UCHECK (16, 16, add);
4788
+ test_subU64_tU32_t4 (S64_ta, S32_tb, S32_tc);
4789
+ UCHECK (64, 4, sub);
4790
+ test_subU32_tU16_t8 (S32_ta, S16_tb, S16_tc);
4791
+ UCHECK (32, 8, sub);
4792
+ test_subU16_tU8_t16 (S16_ta, S8_tb, S8_tc);
4793
+ UCHECK (16, 16, sub);
4795
+ test_addS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc);
4796
+ NCHECK (add_rS16);
4797
+ test_subS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc);
4798
+ NCHECK (sub_rS16);
4799
+ test_addS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc);
4800
+ NCHECK (add_rS16);
4801
+ test_subS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc);
4802
+ NCHECK (sub_rS16);
4803
+ test_addS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc);
4804
+ NCHECK (add_rS16);
4805
+ test_subS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc);
4806
+ NCHECK (sub_rS16);
4807
+ test_subS16_tS8_t16_neg3 (S16_ta, S8_tb, S8_tc);
4813
+/* { dg-final { cleanup-saved-temps } } */
4814
--- a/src/gcc/testsuite/gcc.target/aarch64/extr.c
4815
+++ b/src/gcc/testsuite/gcc.target/aarch64/extr.c
4817
+/* { dg-options "-O2 --save-temps" } */
4818
+/* { dg-do run } */
4820
+extern void abort (void);
4823
+test_si (int a, int b)
4825
+ /* { dg-final { scan-assembler "extr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, 27\n" } } */
4826
+ return (a << 5) | ((unsigned int) b >> 27);
4830
+test_di (long long a, long long b)
4832
+ /* { dg-final { scan-assembler "extr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, 45\n" } } */
4833
+ return (a << 19) | ((unsigned long long) b >> 45);
4841
+ v = test_si (0x00000004, 0x30000000);
4842
+ if (v != 0x00000086)
4844
+ w = test_di (0x0001040040040004ll, 0x0070050066666666ll);
4845
+ if (w != 0x2002002000200380ll)
4850
+/* { dg-final { cleanup-saved-temps } } */
4851
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
4852
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
4854
/* { dg-final { scan-assembler "uminv" } } */
4855
/* { dg-final { scan-assembler "smaxv" } } */
4856
/* { dg-final { scan-assembler "sminv" } } */
4857
+/* { dg-final { scan-assembler "sabd" } } */
4858
+/* { dg-final { scan-assembler "saba" } } */
4859
/* { dg-final { scan-assembler-times "addv" 2} } */
4860
/* { dg-final { scan-assembler-times "addp" 2} } */
4861
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
4862
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
4864
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
4866
#define FTYPE double
4871
#include "vect-fcm.x"
4873
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
4874
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
4875
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4876
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
4877
/* { dg-final { cleanup-tree-dump "vect" } } */
4878
--- a/src/gcc/testsuite/gcc.target/aarch64/adds3.c
4879
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds3.c
4881
+/* { dg-do run } */
4882
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4884
+extern void abort (void);
4885
+typedef long long s64;
4888
+adds_ext (s64 a, int b, int c)
4899
+adds_shift_ext (s64 a, int b, int c)
4901
+ s64 d = (a + ((s64)b << 3));
4914
+ x = adds_ext (0x13000002ll, 41, 15);
4915
+ if (x != 318767203)
4918
+ x = adds_ext (0x50505050ll, 29, 4);
4919
+ if (x != 1347440782)
4922
+ x = adds_ext (0x12121212121ll, 2, 14);
4923
+ if (x != 555819315)
4926
+ x = adds_shift_ext (0x123456789ll, 4, 12);
4927
+ if (x != 591751097)
4930
+ x = adds_shift_ext (0x02020202ll, 9, 8);
4931
+ if (x != 33686107)
4934
+ x = adds_shift_ext (0x987987987987ll, 23, 41);
4935
+ if (x != -2020050305)
4941
+/* { dg-final { scan-assembler-times "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
4942
--- a/src/gcc/testsuite/gcc.target/aarch64/subs2.c
4943
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs2.c
4945
+/* { dg-do run } */
4946
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4948
+extern void abort (void);
4951
+subs_si_test1 (int a, int b, int c)
4955
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4956
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4964
+subs_si_test2 (int a, int b, int c)
4966
+ int d = a - 0xfff;
4968
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
4969
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
4977
+subs_si_test3 (int a, int b, int c)
4979
+ int d = a - (b << 3);
4981
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4982
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4989
+typedef long long s64;
4992
+subs_di_test1 (s64 a, s64 b, s64 c)
4996
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4997
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
5005
+subs_di_test2 (s64 a, s64 b, s64 c)
5007
+ s64 d = a - 0x1000ll;
5009
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
5010
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
5018
+subs_di_test3 (s64 a, s64 b, s64 c)
5020
+ s64 d = a - (b << 3);
5022
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
5023
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
5035
+ x = subs_si_test1 (29, 4, 5);
5039
+ x = subs_si_test1 (5, 2, 20);
5043
+ x = subs_si_test2 (29, 4, 5);
5047
+ x = subs_si_test2 (1024, 2, 20);
5051
+ x = subs_si_test3 (35, 4, 5);
5055
+ x = subs_si_test3 (5, 2, 20);
5059
+ y = subs_di_test1 (0x130000029ll,
5063
+ if (y != 0x63505052e)
5066
+ y = subs_di_test1 (0x5000500050005ll,
5067
+ 0x2111211121112ll,
5068
+ 0x0000000002020ll);
5069
+ if (y != 0x5000500052025)
5072
+ y = subs_di_test2 (0x130000029ll,
5075
+ if (y != 0x95504f532)
5078
+ y = subs_di_test2 (0x540004100ll,
5081
+ if (y != 0x1065053309)
5084
+ y = subs_di_test3 (0x130000029ll,
5087
+ if (y != 0x63505052e)
5090
+ y = subs_di_test3 (0x130002900ll,
5093
+ if (y != 0x635052e05)
5099
+/* { dg-final { cleanup-saved-temps } } */
5100
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c
5101
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c
5103
+/* { dg-do run } */
5104
+/* { dg-options "-O3 --save-temps -ffast-math" } */
5106
+#include <arm_neon.h>
5108
+extern void abort (void);
5110
+#define NUM_TESTS 16
5111
+#define DELTA 0.000001
5113
+int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
5114
+ -4, 34, 110, -110, 6, 4, 75, -34};
5115
+int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
5116
+ -4, 34, 110, -110, 6, 4, 75, -34};
5117
+int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
5118
+ -4, 34, 110, -110, 6, 4, 75, -34};
5120
+uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
5121
+ 4, 34, 110, 110, 6, 4, 75, 34};
5122
+uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
5123
+ 4, 34, 110, 110, 6, 4, 75, 34};
5124
+uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
5125
+ 4, 34, 110, 110, 6, 4, 75, 34};
5127
+#define EQUAL(a, b) (a == b)
5129
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \
5131
+test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \
5134
+ int moves = (NUM_TESTS - LANES) + 1; \
5135
+ TYPE##_t out_l[NUM_TESTS]; \
5136
+ TYPE##_t out_v[NUM_TESTS]; \
5138
+ /* Calculate linearly. */ \
5139
+ for (i = 0; i < moves; i++) \
5141
+ out_l[i] = input_##TYPE[i]; \
5142
+ for (j = 0; j < LANES; j++) \
5143
+ out_l[i] = input_##TYPE[i + j] CMP_OP out_l[i] ? \
5144
+ input_##TYPE[i + j] : out_l[i]; \
5147
+ /* Calculate using vector reduction intrinsics. */ \
5148
+ for (i = 0; i < moves; i++) \
5150
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
5151
+ out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \
5155
+ for (i = 0; i < moves; i++) \
5157
+ if (!EQUAL (out_v[i], out_l[i])) \
5163
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64) \
5164
+TEST (max, >, STYPE, , TYPE, W32) \
5165
+TEST (max, >, STYPE, q, TYPE, W64) \
5166
+TEST (min, <, STYPE, , TYPE, W32) \
5167
+TEST (min, <, STYPE, q, TYPE, W64)
5169
+BUILD_VARIANTS (int8, s8, 8, 16)
5170
+/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
5171
+/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
5172
+/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
5173
+/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
5174
+BUILD_VARIANTS (uint8, u8, 8, 16)
5175
+/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
5176
+/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
5177
+/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
5178
+/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
5179
+BUILD_VARIANTS (int16, s16, 4, 8)
5180
+/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
5181
+/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
5182
+/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
5183
+/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
5184
+BUILD_VARIANTS (uint16, u16, 4, 8)
5185
+/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
5186
+/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
5187
+/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
5188
+/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
5189
+BUILD_VARIANTS (int32, s32, 2, 4)
5190
+/* { dg-final { scan-assembler "smaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5191
+/* { dg-final { scan-assembler "sminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5192
+/* { dg-final { scan-assembler "smaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5193
+/* { dg-final { scan-assembler "sminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5194
+BUILD_VARIANTS (uint32, u32, 2, 4)
5195
+/* { dg-final { scan-assembler "umaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5196
+/* { dg-final { scan-assembler "uminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5197
+/* { dg-final { scan-assembler "umaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5198
+/* { dg-final { scan-assembler "uminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5201
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \
5203
+ if (!test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t ()) \
5208
+main (int argc, char **argv)
5210
+ BUILD_VARIANTS (int8, s8, 8, 16)
5211
+ BUILD_VARIANTS (uint8, u8, 8, 16)
5212
+ BUILD_VARIANTS (int16, s16, 4, 8)
5213
+ BUILD_VARIANTS (uint16, u16, 4, 8)
5214
+ BUILD_VARIANTS (int32, s32, 2, 4)
5215
+ BUILD_VARIANTS (uint32, u32, 2, 4)
5219
+/* { dg-final { cleanup-saved-temps } } */
5220
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
5221
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
5223
+/* { dg-do run } */
5224
+/* { dg-options "-O3 --save-temps" } */
5226
+#include <arm_neon.h>
5228
+#include <stdlib.h>
5231
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
5232
+float32_t rec_f[] =
5233
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
5235
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
5236
+float32_t rec_d[] =
5237
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
5240
+test_frecpx_float32_t (void)
5244
+ for (i = 0; i < 8; i++)
5245
+ ret &= fabs (vrecpxs_f32 (in_f[i]) - rec_f[i]) < 0.001;
5250
+/* { dg-final { scan-assembler "frecpx\\ts\[0-9\]+, s\[0-9\]+" } } */
5253
+test_frecpx_float64_t (void)
5257
+ for (i = 0; i < 8; i++)
5258
+ ret &= fabs (vrecpxd_f64 (in_d[i]) - rec_d[i]) < 0.001;
5263
+/* { dg-final { scan-assembler "frecpx\\td\[0-9\]+, d\[0-9\]+" } } */
5266
+main (int argc, char **argv)
5268
+ if (!test_frecpx_float32_t ())
5270
+ if (!test_frecpx_float64_t ())
5276
+/* { dg-final { cleanup-saved-temps } } */
5277
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c
5278
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c
5280
+/* { dg-do run } */
5281
+/* { dg-options "-O3 --save-temps" } */
5283
+#include <arm_neon.h>
5285
+extern void abort (void);
5286
+extern float fabsf (float);
5287
+extern double fabs (double);
5289
+#define NUM_TESTS 8
5291
+float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
5292
+float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f};
5293
+double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
5294
+double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5};
5296
+#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \
5298
+test_vca##T##_float##WIDTH##x##LANES##_t (void) \
5302
+ uint##WIDTH##_t output[NUM_TESTS]; \
5304
+ for (i = 0; i < NUM_TESTS; i++) \
5306
+ float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \
5307
+ float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \
5308
+ /* Inhibit optimization of our linear test loop. */ \
5309
+ asm volatile ("" : : : "memory"); \
5310
+ output[i] = f1 CMP f2 ? -1 : 0; \
5313
+ for (i = 0; i < NUM_TESTS; i += LANES) \
5315
+ float##WIDTH##x##LANES##_t in1 = \
5316
+ vld1##Q##_f##WIDTH (input_##SUFFIX##1 + i); \
5317
+ float##WIDTH##x##LANES##_t in2 = \
5318
+ vld1##Q##_f##WIDTH (input_##SUFFIX##2 + i); \
5319
+ uint##WIDTH##x##LANES##_t expected_out = \
5320
+ vld1##Q##_u##WIDTH (output + i); \
5321
+ uint##WIDTH##x##LANES##_t out = \
5322
+ veor##Q##_u##WIDTH (vca##T##Q##_f##WIDTH (in1, in2), \
5324
+ vst1##Q##_u##WIDTH (output + i, out); \
5327
+ for (i = 0; i < NUM_TESTS; i++) \
5328
+ ret |= output[i]; \
5333
+#define BUILD_VARIANTS(T, CMP) \
5334
+TEST (T, CMP, s, 32, 2, , f) \
5335
+TEST (T, CMP, s, 32, 4, q, f) \
5336
+TEST (T, CMP, d, 64, 2, q, )
5338
+BUILD_VARIANTS (ge, >=)
5339
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5340
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5341
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5343
+BUILD_VARIANTS (gt, >)
5344
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5345
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5346
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5348
+/* No need for another scan-assembler as these tests
5349
+ also generate facge, facgt instructions. */
5350
+BUILD_VARIANTS (le, <=)
5351
+BUILD_VARIANTS (lt, <)
5354
+#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \
5355
+if (test_vca##T##_float##WIDTH##x##LANES##_t ()) \
5359
+main (int argc, char **argv)
5361
+BUILD_VARIANTS (ge, >=)
5362
+BUILD_VARIANTS (gt, >)
5363
+BUILD_VARIANTS (le, <=)
5364
+BUILD_VARIANTS (lt, <)
5368
+/* { dg-final { cleanup-saved-temps } } */
5369
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
5370
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
5372
+/* { dg-do run } */
5373
+/* { dg-options "-O3 --save-temps" } */
5375
+#include <arm_neon.h>
5377
+extern void abort (void);
5378
+extern float fabsf (float);
5379
+extern double fabs (double);
5381
+extern double trunc (double);
5382
+extern double round (double);
5383
+extern double nearbyint (double);
5384
+extern double floor (double);
5385
+extern double ceil (double);
5386
+extern double rint (double);
5388
+extern float truncf (float);
5389
+extern float roundf (float);
5390
+extern float nearbyintf (float);
5391
+extern float floorf (float);
5392
+extern float ceilf (float);
5393
+extern float rintf (float);
5395
+#define NUM_TESTS 8
5396
+#define DELTA 0.000001
5398
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
5399
+ 200.0f, -800.0f, -13.0f, -0.5f};
5400
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
5401
+ 200.0, -800.0, -13.0, -0.5};
5403
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
5405
+test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \
5409
+ int nlanes = LANES; \
5410
+ float##WIDTH##_t expected_out[NUM_TESTS]; \
5411
+ float##WIDTH##_t actual_out[NUM_TESTS]; \
5413
+ for (i = 0; i < NUM_TESTS; i++) \
5415
+ expected_out[i] = C_FN##F (input_f##WIDTH[i]); \
5416
+ /* Don't vectorize this. */ \
5417
+ asm volatile ("" : : : "memory"); \
5420
+ /* Prevent the compiler from noticing these two loops do the same \
5421
+ thing and optimizing away the comparison. */ \
5422
+ asm volatile ("" : : : "memory"); \
5424
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
5426
+ float##WIDTH##x##LANES##_t out = \
5427
+ vrnd##SUFFIX##Q##_f##WIDTH \
5428
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
5429
+ vst1##Q##_f##WIDTH (actual_out + i, out); \
5432
+ for (i = 0; i < NUM_TESTS; i++) \
5433
+ ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \
5439
+#define BUILD_VARIANTS(SUFFIX, C_FN) \
5440
+TEST (SUFFIX, , 32, 2, C_FN, f) \
5441
+TEST (SUFFIX, q, 32, 4, C_FN, f) \
5442
+TEST (SUFFIX, q, 64, 2, C_FN, ) \
5444
+BUILD_VARIANTS ( , trunc)
5445
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5446
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5447
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5448
+BUILD_VARIANTS (a, round)
5449
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5450
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5451
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5452
+BUILD_VARIANTS (i, nearbyint)
5453
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5454
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5455
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5456
+BUILD_VARIANTS (m, floor)
5457
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5458
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5459
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5460
+BUILD_VARIANTS (p, ceil)
5461
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5462
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5463
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5464
+BUILD_VARIANTS (x, rint)
5465
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5466
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5467
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5470
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
5472
+ if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \
5477
+main (int argc, char **argv)
5479
+ BUILD_VARIANTS ( , trunc)
5480
+ BUILD_VARIANTS (a, round)
5481
+ BUILD_VARIANTS (i, nearbyint)
5482
+ BUILD_VARIANTS (m, floor)
5483
+ BUILD_VARIANTS (p, ceil)
5484
+ BUILD_VARIANTS (x, rint)
5488
+/* { dg-final { cleanup-saved-temps } } */
5489
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
5490
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
5492
/* { dg-do compile } */
5493
/* { dg-options "-O2" } */
5496
+#include "atomic-op-relaxed.x"
5499
-atomic_fetch_add_RELAXED (int a)
5501
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
5505
-atomic_fetch_sub_RELAXED (int a)
5507
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
5511
-atomic_fetch_and_RELAXED (int a)
5513
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
5517
-atomic_fetch_nand_RELAXED (int a)
5519
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
5523
-atomic_fetch_xor_RELAXED (int a)
5525
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
5529
-atomic_fetch_or_RELAXED (int a)
5531
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
5534
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5535
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5536
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
5537
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
5539
2.0, -4.0, 8.0, -16.0,
5540
-2.125, 4.25, -8.5, 17.0};
5542
+/* Float comparisons, float results. */
5545
foo (FTYPE *in1, FTYPE *in2, FTYPE *output)
5548
output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
5551
+/* Float comparisons, int results. */
5554
+foo_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
5557
+ /* Vectorizable. */
5558
+ for (i = 0; i < N; i++)
5559
+ output[i] = (in1[i] OP in2[i]) ? 2 : 4;
5563
+bar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
5566
+ /* Vectorizable. */
5567
+ for (i = 0; i < N; i++)
5568
+ output[i] = (in1[i] INV_OP in2[i]) ? 4 : 2;
5572
+foobar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
5575
+ /* Vectorizable. */
5576
+ for (i = 0; i < N; i++)
5577
+ output[i] = (in1[i] OP 0.0) ? 4 : 2;
5581
+foobarbar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
5584
+ /* Vectorizable. */
5585
+ for (i = 0; i < N; i++)
5586
+ output[i] = (in1[i] INV_OP 0.0) ? 4 : 2;
5590
main (int argc, char **argv)
5598
foo (input1, input2, out1);
5599
bar (input1, input2, out2);
5601
for (i = 0; i < N; i++)
5602
if (out1[i] == out2[i])
5605
+ foo_int (input1, input2, outi1);
5606
+ bar_int (input1, input2, outi2);
5607
+ for (i = 0; i < N; i++)
5608
+ if (outi1[i] != outi2[i])
5610
+ foobar_int (input1, input2, outi1);
5611
+ foobarbar_int (input1, input2, outi2);
5612
+ for (i = 0; i < N; i++)
5613
+ if (outi1[i] == outi2[i])
5618
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
5619
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
5622
+/* { dg-do compile } */
5623
+/* { dg-options "-O3" } */
5625
+#include "arm_neon.h"
5627
+#include "vaddv-intrinsic.x"
5629
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
5630
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
5631
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
5632
--- a/src/gcc/testsuite/gcc.target/aarch64/movi_1.c
5633
+++ b/src/gcc/testsuite/gcc.target/aarch64/movi_1.c
5635
+/* { dg-do compile } */
5636
+/* { dg-options "-O2" } */
5641
+ /* { dg-final { scan-assembler "movi\tv\[0-9\]+\.4h, 0x4, lsl 8" } } */
5642
+ /* { dg-final { scan-assembler-not "movi\tv\[0-9\]+\.4h, 0x400" } } */
5643
+ /* { dg-final { scan-assembler-not "movi\tv\[0-9\]+\.4h, 1024" } } */
5644
+ register short x asm ("h8") = 1024;
5645
+ asm volatile ("" : : "w" (x));
5648
--- a/src/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
5649
+++ b/src/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
5651
+/* { dg-do run } */
5652
+/* { dg-options "-O3 --save-temps" } */
5654
+#include <arm_neon.h>
5656
+extern void abort (void);
5658
+#define ETYPE(size) int##size##_t
5659
+#define VTYPE(size, lanes) int##size##x##lanes##_t
5661
+#define TEST_VABS(q, size, lanes) \
5663
+test_vabs##q##_##size (ETYPE (size) * res, \
5664
+ const ETYPE (size) *in1) \
5666
+ VTYPE (size, lanes) a = vld1##q##_s##size (res); \
5667
+ VTYPE (size, lanes) b = vld1##q##_s##size (in1); \
5668
+ a = vabs##q##_s##size (b); \
5669
+ vst1##q##_s##size (res, a); \
5672
+#define BUILD_VARS(width, n_lanes, n_half_lanes) \
5673
+TEST_VABS (, width, n_half_lanes) \
5674
+TEST_VABS (q, width, n_lanes) \
5676
+BUILD_VARS (64, 2, 1)
5677
+BUILD_VARS (32, 4, 2)
5678
+BUILD_VARS (16, 8, 4)
5679
+BUILD_VARS (8, 16, 8)
5681
+#define POOL1 {-10}
5682
+#define POOL2 {2, -10}
5683
+#define POOL4 {0, -10, 2, -3}
5684
+#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70}
5685
+#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \
5686
+ -5, 10, -2, 3, -4, 50, -6, 70}
5688
+#define EXPECTED1 {10}
5689
+#define EXPECTED2 {2, 10}
5690
+#define EXPECTED4 {0, 10, 2, 3}
5691
+#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70}
5692
+#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \
5693
+ 5, 10, 2, 3, 4, 50, 6, 70}
5695
+#define BUILD_TEST(size, lanes_64, lanes_128) \
5697
+test_##size (void) \
5700
+ ETYPE (size) pool1[lanes_64] = POOL##lanes_64; \
5701
+ ETYPE (size) res1[lanes_64] = {0}; \
5702
+ ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \
5703
+ ETYPE (size) pool2[lanes_128] = POOL##lanes_128; \
5704
+ ETYPE (size) res2[lanes_128] = {0}; \
5705
+ ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \
5707
+ /* Forcefully avoid optimization. */ \
5708
+ asm volatile ("" : : : "memory"); \
5709
+ test_vabs_##size (res1, pool1); \
5710
+ for (i = 0; i < lanes_64; i++) \
5711
+ if (res1[i] != expected1[i]) \
5714
+ /* Forcefully avoid optimization. */ \
5715
+ asm volatile ("" : : : "memory"); \
5716
+ test_vabsq_##size (res2, pool2); \
5717
+ for (i = 0; i < lanes_128; i++) \
5718
+ if (res2[i] != expected2[i]) \
5722
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
5723
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
5724
+BUILD_TEST (8 , 8, 16)
5726
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
5727
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
5728
+BUILD_TEST (16, 4, 8)
5730
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
5731
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
5732
+BUILD_TEST (32, 2, 4)
5734
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
5735
+BUILD_TEST (64, 1, 2)
5739
+#define BUILD_TEST(size) test_##size ()
5742
+main (int argc, char **argv)
5751
+/* { dg-final { cleanup-saved-temps } } */
5752
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
5753
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
5758
+atomic_fetch_add_RELAXED (int a)
5760
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
5764
+atomic_fetch_sub_RELAXED (int a)
5766
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
5770
+atomic_fetch_and_RELAXED (int a)
5772
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
5776
+atomic_fetch_nand_RELAXED (int a)
5778
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
5782
+atomic_fetch_xor_RELAXED (int a)
5784
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
5788
+atomic_fetch_or_RELAXED (int a)
5790
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
5792
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.c
5793
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.c
5795
int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
5796
unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
5797
unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
5798
+ int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
5799
+ int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
5800
int reduce_smax_value = 0;
5801
int reduce_smin_value = -15;
5802
unsigned int reduce_umax_value = 15;
5809
TESTV (reduce_smax, s);
5810
TESTV (reduce_smin, s);
5811
TESTV (reduce_umax, u);
5812
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c
5813
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c
5815
+/* { dg-do compile } */
5816
+/* { dg-options "-g -mgeneral-regs-only" } */
5819
+foo (const char *c, ...)
5822
+ buf[256 - 1] = '\0';
5824
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-movi.c
5825
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-movi.c
5827
+/* { dg-do run } */
5828
+/* { dg-options "-O3 --save-temps -fno-inline" } */
5830
+extern void abort (void);
5835
+movi_msl8 (int *__restrict a)
5839
+ /* { dg-final { scan-assembler "movi\\tv\[0-9\]+\.4s, 0xab, msl 8" } } */
5840
+ for (i = 0; i < N; i++)
5845
+movi_msl16 (int *__restrict a)
5849
+ /* { dg-final { scan-assembler "movi\\tv\[0-9\]+\.4s, 0xab, msl 16" } } */
5850
+ for (i = 0; i < N; i++)
5855
+mvni_msl8 (int *__restrict a)
5859
+ /* { dg-final { scan-assembler "mvni\\tv\[0-9\]+\.4s, 0xab, msl 8" } } */
5860
+ for (i = 0; i < N; i++)
5861
+ a[i] = 0xffff5400;
5865
+mvni_msl16 (int *__restrict a)
5869
+ /* { dg-final { scan-assembler "mvni\\tv\[0-9\]+\.4s, 0xab, msl 16" } } */
5870
+ for (i = 0; i < N; i++)
5871
+ a[i] = 0xff540000;
5880
+#define CHECK_ARRAY(a, val) \
5881
+ for (i = 0; i < N; i++) \
5882
+ if (a[i] != val) \
5886
+ CHECK_ARRAY (a, 0xabff);
5889
+ CHECK_ARRAY (a, 0xabffff);
5892
+ CHECK_ARRAY (a, 0xffff5400);
5895
+ CHECK_ARRAY (a, 0xff540000);
5900
+/* { dg-final { cleanup-saved-temps } } */
5901
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
5902
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
5904
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
5906
#define FTYPE double
5911
#include "vect-fcm.x"
5913
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
5914
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
5915
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5916
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
5917
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
5918
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
5919
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
5921
/* { dg-do compile } */
5922
/* { dg-options "-O2" } */
5925
+#include "atomic-op-acquire.x"
5928
-atomic_fetch_add_ACQUIRE (int a)
5930
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
5934
-atomic_fetch_sub_ACQUIRE (int a)
5936
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
5940
-atomic_fetch_and_ACQUIRE (int a)
5942
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
5946
-atomic_fetch_nand_ACQUIRE (int a)
5948
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
5952
-atomic_fetch_xor_ACQUIRE (int a)
5954
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
5958
-atomic_fetch_or_ACQUIRE (int a)
5960
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
5963
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5964
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5965
--- a/src/gcc/testsuite/gcc.target/aarch64/abs_1.c
5966
+++ b/src/gcc/testsuite/gcc.target/aarch64/abs_1.c
5968
+/* { dg-do run } */
5969
+/* { dg-options "-O2 -fno-inline --save-temps" } */
5971
+extern long long llabs (long long);
5972
+extern void abort (void);
5975
+abs64 (long long a)
5977
+ /* { dg-final { scan-assembler "eor\t" } } */
5978
+ /* { dg-final { scan-assembler "sub\t" } } */
5983
+abs64_in_dreg (long long a)
5985
+ /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */
5986
+ register long long x asm ("d8") = a;
5987
+ register long long y asm ("d9");
5988
+ asm volatile ("" : : "w" (x));
5990
+ asm volatile ("" : : "w" (y));
5997
+ volatile long long ll0 = 0LL, ll1 = 1LL, llm1 = -1LL;
5999
+ if (abs64 (ll0) != 0LL)
6002
+ if (abs64 (ll1) != 1LL)
6005
+ if (abs64 (llm1) != 1LL)
6008
+ if (abs64_in_dreg (ll0) != 0LL)
6011
+ if (abs64_in_dreg (ll1) != 1LL)
6014
+ if (abs64_in_dreg (llm1) != 1LL)
6020
+/* { dg-final { cleanup-saved-temps } } */
6021
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
6022
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
6024
/* { dg-do compile } */
6025
/* { dg-options "-O2" } */
6030
+#include "atomic-comp-swap-release-acquire.x"
6033
-atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
6035
- return __atomic_compare_exchange (&v, &a, &b,
6036
- STRONG, __ATOMIC_RELEASE,
6037
- __ATOMIC_ACQUIRE);
6041
-atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
6043
- return __atomic_compare_exchange (&v, &a, &b,
6044
- WEAK, __ATOMIC_RELEASE,
6045
- __ATOMIC_ACQUIRE);
6049
-atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
6051
- return __atomic_compare_exchange_n (&v, &a, b,
6052
- STRONG, __ATOMIC_RELEASE,
6053
- __ATOMIC_ACQUIRE);
6057
-atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
6059
- return __atomic_compare_exchange_n (&v, &a, b,
6060
- WEAK, __ATOMIC_RELEASE,
6061
- __ATOMIC_ACQUIRE);
6064
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
6065
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
6066
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.x
6067
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x
6068
@@ -138,3 +138,17 @@
6073
+void sabd (pRINT a, pRINT b, pRINT c)
6076
+ for (i = 0; i < 16; i++)
6077
+ c[i] = abs (a[i] - b[i]);
6080
+void saba (pRINT a, pRINT b, pRINT c)
6083
+ for (i = 0; i < 16; i++)
6084
+ c[i] += abs (a[i] - b[i]);
6086
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c
6087
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c
6089
+/* { dg-do run } */
6090
+/* { dg-options "-O3 -save-temps -fno-inline" } */
6092
+extern void abort ();
6095
+count_lz_v4si (unsigned *__restrict a, int *__restrict b)
6099
+ for (i = 0; i < 4; i++)
6100
+ b[i] = __builtin_clz (a[i]);
6103
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */
6108
+ unsigned int x[4] = { 0x0, 0xFFFF, 0x1FFFF, 0xFFFFFFFF };
6109
+ int r[4] = { 32, 16, 15, 0 };
6112
+ count_lz_v4si (x, d);
6114
+ for (i = 0; i < 4; i++)
6123
+/* { dg-final { cleanup-saved-temps } } */
6124
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
6125
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
6127
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
6134
#include "vect-fcm.x"
6136
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
6137
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
6138
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
6139
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
6140
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
6141
--- a/src/gcc/testsuite/gcc.target/aarch64/subs3.c
6142
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs3.c
6144
+/* { dg-do run } */
6145
+/* { dg-options "-O2 --save-temps -fno-inline" } */
6147
+extern void abort (void);
6148
+typedef long long s64;
6151
+subs_ext (s64 a, int b, int c)
6162
+subs_shift_ext (s64 a, int b, int c)
6164
+ s64 d = (a - ((s64)b << 3));
6177
+ x = subs_ext (0x13000002ll, 41, 15);
6178
+ if (x != 318767121)
6181
+ x = subs_ext (0x50505050ll, 29, 4);
6182
+ if (x != 1347440724)
6185
+ x = subs_ext (0x12121212121ll, 2, 14);
6186
+ if (x != 555819311)
6189
+ x = subs_shift_ext (0x123456789ll, 4, 12);
6190
+ if (x != 591751033)
6193
+ x = subs_shift_ext (0x02020202ll, 9, 8);
6194
+ if (x != 33685963)
6197
+ x = subs_shift_ext (0x987987987987ll, 23, 41);
6198
+ if (x != -2020050673)
6204
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
6205
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
6206
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
6211
+atomic_fetch_add_ACQUIRE (int a)
6213
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
6217
+atomic_fetch_sub_ACQUIRE (int a)
6219
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
6223
+atomic_fetch_and_ACQUIRE (int a)
6225
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
6229
+atomic_fetch_nand_ACQUIRE (int a)
6231
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
6235
+atomic_fetch_xor_ACQUIRE (int a)
6237
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
6241
+atomic_fetch_or_ACQUIRE (int a)
6243
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
6245
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
6246
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
6249
+/* { dg-do run } */
6250
+/* { dg-options "-O3" } */
6252
+#include "arm_neon.h"
6254
+extern void abort (void);
6256
+#include "vaddv-intrinsic.x"
6261
+ const float32_t pool_v2sf[] = {4.0f, 9.0f};
6262
+ const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
6263
+ const float64_t pool_v2df[] = {4.0, 9.0};
6265
+ if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
6268
+ if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
6271
+ if (test_vaddv_v2df (pool_v2df) != 13.0)
6276
--- a/src/gcc/testsuite/gcc.target/aarch64/sbc.c
6277
+++ b/src/gcc/testsuite/gcc.target/aarch64/sbc.c
6279
+/* { dg-do run } */
6280
+/* { dg-options "-O2 --save-temps" } */
6282
+extern void abort (void);
6284
+typedef unsigned int u32int;
6285
+typedef unsigned long long u64int;
6288
+test_si (u32int w1, u32int w2, u32int w3, u32int w4)
6291
+ /* { dg-final { scan-assembler "sbc\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+\n" } } */
6292
+ w0 = w1 - w2 - (w3 < w4);
6297
+test_di (u64int x1, u64int x2, u64int x3, u64int x4)
6300
+ /* { dg-final { scan-assembler "sbc\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+\n" } } */
6301
+ x0 = x1 - x2 - (x3 < x4);
6310
+ x = test_si (7, 8, 12, 15);
6313
+ y = test_di (0x987654321ll, 0x123456789ll, 0x345345345ll, 0x123123123ll);
6314
+ if (y != 0x8641fdb98ll)
6319
+/* { dg-final { cleanup-saved-temps } } */
6320
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
6321
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
6329
+atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
6331
+ return __atomic_compare_exchange (&v, &a, &b,
6332
+ STRONG, __ATOMIC_RELEASE,
6333
+ __ATOMIC_ACQUIRE);
6337
+atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
6339
+ return __atomic_compare_exchange (&v, &a, &b,
6340
+ WEAK, __ATOMIC_RELEASE,
6341
+ __ATOMIC_ACQUIRE);
6345
+atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
6347
+ return __atomic_compare_exchange_n (&v, &a, b,
6348
+ STRONG, __ATOMIC_RELEASE,
6349
+ __ATOMIC_ACQUIRE);
6353
+atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
6355
+ return __atomic_compare_exchange_n (&v, &a, b,
6356
+ WEAK, __ATOMIC_RELEASE,
6357
+ __ATOMIC_ACQUIRE);
6359
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
6360
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
6362
/* { dg-do compile } */
6363
-/* { dg-options "-O2" } */
6364
+/* { dg-options "-O2 -dp" } */
6366
-#include "../../../config/aarch64/arm_neon.h"
6367
+#include <arm_neon.h>
6369
+/* Used to force a variable to a SIMD register. */
6370
+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
6373
+ : /* No clobbers */);
6375
/* { dg-final { scan-assembler-times "\\tadd\\tx\[0-9\]+" 2 } } */
6382
+/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+, d\[0-9\]+" 1 } } */
6385
+test_vabs_s64 (int64x1_t a)
6389
+ res = vabs_s64 (a);
6394
/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
6397
test_vceqd_s64 (int64x1_t a, int64x1_t b)
6399
- return vceqd_s64 (a, b);
6403
+ res = vceqd_s64 (a, b);
6408
/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
6411
test_vceqzd_s64 (int64x1_t a)
6413
- return vceqzd_s64 (a);
6416
+ res = vceqzd_s64 (a);
6421
/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
6424
test_vcged_s64 (int64x1_t a, int64x1_t b)
6426
- return vcged_s64 (a, b);
6430
+ res = vcged_s64 (a, b);
6436
test_vcled_s64 (int64x1_t a, int64x1_t b)
6438
- return vcled_s64 (a, b);
6442
+ res = vcled_s64 (a, b);
6447
-/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
6448
+/* Idiom recognition will cause this testcase not to generate
6449
+ the expected cmge instruction, so do not check for it. */
6452
test_vcgezd_s64 (int64x1_t a)
6454
- return vcgezd_s64 (a);
6457
+ res = vcgezd_s64 (a);
6462
/* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
6465
test_vcged_u64 (uint64x1_t a, uint64x1_t b)
6467
- return vcged_u64 (a, b);
6471
+ res = vcged_u64 (a, b);
6476
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
6477
@@ -77,13 +124,23 @@
6479
test_vcgtd_s64 (int64x1_t a, int64x1_t b)
6481
- return vcgtd_s64 (a, b);
6485
+ res = vcgtd_s64 (a, b);
6491
test_vcltd_s64 (int64x1_t a, int64x1_t b)
6493
- return vcltd_s64 (a, b);
6497
+ res = vcltd_s64 (a, b);
6502
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
6505
test_vcgtzd_s64 (int64x1_t a)
6507
- return vcgtzd_s64 (a);
6510
+ res = vcgtzd_s64 (a);
6515
/* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
6518
test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
6520
- return vcgtd_u64 (a, b);
6524
+ res = vcgtd_u64 (a, b);
6529
/* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
6530
@@ -107,18 +173,27 @@
6532
test_vclezd_s64 (int64x1_t a)
6534
- return vclezd_s64 (a);
6537
+ res = vclezd_s64 (a);
6542
-/* { dg-final { scan-assembler-times "\\tcmlt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
6543
+/* Idiom recognition will cause this testcase not to generate
6544
+ the expected cmlt instruction, so do not check for it. */
6547
test_vcltzd_s64 (int64x1_t a)
6549
- return vcltzd_s64 (a);
6552
+ res = vcltzd_s64 (a);
6557
-/* { dg-final { scan-assembler-times "\\tdup\\tb\[0-9\]+, v\[0-9\]+\.b" 2 } } */
6558
+/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv16qi" 2 } } */
6561
test_vdupb_lane_s8 (int8x16_t a)
6563
return vdupb_lane_u8 (a, 2);
6566
-/* { dg-final { scan-assembler-times "\\tdup\\th\[0-9\]+, v\[0-9\]+\.h" 2 } } */
6567
+/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv8hi" 2 } } */
6570
test_vduph_lane_s16 (int16x8_t a)
6572
return vduph_lane_u16 (a, 2);
6575
-/* { dg-final { scan-assembler-times "\\tdup\\ts\[0-9\]+, v\[0-9\]+\.s" 2 } } */
6576
+/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv4si" 2 } } */
6579
test_vdups_lane_s32 (int32x4_t a)
6580
@@ -160,18 +235,18 @@
6581
return vdups_lane_u32 (a, 2);
6584
-/* { dg-final { scan-assembler-times "\\tdup\\td\[0-9\]+, v\[0-9\]+\.d" 2 } } */
6585
+/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv2di" 2 } } */
6588
test_vdupd_lane_s64 (int64x2_t a)
6590
- return vdupd_lane_s64 (a, 2);
6591
+ return vdupd_lane_s64 (a, 1);
6595
test_vdupd_lane_u64 (uint64x2_t a)
6597
- return vdupd_lane_u64 (a, 2);
6598
+ return vdupd_lane_u64 (a, 1);
6601
/* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
6602
@@ -179,13 +254,23 @@
6604
test_vtst_s64 (int64x1_t a, int64x1_t b)
6606
- return vtstd_s64 (a, b);
6610
+ res = vtstd_s64 (a, b);
6616
test_vtst_u64 (uint64x1_t a, uint64x1_t b)
6618
- return vtstd_u64 (a, b);
6622
+ res = vtstd_s64 (a, b);
6627
/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
6628
@@ -722,8 +807,11 @@
6629
return vrshld_u64 (a, b);
6632
-/* { dg-final { scan-assembler-times "\\tasr\\tx\[0-9\]+" 1 } } */
6633
+/* Other intrinsics can generate an asr instruction (vcltzd, vcgezd),
6634
+ so we cannot check scan-assembler-times. */
6636
+/* { dg-final { scan-assembler "\\tasr\\tx\[0-9\]+" } } */
6639
test_vshrd_n_s64 (int64x1_t a)
6641
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
6642
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
6644
/* { dg-do compile } */
6645
/* { dg-options "-O2" } */
6648
+#include "atomic-op-int.x"
6651
-atomic_fetch_add_RELAXED (int a)
6653
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
6657
-atomic_fetch_sub_RELAXED (int a)
6659
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
6663
-atomic_fetch_and_RELAXED (int a)
6665
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
6669
-atomic_fetch_nand_RELAXED (int a)
6671
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
6675
-atomic_fetch_xor_RELAXED (int a)
6677
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
6681
-atomic_fetch_or_RELAXED (int a)
6683
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
6686
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
6687
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
6688
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
6689
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
6691
/* { dg-do compile } */
6692
/* { dg-options "-O2" } */
6695
+#include "atomic-op-seq_cst.x"
6698
-atomic_fetch_add_SEQ_CST (int a)
6700
- return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
6704
-atomic_fetch_sub_SEQ_CST (int a)
6706
- return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
6710
-atomic_fetch_and_SEQ_CST (int a)
6712
- return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
6716
-atomic_fetch_nand_SEQ_CST (int a)
6718
- return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
6722
-atomic_fetch_xor_SEQ_CST (int a)
6724
- return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
6728
-atomic_fetch_or_SEQ_CST (int a)
6730
- return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
6733
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
6734
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
6735
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
6736
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
6740
+test_vaddv_v2sf (const float32_t *pool)
6744
+ val = vld1_f32 (pool);
6745
+ return vaddv_f32 (val);
6749
+test_vaddv_v4sf (const float32_t *pool)
6753
+ val = vld1q_f32 (pool);
6754
+ return vaddvq_f32 (val);
6758
+test_vaddv_v2df (const float64_t *pool)
6762
+ val = vld1q_f64 (pool);
6763
+ return vaddvq_f64 (val);
6765
--- a/src/gcc/testsuite/gcc.target/aarch64/negs.c
6766
+++ b/src/gcc/testsuite/gcc.target/aarch64/negs.c
6768
+/* { dg-do run } */
6769
+/* { dg-options "-O2 --save-temps" } */
6771
+extern void abort (void);
6775
+negs_si_test1 (int a, int b, int c)
6779
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+" } } */
6788
+negs_si_test3 (int a, int b, int c)
6790
+ int d = -(b) << 3;
6792
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 3" } } */
6800
+typedef long long s64;
6804
+negs_di_test1 (s64 a, s64 b, s64 c)
6808
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+" } } */
6817
+negs_di_test3 (s64 a, s64 b, s64 c)
6819
+ s64 d = -(b) << 3;
6821
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 3" } } */
6834
+ x = negs_si_test1 (2, 12, 5);
6838
+ x = negs_si_test1 (1, 2, 32);
6842
+ x = negs_si_test3 (13, 14, 5);
6846
+ x = negs_si_test3 (15, 21, 2);
6850
+ y = negs_di_test1 (0x20202020ll,
6853
+ if (y != 0x62636263ll)
6856
+ y = negs_di_test1 (0x1010101010101ll,
6857
+ 0x123456789abcdll,
6858
+ 0x5555555555555ll);
6859
+ if (y != 0x6565656565656ll)
6862
+ y = negs_di_test3 (0x62523781ll,
6865
+ if (y != 0xfffffffd553d4edbll)
6868
+ y = negs_di_test3 (0x763526268ll,
6871
+ if (y != 0xfffffffb1b1b1b1bll)
6876
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
6877
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
6879
/* { dg-do compile } */
6880
/* { dg-options "-O2" } */
6883
+#include "atomic-op-consume.x"
6886
-atomic_fetch_add_CONSUME (int a)
6888
- return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
6892
-atomic_fetch_sub_CONSUME (int a)
6894
- return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
6898
-atomic_fetch_and_CONSUME (int a)
6900
- return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
6904
-atomic_fetch_nand_CONSUME (int a)
6906
- return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
6910
-atomic_fetch_xor_CONSUME (int a)
6912
- return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
6916
-atomic_fetch_or_CONSUME (int a)
6918
- return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
6921
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
6922
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
6923
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
6924
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
6926
+/* { dg-do run } */
6927
+/* { dg-options "-O3 --save-temps -ffast-math" } */
6929
+#include <arm_neon.h>
6931
+extern void abort (void);
6932
+extern float fabsf (float);
6933
+extern double fabs (double);
6935
+#define NUM_TESTS 16
6936
+#define DELTA 0.000001
6938
+int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
6939
+ -4, 34, 110, -110, 6, 4, 75, -34};
6940
+int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
6941
+ -4, 34, 110, -110, 6, 4, 75, -34};
6942
+int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
6943
+ -4, 34, 110, -110, 6, 4, 75, -34};
6944
+int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76,
6945
+ -4, 34, 110, -110, 6, 4, 75, -34};
6947
+uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
6948
+ 4, 34, 110, 110, 6, 4, 75, 34};
6949
+uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
6950
+ 4, 34, 110, 110, 6, 4, 75, 34};
6951
+uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
6952
+ 4, 34, 110, 110, 6, 4, 75, 34};
6954
+uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76,
6955
+ 4, 34, 110, 110, 6, 4, 75, 34};
6957
+float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
6958
+ 200.0f, -800.0f, -13.0f, -0.5f,
6959
+ 7.9f, -870.0f, 10.4f, 310.11f,
6960
+ 0.0f, -865.0f, -2213.0f, -1.5f};
6962
+double input_float64[] = {0.1, -0.1, 0.4, 10.3,
6963
+ 200.0, -800.0, -13.0, -0.5,
6964
+ 7.9, -870.0, 10.4, 310.11,
6965
+ 0.0, -865.0, -2213.0, -1.5};
6967
+#define EQUALF(a, b) (fabsf (a - b) < DELTA)
6968
+#define EQUALD(a, b) (fabs (a - b) < DELTA)
6969
+#define EQUALL(a, b) (a == b)
6971
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \
6973
+test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void) \
6976
+ int moves = (NUM_TESTS - LANES) + 1; \
6977
+ TYPE##_t out_l[NUM_TESTS]; \
6978
+ TYPE##_t out_v[NUM_TESTS]; \
6980
+ /* Calculate linearly. */ \
6981
+ for (i = 0; i < moves; i++) \
6983
+ out_l[i] = input_##TYPE[i]; \
6984
+ for (j = 1; j < LANES; j++) \
6985
+ out_l[i] += input_##TYPE[i + j]; \
6988
+ /* Calculate using vector reduction intrinsics. */ \
6989
+ for (i = 0; i < moves; i++) \
6991
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
6992
+ out_v[i] = vaddv##Q##_##SUFFIX (t1); \
6996
+ for (i = 0; i < moves; i++) \
6998
+ if (!EQUAL##FLOAT (out_v[i], out_l[i])) \
7004
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \
7005
+TEST (STYPE, , TYPE, W32, F) \
7006
+TEST (STYPE, q, TYPE, W64, F) \
7008
+BUILD_VARIANTS (int8, s8, 8, 16, L)
7009
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
7010
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
7011
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
7012
+BUILD_VARIANTS (int16, s16, 4, 8, L)
7013
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
7014
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
7015
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
7016
+BUILD_VARIANTS (int32, s32, 2, 4, L)
7017
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
7018
+/* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
7019
+/* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
7020
+TEST (s64, q, int64, 2, D)
7021
+TEST (u64, q, uint64, 2, D)
7022
+/* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
7024
+BUILD_VARIANTS (float32, f32, 2, 4, F)
7025
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
7026
+/* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
7027
+TEST (f64, q, float64, 2, D)
7028
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
7031
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \
7033
+ if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ()) \
7038
+main (int argc, char **argv)
7040
+BUILD_VARIANTS (int8, s8, 8, 16, L)
7041
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
7042
+BUILD_VARIANTS (int16, s16, 4, 8, L)
7043
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
7044
+BUILD_VARIANTS (int32, s32, 2, 4, L)
7045
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
7047
+BUILD_VARIANTS (float32, f32, 2, 4, F)
7048
+TEST (f64, q, float64, 2, D)
7053
+/* { dg-final { cleanup-saved-temps } } */
7054
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
7055
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
7057
/* { dg-do compile } */
7058
/* { dg-options "-O2" } */
7061
+#include "atomic-op-char.x"
7064
-atomic_fetch_add_RELAXED (char a)
7066
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
7070
-atomic_fetch_sub_RELAXED (char a)
7072
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
7076
-atomic_fetch_and_RELAXED (char a)
7078
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
7082
-atomic_fetch_nand_RELAXED (char a)
7084
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
7088
-atomic_fetch_xor_RELAXED (char a)
7090
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
7094
-atomic_fetch_or_RELAXED (char a)
7096
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
7099
/* { dg-final { scan-assembler-times "ldxrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
7100
/* { dg-final { scan-assembler-times "stxrb\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
7101
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
7102
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
7107
+atomic_fetch_add_RELAXED (int a)
7109
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
7113
+atomic_fetch_sub_RELAXED (int a)
7115
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
7119
+atomic_fetch_and_RELAXED (int a)
7121
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
7125
+atomic_fetch_nand_RELAXED (int a)
7127
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
7131
+atomic_fetch_xor_RELAXED (int a)
7133
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
7137
+atomic_fetch_or_RELAXED (int a)
7139
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
7141
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
7142
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
7147
+atomic_fetch_add_SEQ_CST (int a)
7149
+ return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
7153
+atomic_fetch_sub_SEQ_CST (int a)
7155
+ return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
7159
+atomic_fetch_and_SEQ_CST (int a)
7161
+ return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
7165
+atomic_fetch_nand_SEQ_CST (int a)
7167
+ return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
7171
+atomic_fetch_xor_SEQ_CST (int a)
7173
+ return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
7177
+atomic_fetch_or_SEQ_CST (int a)
7179
+ return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
7181
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
7182
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
7187
+atomic_fetch_add_CONSUME (int a)
7189
+ return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
7193
+atomic_fetch_sub_CONSUME (int a)
7195
+ return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
7199
+atomic_fetch_and_CONSUME (int a)
7201
+ return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
7205
+atomic_fetch_nand_CONSUME (int a)
7207
+ return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
7211
+atomic_fetch_xor_CONSUME (int a)
7213
+ return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
7217
+atomic_fetch_or_CONSUME (int a)
7219
+ return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
7221
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
7222
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
7224
/* { dg-do compile } */
7225
/* { dg-options "-O2" } */
7228
+#include "atomic-op-short.x"
7231
-atomic_fetch_add_RELAXED (short a)
7233
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
7237
-atomic_fetch_sub_RELAXED (short a)
7239
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
7243
-atomic_fetch_and_RELAXED (short a)
7245
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
7249
-atomic_fetch_nand_RELAXED (short a)
7251
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
7255
-atomic_fetch_xor_RELAXED (short a)
7257
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
7261
-atomic_fetch_or_RELAXED (short a)
7263
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
7266
/* { dg-final { scan-assembler-times "ldxrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
7267
/* { dg-final { scan-assembler-times "stxrh\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
7268
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
7269
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
7274
+atomic_fetch_add_RELAXED (char a)
7276
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
7280
+atomic_fetch_sub_RELAXED (char a)
7282
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
7286
+atomic_fetch_and_RELAXED (char a)
7288
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
7292
+atomic_fetch_nand_RELAXED (char a)
7294
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
7298
+atomic_fetch_xor_RELAXED (char a)
7300
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
7304
+atomic_fetch_or_RELAXED (char a)
7306
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
7308
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
7309
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
7311
/* { dg-final { scan-assembler "fdiv\\tv" } } */
7312
/* { dg-final { scan-assembler "fneg\\tv" } } */
7313
/* { dg-final { scan-assembler "fabs\\tv" } } */
7314
+/* { dg-final { scan-assembler "fabd\\tv" } } */
7315
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
7316
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
7318
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
7325
#include "vect-fcm.x"
7327
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
7328
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
7329
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
7330
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
7331
/* { dg-final { cleanup-tree-dump "vect" } } */
7332
--- a/src/gcc/testsuite/gcc.target/aarch64/adds1.c
7333
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds1.c
7335
+/* { dg-do run } */
7336
+/* { dg-options "-O2 --save-temps -fno-inline" } */
7338
+extern void abort (void);
7341
+adds_si_test1 (int a, int b, int c)
7345
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
7353
+adds_si_test2 (int a, int b, int c)
7357
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, 255" } } */
7365
+adds_si_test3 (int a, int b, int c)
7367
+ int d = a + (b << 3);
7369
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
7376
+typedef long long s64;
7379
+adds_di_test1 (s64 a, s64 b, s64 c)
7383
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
7391
+adds_di_test2 (s64 a, s64 b, s64 c)
7395
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, 255" } } */
7403
+adds_di_test3 (s64 a, s64 b, s64 c)
7405
+ s64 d = a + (b << 3);
7407
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
7419
+ x = adds_si_test1 (29, 4, 5);
7423
+ x = adds_si_test1 (5, 2, 20);
7427
+ x = adds_si_test2 (29, 4, 5);
7431
+ x = adds_si_test2 (1024, 2, 20);
7435
+ x = adds_si_test3 (35, 4, 5);
7439
+ x = adds_si_test3 (5, 2, 20);
7443
+ y = adds_di_test1 (0x130000029ll,
7447
+ if (y != 0xc75050536)
7450
+ y = adds_di_test1 (0x5000500050005ll,
7451
+ 0x2111211121112ll,
7452
+ 0x0000000002020ll);
7453
+ if (y != 0x9222922294249)
7456
+ y = adds_di_test2 (0x130000029ll,
7459
+ if (y != 0x955050631)
7462
+ y = adds_di_test2 (0x130002900ll,
7465
+ if (y != 0x955052f08)
7468
+ y = adds_di_test3 (0x130000029ll,
7471
+ if (y != 0x9b9050576)
7474
+ y = adds_di_test3 (0x130002900ll,
7477
+ if (y != 0xafd052e4d)
7483
+/* { dg-final { cleanup-saved-temps } } */
7484
--- a/src/gcc/testsuite/gcc.target/aarch64/insv_1.c
7485
+++ b/src/gcc/testsuite/gcc.target/aarch64/insv_1.c
7487
+/* { dg-do run } */
7488
+/* { dg-options "-O2 --save-temps -fno-inline" } */
7490
+extern void abort (void);
7492
+typedef struct bitfield
7494
+ unsigned short eight: 8;
7495
+ unsigned short four: 4;
7496
+ unsigned short five: 5;
7497
+ unsigned short seven: 7;
7498
+ unsigned int sixteen: 16;
7504
+ /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 0, 8" } } */
7512
+ /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 16, 5" } } */
7520
+ /* { dg-final { scan-assembler "movk\tx\[0-9\]+, 0x1d6b, lsl 32" } } */
7528
+ /* { dg-final { scan-assembler "orr\tx\[0-9\]+, x\[0-9\]+, 2031616" } } */
7536
+ /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, -2031617" } } */
7543
+main (int argc, char** argv)
7545
+ static bitfield a;
7546
+ bitfield b = bfi1 (a);
7547
+ bitfield c = bfi2 (b);
7548
+ bitfield d = movk (c);
7556
+ if (d.sixteen != 7531)
7560
+ if (d.five != 0x1f)
7570
+/* { dg-final { cleanup-saved-temps } } */
7571
--- a/src/gcc/testsuite/gcc.target/aarch64/ror.c
7572
+++ b/src/gcc/testsuite/gcc.target/aarch64/ror.c
7574
+/* { dg-options "-O2 --save-temps" } */
7575
+/* { dg-do run } */
7577
+extern void abort (void);
7582
+ /* { dg-final { scan-assembler "ror\tw\[0-9\]+, w\[0-9\]+, 27\n" } } */
7583
+ return (a << 5) | ((unsigned int) a >> 27);
7587
+test_di (long long a)
7589
+ /* { dg-final { scan-assembler "ror\tx\[0-9\]+, x\[0-9\]+, 45\n" } } */
7590
+ return (a << 19) | ((unsigned long long) a >> 45);
7598
+ v = test_si (0x0203050);
7599
+ if (v != 0x4060a00)
7601
+ w = test_di (0x0000020506010304ll);
7602
+ if (w != 0x1028300818200000ll)
7607
+/* { dg-final { cleanup-saved-temps } } */
7608
--- a/src/gcc/testsuite/gcc.target/aarch64/ands_1.c
7609
+++ b/src/gcc/testsuite/gcc.target/aarch64/ands_1.c
7611
+/* { dg-do run } */
7612
+/* { dg-options "-O2 --save-temps -fno-inline" } */
7614
+extern void abort (void);
7617
+ands_si_test1 (int a, int b, int c)
7621
+ /* { dg-final { scan-assembler-times "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 2 } } */
7629
+ands_si_test2 (int a, int b, int c)
7633
+ /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, 255" } } */
7641
+ands_si_test3 (int a, int b, int c)
7643
+ int d = a & (b << 3);
7645
+ /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
7652
+typedef long long s64;
7655
+ands_di_test1 (s64 a, s64 b, s64 c)
7659
+ /* { dg-final { scan-assembler-times "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } */
7667
+ands_di_test2 (s64 a, s64 b, s64 c)
7671
+ /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, 255" } } */
7679
+ands_di_test3 (s64 a, s64 b, s64 c)
7681
+ s64 d = a & (b << 3);
7683
+ /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
7696
+ x = ands_si_test1 (29, 4, 5);
7700
+ x = ands_si_test1 (5, 2, 20);
7704
+ x = ands_si_test2 (29, 4, 5);
7708
+ x = ands_si_test2 (1024, 2, 20);
7712
+ x = ands_si_test3 (35, 4, 5);
7716
+ x = ands_si_test3 (5, 2, 20);
7720
+ y = ands_di_test1 (0x130000029ll,
7724
+ if (y != ((0x130000029ll & 0x320000004ll) + 0x320000004ll + 0x505050505ll))
7727
+ y = ands_di_test1 (0x5000500050005ll,
7728
+ 0x2111211121112ll,
7729
+ 0x0000000002020ll);
7730
+ if (y != 0x5000500052025ll)
7733
+ y = ands_di_test2 (0x130000029ll,
7736
+ if (y != ((0x130000029ll & 0xff) + 0x320000004ll + 0x505050505ll))
7739
+ y = ands_di_test2 (0x130002900ll,
7742
+ if (y != (0x130002900ll + 0x505050505ll))
7745
+ y = ands_di_test3 (0x130000029ll,
7748
+ if (y != ((0x130000029ll & (0x064000008ll << 3))
7749
+ + 0x064000008ll + 0x505050505ll))
7752
+ y = ands_di_test3 (0x130002900ll,
7755
+ if (y != (0x130002900ll + 0x505050505ll))
7761
+/* { dg-final { cleanup-saved-temps } } */
7762
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
7763
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
7765
/* { dg-do compile } */
7766
/* { dg-options "-O2" } */
7769
+#include "atomic-op-release.x"
7772
-atomic_fetch_add_RELEASE (int a)
7774
- return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
7778
-atomic_fetch_sub_RELEASE (int a)
7780
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
7784
-atomic_fetch_and_RELEASE (int a)
7786
- return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
7790
-atomic_fetch_nand_RELEASE (int a)
7792
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
7796
-atomic_fetch_xor_RELEASE (int a)
7798
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
7802
-atomic_fetch_or_RELEASE (int a)
7804
- return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
7807
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
7808
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
7809
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vfmaxv.c
7810
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vfmaxv.c
7812
+/* { dg-do run } */
7813
+/* { dg-options "-O3 --save-temps -ffast-math" } */
7815
+#include <arm_neon.h>
7817
+extern void abort (void);
7819
+extern float fabsf (float);
7820
+extern double fabs (double);
7821
+extern int isnan (double);
7822
+extern float fmaxf (float, float);
7823
+extern float fminf (float, float);
7824
+extern double fmax (double, double);
7825
+extern double fmin (double, double);
7827
+#define NUM_TESTS 16
7828
+#define DELTA 0.000001
7829
+#define NAN (0.0 / 0.0)
7831
+float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
7832
+ 200.0f, -800.0f, -13.0f, -0.5f,
7833
+ NAN, -870.0f, 10.4f, 310.11f,
7834
+ 0.0f, -865.0f, -2213.0f, -1.5f};
7836
+double input_float64[] = {0.1, -0.1, 0.4, 10.3,
7837
+ 200.0, -800.0, -13.0, -0.5,
7838
+ NAN, -870.0, 10.4, 310.11,
7839
+ 0.0, -865.0, -2213.0, -1.5};
7841
+#define EQUALF(a, b) (fabsf (a - b) < DELTA)
7842
+#define EQUALD(a, b) (fabs (a - b) < DELTA)
7844
+/* Floating point 'unordered' variants. */
7847
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
7849
+test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \
7852
+ int moves = (NUM_TESTS - LANES) + 1; \
7853
+ TYPE##_t out_l[NUM_TESTS]; \
7854
+ TYPE##_t out_v[NUM_TESTS]; \
7856
+ /* Calculate linearly. */ \
7857
+ for (i = 0; i < moves; i++) \
7859
+ out_l[i] = input_##TYPE[i]; \
7860
+ for (j = 0; j < LANES; j++) \
7862
+ if (isnan (out_l[i])) \
7864
+ if (isnan (input_##TYPE[i + j]) \
7865
+ || input_##TYPE[i + j] CMP_OP out_l[i]) \
7866
+ out_l[i] = input_##TYPE[i + j]; \
7870
+ /* Calculate using vector reduction intrinsics. */ \
7871
+ for (i = 0; i < moves; i++) \
7873
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
7874
+ out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \
7878
+ for (i = 0; i < moves; i++) \
7880
+ if (!EQUAL##FLOAT (out_v[i], out_l[i]) \
7881
+ && !(isnan (out_v[i]) && isnan (out_l[i]))) \
7887
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \
7888
+TEST (max, >, STYPE, , TYPE, W32, F) \
7889
+TEST (max, >, STYPE, q, TYPE, W64, F) \
7890
+TEST (min, <, STYPE, , TYPE, W32, F) \
7891
+TEST (min, <, STYPE, q, TYPE, W64, F)
7893
+BUILD_VARIANTS (float32, f32, 2, 4, F)
7894
+/* { dg-final { scan-assembler "fmaxp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
7895
+/* { dg-final { scan-assembler "fminp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
7896
+/* { dg-final { scan-assembler "fmaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
7897
+/* { dg-final { scan-assembler "fminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
7898
+TEST (max, >, f64, q, float64, 2, D)
7899
+/* { dg-final { scan-assembler "fmaxp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
7900
+TEST (min, <, f64, q, float64, 2, D)
7901
+/* { dg-final { scan-assembler "fminp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
7903
+/* Floating point 'nm' variants. */
7906
+#define TEST(MAXMIN, F, SUFFIX, Q, TYPE, LANES, FLOAT) \
7908
+test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t (void) \
7911
+ int moves = (NUM_TESTS - LANES) + 1; \
7912
+ TYPE##_t out_l[NUM_TESTS]; \
7913
+ TYPE##_t out_v[NUM_TESTS]; \
7915
+ /* Calculate linearly. */ \
7916
+ for (i = 0; i < moves; i++) \
7918
+ out_l[i] = input_##TYPE[i]; \
7919
+ for (j = 0; j < LANES; j++) \
7920
+ out_l[i] = f##MAXMIN##F (input_##TYPE[i + j], out_l[i]); \
7923
+ /* Calculate using vector reduction intrinsics. */ \
7924
+ for (i = 0; i < moves; i++) \
7926
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
7927
+ out_v[i] = v##MAXMIN##nmv##Q##_##SUFFIX (t1); \
7931
+ for (i = 0; i < moves; i++) \
7933
+ if (!EQUAL##FLOAT (out_v[i], out_l[i])) \
7939
+TEST (max, f, f32, , float32, 2, D)
7940
+/* { dg-final { scan-assembler "fmaxnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
7941
+TEST (min, f, f32, , float32, 2, D)
7942
+/* { dg-final { scan-assembler "fminnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
7943
+TEST (max, f, f32, q, float32, 4, D)
7944
+/* { dg-final { scan-assembler "fmaxnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
7945
+TEST (min, f, f32, q, float32, 4, D)
7946
+/* { dg-final { scan-assembler "fminnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
7947
+TEST (max, , f64, q, float64, 2, D)
7948
+/* { dg-final { scan-assembler "fmaxnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
7949
+TEST (min, , f64, q, float64, 2, D)
7950
+/* { dg-final { scan-assembler "fminnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
7953
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
7955
+ if (!test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t ()) \
7960
+main (int argc, char **argv)
7962
+ BUILD_VARIANTS (float32, f32, 2, 4, F)
7963
+ TEST (max, >, f64, q, float64, 2, D)
7964
+ TEST (min, <, f64, q, float64, 2, D)
7967
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
7969
+ if (!test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t ()) \
7973
+ BUILD_VARIANTS (float32, f32, 2, 4, F)
7974
+ TEST (max, >, f64, q, float64, 2, D)
7975
+ TEST (min, <, f64, q, float64, 2, D)
7980
+/* { dg-final { cleanup-saved-temps } } */
7981
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
7982
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
7987
+atomic_fetch_add_RELAXED (short a)
7989
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
7993
+atomic_fetch_sub_RELAXED (short a)
7995
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
7999
+atomic_fetch_and_RELAXED (short a)
8001
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
8005
+atomic_fetch_nand_RELAXED (short a)
8007
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
8011
+atomic_fetch_xor_RELAXED (short a)
8013
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
8017
+atomic_fetch_or_RELAXED (short a)
8019
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
8021
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
8022
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
8024
+/* { dg-do run } */
8025
+/* { dg-options "-O3 --save-temps -ffast-math" } */
8027
+#include <arm_neon.h>
8029
+extern void abort (void);
8030
+extern double fabs (double);
8032
+#define NUM_TESTS 8
8033
+#define DELTA 0.000001
8035
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
8036
+ 200.0f, -800.0f, -13.0f, -0.5f};
8037
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
8038
+ 200.0, -800.0, -13.0, -0.5};
8040
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
8042
+test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t (void) \
8046
+ int nlanes = LANES; \
8047
+ U##int##WIDTH##_t expected_out[NUM_TESTS]; \
8048
+ U##int##WIDTH##_t actual_out[NUM_TESTS]; \
8050
+ for (i = 0; i < NUM_TESTS; i++) \
8053
+ = vcvt##SUFFIX##D##_##S##WIDTH##_f##WIDTH (input_f##WIDTH[i]); \
8054
+ /* Don't vectorize this. */ \
8055
+ asm volatile ("" : : : "memory"); \
8058
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
8060
+ U##int##WIDTH##x##LANES##_t out = \
8061
+ vcvt##SUFFIX##Q##_##S##WIDTH##_f##WIDTH \
8062
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
8063
+ vst1##Q##_##S##WIDTH (actual_out + i, out); \
8066
+ for (i = 0; i < NUM_TESTS; i++) \
8067
+ ret &= fabs (expected_out[i] - actual_out[i]) < DELTA; \
8073
+#define BUILD_VARIANTS(SUFFIX) \
8074
+TEST (SUFFIX, , 32, 2, s, ,s) \
8075
+TEST (SUFFIX, q, 32, 4, s, ,s) \
8076
+TEST (SUFFIX, q, 64, 2, s, ,d) \
8077
+TEST (SUFFIX, , 32, 2, u,u,s) \
8078
+TEST (SUFFIX, q, 32, 4, u,u,s) \
8079
+TEST (SUFFIX, q, 64, 2, u,u,d) \
8082
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */
8083
+/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */
8084
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8085
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8086
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8087
+/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */
8088
+/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */
8089
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8090
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8091
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8093
+/* { dg-final { scan-assembler "fcvtas\\tw\[0-9\]+, s\[0-9\]+" } } */
8094
+/* { dg-final { scan-assembler "fcvtas\\tx\[0-9\]+, d\[0-9\]+" } } */
8095
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8096
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8097
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8098
+/* { dg-final { scan-assembler "fcvtau\\tw\[0-9\]+, s\[0-9\]+" } } */
8099
+/* { dg-final { scan-assembler "fcvtau\\tx\[0-9\]+, d\[0-9\]+" } } */
8100
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8101
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8102
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8104
+/* { dg-final { scan-assembler "fcvtms\\tw\[0-9\]+, s\[0-9\]+" } } */
8105
+/* { dg-final { scan-assembler "fcvtms\\tx\[0-9\]+, d\[0-9\]+" } } */
8106
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8107
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8108
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8109
+/* { dg-final { scan-assembler "fcvtmu\\tw\[0-9\]+, s\[0-9\]+" } } */
8110
+/* { dg-final { scan-assembler "fcvtmu\\tx\[0-9\]+, d\[0-9\]+" } } */
8111
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8112
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8113
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8115
+/* { dg-final { scan-assembler "fcvtns\\tw\[0-9\]+, s\[0-9\]+" } } */
8116
+/* { dg-final { scan-assembler "fcvtns\\tx\[0-9\]+, d\[0-9\]+" } } */
8117
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8118
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8119
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8120
+/* { dg-final { scan-assembler "fcvtnu\\tw\[0-9\]+, s\[0-9\]+" } } */
8121
+/* { dg-final { scan-assembler "fcvtnu\\tx\[0-9\]+, d\[0-9\]+" } } */
8122
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8123
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8124
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8126
+/* { dg-final { scan-assembler "fcvtps\\tw\[0-9\]+, s\[0-9\]+" } } */
8127
+/* { dg-final { scan-assembler "fcvtps\\tx\[0-9\]+, d\[0-9\]+" } } */
8128
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8129
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8130
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8131
+/* { dg-final { scan-assembler "fcvtpu\\tw\[0-9\]+, s\[0-9\]+" } } */
8132
+/* { dg-final { scan-assembler "fcvtpu\\tx\[0-9\]+, d\[0-9\]+" } } */
8133
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
8134
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
8135
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8138
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
8140
+ if (!test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t ()) \
8145
+main (int argc, char **argv)
8147
+ BUILD_VARIANTS ( )
8148
+ BUILD_VARIANTS (a)
8149
+ BUILD_VARIANTS (m)
8150
+ BUILD_VARIANTS (n)
8151
+ BUILD_VARIANTS (p)
8155
+/* { dg-final { cleanup-saved-temps } } */
8156
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
8157
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
8162
+atomic_fetch_add_RELEASE (int a)
8164
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
8168
+atomic_fetch_sub_RELEASE (int a)
8170
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
8174
+atomic_fetch_and_RELEASE (int a)
8176
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
8180
+atomic_fetch_nand_RELEASE (int a)
8182
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
8186
+atomic_fetch_xor_RELEASE (int a)
8188
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
8192
+atomic_fetch_or_RELEASE (int a)
8194
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
8196
--- a/src/gcc/testsuite/gcc.target/aarch64/fabd.c
8197
+++ b/src/gcc/testsuite/gcc.target/aarch64/fabd.c
8199
+/* { dg-do run } */
8200
+/* { dg-options "-O1 -fno-inline --save-temps" } */
8202
+extern double fabs (double);
8203
+extern float fabsf (float);
8204
+extern void abort ();
8205
+extern void exit (int);
8208
+fabd_d (double x, double y, double d)
8210
+ if ((fabs (x - y) - d) > 0.00001)
8214
+/* { dg-final { scan-assembler "fabd\td\[0-9\]+" } } */
8217
+fabd_f (float x, float y, float d)
8219
+ if ((fabsf (x - y) - d) > 0.00001)
8223
+/* { dg-final { scan-assembler "fabd\ts\[0-9\]+" } } */
8228
+ fabd_d (10.0, 5.0, 5.0);
8229
+ fabd_d (5.0, 10.0, 5.0);
8230
+ fabd_f (10.0, 5.0, 5.0);
8231
+ fabd_f (5.0, 10.0, 5.0);
8236
+/* { dg-final { cleanup-saved-temps } } */
8237
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
8238
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
8239
@@ -117,6 +117,16 @@
8240
9.0, 10.0, 11.0, 12.0,
8241
13.0, 14.0, 15.0, 16.0 };
8243
+ F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
8244
+ 1.0f, 1.0f, 1.0f, 1.0f,
8245
+ 1.0f, 1.0f, 1.0f, 1.0f,
8246
+ 1.0f, 1.0f, 1.0f, 1.0f };
8248
+ F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
8249
+ 1.0, 1.0, 1.0, 1.0,
8250
+ 1.0, 1.0, 1.0, 1.0,
8251
+ 1.0, 1.0, 1.0, 1.0 };
8253
/* Setup input vectors. */
8254
for (i=1; i<=16; i++)
8264
--- a/src/gcc/testsuite/gcc.target/aarch64/ngc.c
8265
+++ b/src/gcc/testsuite/gcc.target/aarch64/ngc.c
8267
+/* { dg-do run } */
8268
+/* { dg-options "-O2 --save-temps -fno-inline" } */
8270
+extern void abort (void);
8271
+typedef unsigned int u32;
8274
+ngc_si (u32 a, u32 b, u32 c, u32 d)
8280
+typedef unsigned long long u64;
8283
+ngc_si_tst (u64 a, u32 b, u32 c, u32 d)
8290
+ngc_di (u64 a, u64 b, u64 c, u64 d)
8302
+ x = ngc_si (29, 4, 5, 4);
8306
+ x = ngc_si (1024, 2, 20, 13);
8310
+ y = ngc_si_tst (0x130000029ll, 32, 50, 12);
8311
+ if (y != 0xffffffe0)
8314
+ y = ngc_si_tst (0x5000500050005ll, 21, 2, 14);
8315
+ if (y != 0xffffffea)
8318
+ y = ngc_di (0x130000029ll, 0x320000004ll, 0x505050505ll, 0x123123123ll);
8319
+ if (y != 0xfffffffcdffffffc)
8322
+ y = ngc_di (0x5000500050005ll,
8323
+ 0x2111211121112ll, 0x0000000002020ll, 0x1414575046477ll);
8324
+ if (y != 0xfffdeeedeeedeeed)
8330
+/* { dg-final { scan-assembler-times "ngc\tw\[0-9\]+, w\[0-9\]+" 2 } } */
8331
+/* { dg-final { scan-assembler-times "ngc\tx\[0-9\]+, x\[0-9\]+" 1 } } */
8332
+/* { dg-final { cleanup-saved-temps } } */
8333
--- a/src/gcc/testsuite/gcc.target/aarch64/cmp.c
8334
+++ b/src/gcc/testsuite/gcc.target/aarch64/cmp.c
8336
+/* { dg-do compile } */
8337
+/* { dg-options "-O2" } */
8340
+cmp_si_test1 (int a, int b, int c)
8349
+cmp_si_test2 (int a, int b, int c)
8357
+typedef long long s64;
8360
+cmp_di_test1 (s64 a, s64 b, s64 c)
8369
+cmp_di_test2 (s64 a, s64 b, s64 c)
8378
+cmp_di_test3 (int a, s64 b, s64 c)
8387
+cmp_di_test4 (int a, s64 b, s64 c)
8389
+ if (((s64)a << 3) > b)
8395
+/* { dg-final { scan-assembler-times "cmp\tw\[0-9\]+, w\[0-9\]+" 2 } } */
8396
+/* { dg-final { scan-assembler-times "cmp\tx\[0-9\]+, x\[0-9\]+" 4 } } */
8397
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
8398
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
8400
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
8407
#include "vect-fcm.x"
8409
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
8410
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
8411
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
8412
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
8413
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
8414
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
8415
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
8417
extern float fabsf (float);
8418
extern double fabs (double);
8420
+#define DEF3a(fname, type, op) \
8421
+ void fname##_##type (pR##type a, \
8426
+ for (i = 0; i < 16; i++) \
8427
+ a[i] = op (b[i] - c[i]); \
8430
#define DEF3(fname, type, op) \
8431
void fname##_##type (pR##type a, \
8436
- for (i=0; i<16; i++) \
8437
+ for (i = 0; i < 16; i++) \
8438
a[i] = b[i] op c[i]; \
8445
- for (i=0; i<16; i++) \
8446
+ for (i = 0; i < 16; i++) \
8451
+#define DEFN3a(fname, op) \
8452
+ DEF3a (fname, F32, op) \
8453
+ DEF3a (fname, F64, op)
8455
#define DEFN3(fname, op) \
8456
DEF3 (fname, F32, op) \
8457
DEF3 (fname, F64, op)
8460
DEF2 (abs, F32, fabsf)
8461
DEF2 (abs, F64, fabs)
8462
+DEF3a (fabd, F32, fabsf)
8463
+DEF3a (fabd, F64, fabs)
8464
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
8465
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
8467
/* { dg-do compile } */
8468
/* { dg-options "-O2" } */
8471
+#include "atomic-op-acq_rel.x"
8474
-atomic_fetch_add_ACQ_REL (int a)
8476
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
8480
-atomic_fetch_sub_ACQ_REL (int a)
8482
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
8486
-atomic_fetch_and_ACQ_REL (int a)
8488
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
8492
-atomic_fetch_nand_ACQ_REL (int a)
8494
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
8498
-atomic_fetch_xor_ACQ_REL (int a)
8500
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
8504
-atomic_fetch_or_ACQ_REL (int a)
8506
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
8509
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
8510
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
8511
--- a/src/gcc/testsuite/gcc.target/aarch64/subs1.c
8512
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs1.c
8514
+/* { dg-do run } */
8515
+/* { dg-options "-O2 --save-temps -fno-inline" } */
8517
+extern void abort (void);
8520
+subs_si_test1 (int a, int b, int c)
8524
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
8532
+subs_si_test2 (int a, int b, int c)
8536
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, #255" } } */
8544
+subs_si_test3 (int a, int b, int c)
8546
+ int d = a - (b << 3);
8548
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
8555
+typedef long long s64;
8558
+subs_di_test1 (s64 a, s64 b, s64 c)
8562
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
8570
+subs_di_test2 (s64 a, s64 b, s64 c)
8574
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, #255" } } */
8582
+subs_di_test3 (s64 a, s64 b, s64 c)
8584
+ s64 d = a - (b << 3);
8586
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
8598
+ x = subs_si_test1 (29, 4, 5);
8602
+ x = subs_si_test1 (5, 2, 20);
8606
+ x = subs_si_test2 (29, 4, 5);
8610
+ x = subs_si_test2 (1024, 2, 20);
8614
+ x = subs_si_test3 (35, 4, 5);
8618
+ x = subs_si_test3 (5, 2, 20);
8622
+ y = subs_di_test1 (0x130000029ll,
8626
+ if (y != 0x45000002d)
8629
+ y = subs_di_test1 (0x5000500050005ll,
8630
+ 0x2111211121112ll,
8631
+ 0x0000000002020ll);
8632
+ if (y != 0x7111711171117)
8635
+ y = subs_di_test2 (0x130000029ll,
8638
+ if (y != 0x955050433)
8641
+ y = subs_di_test2 (0x130002900ll,
8644
+ if (y != 0x955052d0a)
8647
+ y = subs_di_test3 (0x130000029ll,
8650
+ if (y != 0x3790504f6)
8653
+ y = subs_di_test3 (0x130002900ll,
8656
+ if (y != 0x27d052dcd)
8662
+/* { dg-final { cleanup-saved-temps } } */
8663
--- a/src/gcc/testsuite/gcc.target/aarch64/adds2.c
8664
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds2.c
8666
+/* { dg-do run } */
8667
+/* { dg-options "-O2 --save-temps -fno-inline" } */
8669
+extern void abort (void);
8672
+adds_si_test1 (int a, int b, int c)
8676
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
8677
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
8685
+adds_si_test2 (int a, int b, int c)
8687
+ int d = a + 0xfff;
8689
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
8690
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
8698
+adds_si_test3 (int a, int b, int c)
8700
+ int d = a + (b << 3);
8702
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
8703
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
8710
+typedef long long s64;
8713
+adds_di_test1 (s64 a, s64 b, s64 c)
8717
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
8718
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
8726
+adds_di_test2 (s64 a, s64 b, s64 c)
8728
+ s64 d = a + 0x1000ll;
8730
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
8731
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
8739
+adds_di_test3 (s64 a, s64 b, s64 c)
8741
+ s64 d = a + (b << 3);
8743
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
8744
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
8756
+ x = adds_si_test1 (29, 4, 5);
8760
+ x = adds_si_test1 (5, 2, 20);
8764
+ x = adds_si_test2 (29, 4, 5);
8768
+ x = adds_si_test2 (1024, 2, 20);
8772
+ x = adds_si_test3 (35, 4, 5);
8776
+ x = adds_si_test3 (5, 2, 20);
8780
+ y = adds_di_test1 (0x130000029ll,
8784
+ if (y != 0xc75050536)
8787
+ y = adds_di_test1 (0x5000500050005ll,
8788
+ 0x2111211121112ll,
8789
+ 0x0000000002020ll);
8790
+ if (y != 0x9222922294249)
8793
+ y = adds_di_test2 (0x130000029ll,
8796
+ if (y != 0x955051532)
8799
+ y = adds_di_test2 (0x540004100ll,
8802
+ if (y != 0x1065055309)
8805
+ y = adds_di_test3 (0x130000029ll,
8808
+ if (y != 0x9b9050576)
8811
+ y = adds_di_test3 (0x130002900ll,
8814
+ if (y != 0xafd052e4d)
8820
+/* { dg-final { cleanup-saved-temps } } */
8821
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
8822
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
8824
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
8826
#define FTYPE double
8831
#include "vect-fcm.x"
8833
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
8834
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
8835
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
8836
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
8837
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
8838
--- a/src/gcc/testsuite/lib/target-supports.exp
8839
+++ b/src/gcc/testsuite/lib/target-supports.exp
8840
@@ -2012,6 +2012,7 @@
8841
|| ([istarget powerpc*-*-*]
8842
&& ![istarget powerpc-*-linux*paired*])
8843
|| [istarget x86_64-*-*]
8844
+ || [istarget aarch64*-*-*]
8845
|| ([istarget arm*-*-*]
8846
&& [check_effective_target_arm_neon_ok])} {
8847
set et_vect_uintfloat_cvt_saved 1
8848
@@ -2147,22 +2148,6 @@
8852
-# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
8853
-# -mfloat-abi=softfp
8854
-proc check_effective_target_arm_v8_neon_ok {} {
8855
- if { [check_effective_target_arm32] } {
8856
- return [check_no_compiler_messages arm_v8_neon_ok object {
8859
- __asm__ volatile ("vrintn.f32 q0, q0");
8862
- } "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"]
8868
# Return 1 if this is an ARM target supporting -mfpu=vfp
8869
# -mfloat-abi=hard. Some multilibs may be incompatible with these
8871
@@ -2226,7 +2211,8 @@
8872
if { ! [check_effective_target_arm_v8_neon_ok] } {
8875
- return "$flags -march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=softfp"
8876
+ global et_arm_v8_neon_flags
8877
+ return "$flags $et_arm_v8_neon_flags -march=armv8-a"
8880
# Add the options needed for NEON. We need either -mfloat-abi=softfp
8881
@@ -2270,6 +2256,79 @@
8882
check_effective_target_arm_neon_ok_nocache]
8885
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
8886
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
8887
+# incompatible with these options. Also set et_arm_neon_flags to the
8888
+# best options to add.
8890
+proc check_effective_target_arm_neon_fp16_ok_nocache { } {
8891
+ global et_arm_neon_fp16_flags
8892
+ set et_arm_neon_fp16_flags ""
8893
+ if { [check_effective_target_arm32] } {
8894
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
8895
+ "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
8896
+ if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
8897
+ #include "arm_neon.h"
8899
+ foo (float32x4_t arg)
8901
+ return vcvt_f16_f32 (arg);
8904
+ set et_arm_neon_fp16_flags $flags
8913
+proc check_effective_target_arm_neon_fp16_ok { } {
8914
+ return [check_cached_effective_target arm_neon_fp16_ok \
8915
+ check_effective_target_arm_neon_fp16_ok_nocache]
8918
+proc add_options_for_arm_neon_fp16 { flags } {
8919
+ if { ! [check_effective_target_arm_neon_fp16_ok] } {
8922
+ global et_arm_neon_fp16_flags
8923
+ return "$flags $et_arm_neon_fp16_flags"
8926
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
8927
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
8928
+# incompatible with these options. Also set et_arm_v8_neon_flags to the
8929
+# best options to add.
8931
+proc check_effective_target_arm_v8_neon_ok_nocache { } {
8932
+ global et_arm_v8_neon_flags
8933
+ set et_arm_v8_neon_flags ""
8934
+ if { [check_effective_target_arm32] } {
8935
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
8936
+ if { [check_no_compiler_messages_nocache arm_v8_neon_ok object {
8937
+ #include "arm_neon.h"
8941
+ __asm__ volatile ("vrintn.f32 q0, q0");
8944
+ set et_arm_v8_neon_flags $flags
8953
+proc check_effective_target_arm_v8_neon_ok { } {
8954
+ return [check_cached_effective_target arm_v8_neon_ok \
8955
+ check_effective_target_arm_v8_neon_ok_nocache]
8958
# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
8959
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
8960
# incompatible with these options. Also set et_arm_neonv2_flags to the
8961
@@ -2509,6 +2568,24 @@
8962
} [add_options_for_arm_neonv2 ""]]
8965
+# Return 1 if the target supports executing ARMv8 NEON instructions, 0
8968
+proc check_effective_target_arm_v8_neon_hw { } {
8969
+ return [check_runtime arm_v8_neon_hw_available {
8970
+ #include "arm_neon.h"
8975
+ asm ("vrinta.f32 %P0, %P1"
8980
+ } [add_options_for_arm_v8_neon ""]]
8983
# Return 1 if this is a ARM target with NEON enabled.
8985
proc check_effective_target_arm_neon { } {
8986
@@ -4591,6 +4668,33 @@
8990
+# Return 1 if programs are intended to be run on hardware rather than
8993
+proc check_effective_target_hw { } {
8995
+ # All "src/sim" simulators set this one.
8996
+ if [board_info target exists is_simulator] {
8997
+ if [board_info target is_simulator] {
9004
+ # The "sid" simulators don't set that one, but at least they set
9006
+ if [board_info target exists slow_simulator] {
9007
+ if [board_info target slow_simulator] {
9017
# Return 1 if the target is a VxWorks kernel.
9019
proc check_effective_target_vxworks_kernel { } {
9020
--- a/src/gcc/testsuite/ChangeLog.linaro
9021
+++ b/src/gcc/testsuite/ChangeLog.linaro
9023
+2013-08-07 Christophe Lyon <christophe.lyon@linaro.org>
9025
+ Backport from trunk r199720
9026
+ 2013-06-06 Marcus Shawcroft <marcus.shawcroft@arm.com>
9028
+ * gcc.dg/vect/no-section-anchors-vect-68.c:
9029
+ Add dg-skip-if aarch64_tiny.
9031
+2013-08-07 Christophe Lyon <christophe.lyon@linaro.org>
9033
+ Backport from trunk r201237.
9034
+ 2013-07-25 Terry Guo <terry.guo@arm.com>
9036
+ * gcc.target/arm/thumb1-Os-mult.c: New test case.
9038
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
9040
+ Backport from trunk r200596,201067,201083.
9041
+ 2013-07-02 Ian Bolton <ian.bolton@arm.com>
9043
+ * gcc.target/aarch64/abs_1.c: New test.
9045
+ 2013-07-19 Ian Bolton <ian.bolton@arm.com>
9047
+ * gcc.target/aarch64/scalar_intrinsics.c (test_vabs_s64): Added
9050
+ 2013-07-20 James Greenhalgh <james.greenhalgh@arm.com>
9052
+ * gcc.target/aarch64/vabs_intrinsic_1.c: New file.
9054
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
9056
+ Backport from trunk r198864.
9057
+ 2013-05-07 Ian Bolton <ian.bolton@arm.com>
9059
+ * gcc.target/aarch64/ands_1.c: New test.
9060
+ * gcc.target/aarch64/ands_2.c: Likewise
9062
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
9064
+ Backport from trunk r199439,199533,201326.
9066
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
9068
+ * gcc.dg/shrink-wrap-alloca.c: New added.
9069
+ * gcc.dg/shrink-wrap-pretend.c: New added.
9070
+ * gcc.dg/shrink-wrap-sibcall.c: New added.
9072
+ 2013-05-31 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
9074
+ * gcc.dg/shrink-wrap-alloca.c: Use __builtin_alloca.
9076
+ 2013-07-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
9078
+ * gcc.target/arm/pr57637.c: New testcase.
9080
+2013-08-06 Christophe Lyon <christophe.lyon@linaro.org>
9082
+ Backport from trunk r198928,198973,199203,201240,201241.
9083
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
9086
+ * gcc.target/arm/pr40887.c: Adjust testcase.
9087
+ * gcc.target/arm/pr19599.c: New test.
9089
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
9091
+ Backport from trunk r200922.
9092
+ 2013-07-12 Tejas Belagod <tejas.belagod@arm.com>
9094
+ * gcc.target/aarch64/vect-movi.c: New.
9096
+2013-08-05 Yvan Roux <yvan.roux@linaro.org>
9098
+ Backport from trunk r200720.
9099
+ 2013-07-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
9101
+ * gcc.dg/pr57518.c: Adjust scan-rtl-dump-not pattern.
9103
+2013-07-21 Yvan Roux <yvan.roux@linaro.org>
9105
+ Backport from trunk r200204.
9106
+ 2013-06-19 Yufeng Zhang <yufeng.zhang@arm.com>
9108
+ * gcc.dg/torture/stackalign/builtin-apply-2.c: set
9109
+ STACK_ARGUMENTS_SIZE with 0 if __aarch64__ is defined.
9111
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9113
+ GCC Linaro 4.8-2013.07-1 released.
9115
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
9117
+ GCC Linaro 4.8-2013.07 released.
9119
+2013-07-03 Christophe Lyon <christophe.lyon@linaro.org>
9121
+ Revert backport from trunk r198928.
9122
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
9125
+ * gcc.target/arm/pr40887.c: Adjust testcase.
9126
+ * gcc.target/arm/pr19599.c: New test.
9128
+2013-07-03 Christophe Lyon <christophe.lyon@linaro.org>
9130
+ Revert backport from trunk 199439, 199533
9131
+ 2013-05-31 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
9133
+ * gcc.dg/shrink-wrap-alloca.c: Use __builtin_alloca.
9135
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
9137
+ * gcc.dg/shrink-wrap-alloca.c: New added.
9138
+ * gcc.dg/shrink-wrap-pretend.c: New added.
9139
+ * gcc.dg/shrink-wrap-sibcall.c: New added.
9141
+2013-07-02 Rob Savoye <rob.savoye@linaro.org>
9143
+ Backport from trunk 200096
9145
+ 2013-06-14 Vidya Praveen <vidyapraveen@arm.com>
9147
+ * gcc.target/aarch64/vect_smlal_1.c: New file.
9149
+2013-07-02 Rob Savoye <rob.savoye@linaro.org>
9151
+ Backport from trunk 200019
9152
+ 2013-06-12 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
9154
+ * gcc.target/arm/unaligned-memcpy-4.c (src, dst): Initialize
9155
+ to ensure alignment.
9156
+ * gcc.target/arm/unaligned-memcpy-3.c (src): Likewise.
9158
+2013-06-20 Rob Savoye <rob.savoye@linaro.org>
9160
+ Backport from trunk 200152
9161
+ 2013-06-17 Sofiane Naci <sofiane.naci@arm.com>
9163
+ * gcc.target/aarch64/scalar_intrinsics.c: Update.
9165
+2013-06-20 Rob Savoye <rob.savoye@linaro.org>
9167
+ Backport from trunk 200148
9168
+ 2013-06-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
9170
+ * gcc.target/arm/unaligned-memcpy-2.c (dest): Initialize to
9173
+2013-06-20 Rob Savoye <rob.savoye@linaro.org>
9175
+ Backport from trunk 199533
9176
+ 2013-05-31 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
9178
+ * gcc.dg/shrink-wrap-alloca.c: Use __builtin_alloca.
9180
+2013-06-20 Christophe Lyon <christophe.lyon@linaro.org>
9182
+ Backport from trunk r198683.
9183
+ 2013-05-07 Christophe Lyon <christophe.lyon@linaro.org>
9185
+ * lib/target-supports.exp (check_effective_target_hw): New
9187
+ * c-c++-common/asan/clone-test-1.c: Call
9188
+ check_effective_target_hw.
9189
+ * c-c++-common/asan/rlimit-mmap-test-1.c: Likewise.
9190
+ * c-c++-common/asan/heap-overflow-1.c: Update regexps to accept
9191
+ possible decorations.
9192
+ * c-c++-common/asan/null-deref-1.c: Likewise.
9193
+ * c-c++-common/asan/stack-overflow-1.c: Likewise.
9194
+ * c-c++-common/asan/strncpy-overflow-1.c: Likewise.
9195
+ * c-c++-common/asan/use-after-free-1.c: Likewise.
9196
+ * g++.dg/asan/deep-thread-stack-1.C: Likewise.
9197
+ * g++.dg/asan/large-func-test-1.C: Likewise.
9199
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
9201
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
9203
+2013-06-06 Zhenqiang Chen <zhenqiang.chen@linaro.org>
9205
+ Backport from mainline r199439.
9206
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
9208
+ * gcc.dg/shrink-wrap-alloca.c: New added.
9209
+ * gcc.dg/shrink-wrap-pretend.c: New added.
9210
+ * gcc.dg/shrink-wrap-sibcall.c: New added.
9212
+2013-06-05 Christophe Lyon <christophe.lyon@linaro.org>
9214
+ Backport from trunk r199658.
9215
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
9217
+ * gcc.target/aarch64/movi_1.c: New test.
9219
+2013-06-04 Christophe Lyon <christophe.lyon@linaro.org>
9221
+ Backport from trunk r199261.
9222
+ 2013-05-23 Christian Bruel <christian.bruel@st.com>
9225
+ * gcc.dg/debug/pr57351.c: New test
9227
+2013-06-03 Christophe Lyon <christophe.lyon@linaro.org>
9228
+ Backport from trunk r198890,199254,199294,199454.
9230
+ 2013-05-30 Ian Bolton <ian.bolton@arm.com>
9232
+ * gcc.target/aarch64/insv_1.c: New test.
9234
+ 2013-05-24 Ian Bolton <ian.bolton@arm.com>
9236
+ * gcc.target/aarch64/scalar_intrinsics.c
9237
+ (force_simd): Use a valid instruction.
9238
+ (test_vdupd_lane_s64): Pass a valid lane argument.
9239
+ (test_vdupd_lane_u64): Likewise.
9241
+ 2013-05-23 Vidya Praveen <vidyapraveen@arm.com>
9243
+ * gcc.target/aarch64/vect-clz.c: New file.
9245
+ 2013-05-14 James Greenhalgh <james.greenhalgh@arm.com>
9247
+ * gcc.target/aarch64/vect-fcm.x: Add cases testing
9248
+ FLOAT cmp FLOAT ? INT : INT.
9249
+ * gcc.target/aarch64/vect-fcm-eq-d.c: Define IMODE.
9250
+ * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
9251
+ * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
9252
+ * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
9253
+ * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
9254
+ * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
9256
+2013-05-29 Christophe Lyon <christophe.lyon@linaro.org>
9258
+ Backport from trunk r198928.
9259
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
9262
+ * gcc.target/arm/pr40887.c: Adjust testcase.
9263
+ * gcc.target/arm/pr19599.c: New test.
9265
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
9267
+ Backport from trunk r198680.
9268
+ 2013-05-07 Sofiane Naci <sofiane.naci@arm.com>
9270
+ * gcc.target/aarch64/scalar_intrinsics.c: Update.
9272
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
9274
+ Backport from trunk r198499-198500.
9275
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
9276
+ * gcc.target/aarch64/vect-vaddv.c: New.
9278
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
9280
+ * gcc.target/aarch64/vect-vmaxv.c: New.
9281
+ * gcc.target/aarch64/vect-vfmaxv.c: Likewise.
9283
+2013-05-23 Christophe Lyon <christophe.lyon@linaro.org>
9285
+ Backport from trunk r198970.
9286
+ 2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
9288
+ * gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output.
9289
+ * gcc.target/arm/unaligned-memcpy-3.c: Likewise.
9290
+ * gcc.target/arm/unaligned-memcpy-4.c: Likewise.
9292
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9294
+ GCC Linaro 4.8-2013.05 released.
9296
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9298
+ Backport from trunk r198574-198575.
9299
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
9301
+ * gcc.target/aarch64/fabd.c: New file.
9303
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9305
+ Backport from trunk r198490-198496.
9306
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
9308
+ * gcc.target/aarch64/scalar-vca.c: New.
9309
+ * gcc.target/aarch64/vect-vca.c: Likewise.
9311
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
9313
+ * gcc.target/aarch64/scalar_intrinsics.c (force_simd): New.
9314
+ (test_vceqd_s64): Force arguments to SIMD registers.
9315
+ (test_vceqzd_s64): Likewise.
9316
+ (test_vcged_s64): Likewise.
9317
+ (test_vcled_s64): Likewise.
9318
+ (test_vcgezd_s64): Likewise.
9319
+ (test_vcged_u64): Likewise.
9320
+ (test_vcgtd_s64): Likewise.
9321
+ (test_vcltd_s64): Likewise.
9322
+ (test_vcgtzd_s64): Likewise.
9323
+ (test_vcgtd_u64): Likewise.
9324
+ (test_vclezd_s64): Likewise.
9325
+ (test_vcltzd_s64): Likewise.
9326
+ (test_vtst_s64): Likewise.
9327
+ (test_vtst_u64): Likewise.
9329
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9331
+ Backport from trunk r198191.
9332
+ 2013-04-23 Sofiane Naci <sofiane.naci@arm.com>
9334
+ * gcc.target/aarch64/scalar-mov.c: New testcase.
9336
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9338
+ Backport from trunk r197838.
9339
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
9341
+ * gcc.target/aarch64/negs.c: New.
9343
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9345
+ Backport from trunk r198019.
9346
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
9348
+ * gcc.target/aarch64/adds1.c: New.
9349
+ * gcc.target/aarch64/adds2.c: New.
9350
+ * gcc.target/aarch64/subs1.c: New.
9351
+ * gcc.target/aarch64/subs2.c: New.
9353
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9355
+ Backport from trunk r198394,198396-198400,198402-198404,198406.
9356
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
9358
+ * lib/target-supports.exp (vect_uintfloat_cvt): Enable for AArch64.
9360
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
9362
+ * gcc.target/aarch64/vect-vcvt.c: New.
9364
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
9366
+ * gcc.target/aarch64/vect-vrnd.c: New.
9368
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9370
+ Backport from trunk r198302-198306,198316.
9371
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
9372
+ Tejas Belagod <tejas.belagod@arm.com>
9374
+ * gcc.target/aarch64/vaddv-intrinsic.c: New.
9375
+ * gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
9376
+ * gcc.target/aarch64/vaddv-intrinsic.x: Likewise.
9378
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
9380
+ * gcc.target/aarch64/cmp.c: New.
9382
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
9384
+ * gcc.target/aarch64/ngc.c: New.
9386
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9388
+ Backport from trunk r198298.
9389
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
9391
+ * lib/target-supports.exp
9392
+ (check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
9393
+ (check_effective_target_arm_neon_fp16_ok): Likewise.
9394
+ (add_options_for_arm_neon_fp16): Likewise.
9395
+ * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
9396
+ * gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
9398
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9400
+ Backport from trunk r198136-198137,198142,198176
9401
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
9403
+ * gcc.target/aarch64/vrecps.c: New.
9404
+ * gcc.target/aarch64/vrecpx.c: Likewise.
9406
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9408
+ Backport from trunk r198020.
9409
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
9411
+ * gcc.target/aarch64/adds3.c: New.
9412
+ * gcc.target/aarch64/subs3.c: New.
9414
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9416
+ Backport from trunk r197965.
9417
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
9419
+ * gcc.target/arm/anddi3-opt.c: New test.
9420
+ * gcc.target/arm/anddi3-opt2.c: Likewise.
9422
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9424
+ Backport from trunk r197642.
9425
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
9427
+ * gcc.target/arm/minmax_minus.c: New test.
9429
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9431
+ Backport from trunk r197530,197921.
9432
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
9434
+ * gcc.target/arm/peep-ldrd-1.c: New test.
9435
+ * gcc.target/arm/peep-strd-1.c: Likewise.
9437
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9439
+ Backport from trunk r197523.
9440
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
9442
+ * lib/target-supports.exp (add_options_for_arm_v8_neon):
9443
+ Add -march=armv8-a when we use v8 NEON.
9444
+ (check_effective_target_vect_call_btruncf): Remove arm-*-*-*.
9445
+ (check_effective_target_vect_call_ceilf): Likewise.
9446
+ (check_effective_target_vect_call_floorf): Likewise.
9447
+ (check_effective_target_vect_call_roundf): Likewise.
9448
+ (check_vect_support_and_set_flags): Remove check for arm_v8_neon.
9449
+ * gcc.target/arm/vect-rounding-btruncf.c: New testcase.
9450
+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise.
9451
+ * gcc.target/arm/vect-rounding-floorf.c: Likewise.
9452
+ * gcc.target/arm/vect-rounding-roundf.c: Likewise.
9454
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9456
+ Backport from trunk r197518-197522,197516-197528.
9457
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
9459
+ * gcc.target/arm/negdi-1.c: New test.
9460
+ * gcc.target/arm/negdi-2.c: Likewise.
9461
+ * gcc.target/arm/negdi-3.c: Likewise.
9462
+ * gcc.target/arm/negdi-4.c: Likewise.
9464
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9466
+ Backport from trunk r197489-197491.
9467
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
9469
+ * lib/target-supports.exp (check_effective_target_arm_v8_neon_hw):
9471
+ (check_effective_target_arm_v8_neon_ok_nocache):
9473
+ (check_effective_target_arm_v8_neon_ok): Change to use
9474
+ check_effective_target_arm_v8_neon_ok_nocache.
9475
+ (add_options_for_arm_v8_neon): Use et_arm_v8_neon_flags to set ARMv8
9477
+ (check_effective_target_vect_call_btruncf):
9478
+ Enable for arm and ARMv8 NEON.
9479
+ (check_effective_target_vect_call_ceilf): Likewise.
9480
+ (check_effective_target_vect_call_floorf): Likewise.
9481
+ (check_effective_target_vect_call_roundf): Likewise.
9482
+ (check_vect_support_and_set_flags): Handle ARMv8 NEON effective
9485
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9487
+ Backport from trunk r196795-196797,196957.
9488
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
9490
+ * gcc.target/aarch64/sbc.c: New test.
9492
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
9494
+ * gcc.target/aarch64/ror.c: New test.
9496
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
9498
+ * gcc.target/aarch64/extr.c: New test.
9500
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9502
+ * GCC Linaro 4.8-2013.04 released.
9504
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9506
+ Backport from trunk r197052.
9507
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
9509
+ * gcc.target/arm/vseleqdf.c: New test.
9510
+ * gcc.target/arm/vseleqsf.c: Likewise.
9511
+ * gcc.target/arm/vselgedf.c: Likewise.
9512
+ * gcc.target/arm/vselgesf.c: Likewise.
9513
+ * gcc.target/arm/vselgtdf.c: Likewise.
9514
+ * gcc.target/arm/vselgtsf.c: Likewise.
9515
+ * gcc.target/arm/vselledf.c: Likewise.
9516
+ * gcc.target/arm/vsellesf.c: Likewise.
9517
+ * gcc.target/arm/vselltdf.c: Likewise.
9518
+ * gcc.target/arm/vselltsf.c: Likewise.
9519
+ * gcc.target/arm/vselnedf.c: Likewise.
9520
+ * gcc.target/arm/vselnesf.c: Likewise.
9521
+ * gcc.target/arm/vselvcdf.c: Likewise.
9522
+ * gcc.target/arm/vselvcsf.c: Likewise.
9523
+ * gcc.target/arm/vselvsdf.c: Likewise.
9524
+ * gcc.target/arm/vselvssf.c: Likewise.
9526
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9528
+ Backport from trunk r197051.
9529
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
9531
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.c: Move test
9533
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.x: ... to here.
9534
+ * gcc.target/aarch64/atomic-op-acq_rel.c: Move test body from here...
9535
+ * gcc.target/aarch64/atomic-op-acq_rel.x: ... to here.
9536
+ * gcc.target/aarch64/atomic-op-acquire.c: Move test body from here...
9537
+ * gcc.target/aarch64/atomic-op-acquire.x: ... to here.
9538
+ * gcc.target/aarch64/atomic-op-char.c: Move test body from here...
9539
+ * gcc.target/aarch64/atomic-op-char.x: ... to here.
9540
+ * gcc.target/aarch64/atomic-op-consume.c: Move test body from here...
9541
+ * gcc.target/aarch64/atomic-op-consume.x: ... to here.
9542
+ * gcc.target/aarch64/atomic-op-int.c: Move test body from here...
9543
+ * gcc.target/aarch64/atomic-op-int.x: ... to here.
9544
+ * gcc.target/aarch64/atomic-op-relaxed.c: Move test body from here...
9545
+ * gcc.target/aarch64/atomic-op-relaxed.x: ... to here.
9546
+ * gcc.target/aarch64/atomic-op-release.c: Move test body from here...
9547
+ * gcc.target/aarch64/atomic-op-release.x: ... to here.
9548
+ * gcc.target/aarch64/atomic-op-seq_cst.c: Move test body from here...
9549
+ * gcc.target/aarch64/atomic-op-seq_cst.x: ... to here.
9550
+ * gcc.target/aarch64/atomic-op-short.c: Move test body from here...
9551
+ * gcc.target/aarch64/atomic-op-short.x: ... to here.
9552
+ * gcc.target/arm/atomic-comp-swap-release-acquire.c: New test.
9553
+ * gcc.target/arm/atomic-op-acq_rel.c: Likewise.
9554
+ * gcc.target/arm/atomic-op-acquire.c: Likewise.
9555
+ * gcc.target/arm/atomic-op-char.c: Likewise.
9556
+ * gcc.target/arm/atomic-op-consume.c: Likewise.
9557
+ * gcc.target/arm/atomic-op-int.c: Likewise.
9558
+ * gcc.target/arm/atomic-op-relaxed.c: Likewise.
9559
+ * gcc.target/arm/atomic-op-release.c: Likewise.
9560
+ * gcc.target/arm/atomic-op-seq_cst.c: Likewise.
9561
+ * gcc.target/arm/atomic-op-short.c: Likewise.
9563
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9565
+ Backport from trunk r196876.
9566
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
9568
+ * gcc.target/arm/neon-for-64bits-1.c: New tests.
9569
+ * gcc.target/arm/neon-for-64bits-2.c: Likewise.
9571
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9573
+ Backport from trunk r196858.
9574
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
9576
+ * gcc.target/aarch64/vect.c: Test and result vector added
9577
+ for sabd and saba instructions.
9578
+ * gcc.target/aarch64/vect-compile.c: Check for sabd and saba
9579
+ instructions in assembly.
9580
+ * gcc.target/aarch64/vect.x: Add sabd and saba test functions.
9581
+ * gcc.target/aarch64/vect-fp.c: Test and result vector added
9582
+ for fabd instruction.
9583
+ * gcc.target/aarch64/vect-fp-compile.c: Check for fabd
9584
+ instruction in assembly.
9585
+ * gcc.target/aarch64/vect-fp.x: Add fabd test function.
9586
--- a/src/gcc/testsuite/gcc.dg/pr57518.c
9587
+++ b/src/gcc/testsuite/gcc.dg/pr57518.c
9590
/* { dg-do compile } */
9591
/* { dg-options "-O2 -fdump-rtl-ira" } */
9592
-/* { dg-final { scan-rtl-dump-not "REG_EQUIV.*mem.*\"ip\"" "ira" } } */
9593
+/* { dg-final { scan-rtl-dump-not "REG_EQUIV\[^\n\]*mem\[^\n\]*\"ip\"" "ira" } } */
9597
--- a/src/gcc/testsuite/gcc.dg/shrink-wrap-alloca.c
9598
+++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-alloca.c
9600
+/* { dg-do compile } */
9601
+/* { dg-options "-O2 -g" } */
9609
+ p = __builtin_alloca (4);
9611
--- a/src/gcc/testsuite/gcc.dg/shrink-wrap-pretend.c
9612
+++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-pretend.c
9614
+/* { dg-do compile } */
9615
+/* { dg-options "-O2 -g" } */
9617
+#include <stdlib.h>
9619
+#include <stdarg.h>
9621
+#define DEBUG_BUFFER_SIZE 80
9622
+int unifi_debug = 5;
9625
+unifi_trace (void* ospriv, int level, const char *fmt, ...)
9627
+ static char s[DEBUG_BUFFER_SIZE];
9634
+ if (unifi_debug >= level)
9636
+ va_start (args, fmt);
9637
+ len = vsnprintf (&(s)[0], (DEBUG_BUFFER_SIZE), fmt, args);
9640
+ if (len >= DEBUG_BUFFER_SIZE)
9642
+ (s)[DEBUG_BUFFER_SIZE - 2] = '\n';
9643
+ (s)[DEBUG_BUFFER_SIZE - 1] = 0;
9650
--- a/src/gcc/testsuite/gcc.dg/debug/pr57351.c
9651
+++ b/src/gcc/testsuite/gcc.dg/debug/pr57351.c
9653
+/* { dg-do compile } */
9654
+/* { dg-require-effective-target arm_neon } */
9655
+/* { dg-options "-std=c99 -Os -g -march=armv7-a" } */
9656
+/* { dg-add-options arm_neon } */
9658
+typedef unsigned int size_t;
9659
+typedef int ptrdiff_t;
9660
+typedef signed char int8_t ;
9661
+typedef signed long long int64_t;
9662
+typedef int8_t GFC_INTEGER_1;
9663
+typedef GFC_INTEGER_1 GFC_LOGICAL_1;
9664
+typedef int64_t GFC_INTEGER_8;
9665
+typedef GFC_INTEGER_8 GFC_LOGICAL_8;
9666
+typedef ptrdiff_t index_type;
9667
+typedef struct descriptor_dimension
9669
+ index_type lower_bound;
9670
+ index_type _ubound;
9672
+descriptor_dimension;
9673
+typedef struct { GFC_LOGICAL_1 *base_addr; size_t offset; index_type dtype; descriptor_dimension dim[7];} gfc_array_l1;
9674
+typedef struct { GFC_LOGICAL_8 *base_addr; size_t offset; index_type dtype; descriptor_dimension dim[7];} gfc_array_l8;
9676
+all_l8 (gfc_array_l8 * const restrict retarray,
9677
+ gfc_array_l1 * const restrict array,
9678
+ const index_type * const restrict pdim)
9680
+ GFC_LOGICAL_8 * restrict dest;
9685
+ dim = (*pdim) - 1;
9686
+ len = ((array)->dim[dim]._ubound + 1 - (array)->dim[dim].lower_bound);
9687
+ for (n = 0; n < dim; n++)
9689
+ const GFC_LOGICAL_1 * restrict src;
9690
+ GFC_LOGICAL_8 result;
9694
+ for (n = 0; n < len; n++, src += delta)
9707
--- a/src/gcc/testsuite/gcc.dg/shrink-wrap-sibcall.c
9708
+++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-sibcall.c
9710
+/* { dg-do compile } */
9711
+/* { dg-options "-O2 -g" } */
9713
+unsigned char a, b, d, f, g;
9720
+ if (c == 0) return test ();
9724
+ int e = (a & 0x0f) - (g & 0x0f);
9726
+ if (!a) b |= 0x80;
9728
+ f = g/5 + a*3879 + b *2985;
9732
+ f = g + a*39879 + b *25;
9736
--- a/src/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c
9737
+++ b/src/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c
9739
E, F and G are passed on stack. So the size of the stack argument
9741
#define STACK_ARGUMENTS_SIZE 20
9742
-#elif defined __MMIX__
9743
+#elif defined __aarch64__ || defined __MMIX__
9744
/* No parameters on stack for bar. */
9745
#define STACK_ARGUMENTS_SIZE 0
9747
--- a/src/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
9748
+++ b/src/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
9750
-/* { dg-require-effective-target vect_int } */
9751
+/* { dg-require-effective-target vect_int }
9752
+ { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} }
9756
#include "tree-vect.h"
9757
--- a/src/gcc/testsuite/g++.dg/asan/large-func-test-1.C
9758
+++ b/src/gcc/testsuite/g++.dg/asan/large-func-test-1.C
9761
// { dg-output "ERROR: AddressSanitizer:? heap-buffer-overflow on address\[^\n\r]*" }
9762
// { dg-output "0x\[0-9a-f\]+ at pc 0x\[0-9a-f\]+ bp 0x\[0-9a-f\]+ sp 0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" }
9763
-// { dg-output "READ of size 4 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" }
9764
+// { dg-output "\[^\n\r]*READ of size 4 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" }
9765
// { dg-output " #0 0x\[0-9a-f\]+ (in \[^\n\r]*LargeFunction\[^\n\r]*(large-func-test-1.C:18|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" }
9766
-// { dg-output "0x\[0-9a-f\]+ is located 44 bytes to the right of 400-byte region.*(\n|\r\n|\r)" }
9767
-// { dg-output "allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" }
9768
+// { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 44 bytes to the right of 400-byte region.*(\n|\r\n|\r)" }
9769
+// { dg-output "\[^\n\r]*allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" }
9770
// { dg-output " #0( 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" }
9771
// { dg-output " #1|) 0x\[0-9a-f\]+ (in (operator new|_*_Zn\[aw\]\[mj\])|\[(\])\[^\n\r]*(\n|\r\n|\r)" }
9772
--- a/src/gcc/testsuite/g++.dg/asan/deep-thread-stack-1.C
9773
+++ b/src/gcc/testsuite/g++.dg/asan/deep-thread-stack-1.C
9777
// { dg-output "ERROR: AddressSanitizer: heap-use-after-free.*(\n|\r\n|\r)" }
9778
-// { dg-output "WRITE of size 4 at 0x\[0-9a-f\]+ thread T(\[0-9\]+).*(\n|\r\n|\r)" }
9779
-// { dg-output "freed by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" }
9780
-// { dg-output "previously allocated by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" }
9781
+// { dg-output "\[^\n\r]*WRITE of size 4 at 0x\[0-9a-f\]+ thread T(\[0-9\]+).*(\n|\r\n|\r)" }
9782
+// { dg-output "\[^\n\r]*freed by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" }
9783
+// { dg-output "\[^\n\r]*previously allocated by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" }
9784
// { dg-output "Thread T\\2 created by T(\[0-9\]+) here:.*(\n|\r\n|\r)" }
9785
// { dg-output "Thread T\\8 created by T0 here:.*(\n|\r\n|\r)" }
9786
// { dg-output "Thread T\\4 created by T(\[0-9\]+) here:.*(\n|\r\n|\r)" }
9787
--- a/src/gcc/testsuite/c-c++-common/asan/strncpy-overflow-1.c
9788
+++ b/src/gcc/testsuite/c-c++-common/asan/strncpy-overflow-1.c
9790
/* { dg-output "WRITE of size \[0-9\]* at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */
9791
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)strncpy|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9792
/* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*strncpy-overflow-1.c:11|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */
9793
-/* { dg-output "0x\[0-9a-f\]+ is located 0 bytes to the right of 9-byte region\[^\n\r]*(\n|\r\n|\r)" } */
9794
-/* { dg-output "allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9795
+/* { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 0 bytes to the right of 9-byte region\[^\n\r]*(\n|\r\n|\r)" } */
9796
+/* { dg-output "\[^\n\r]*allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9797
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9798
/* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*strncpy-overflow-1.c:10|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9799
--- a/src/gcc/testsuite/c-c++-common/asan/rlimit-mmap-test-1.c
9800
+++ b/src/gcc/testsuite/c-c++-common/asan/rlimit-mmap-test-1.c
9803
/* { dg-do run { target setrlimit } } */
9804
/* { dg-skip-if "" { *-*-* } { "*" } { "-O0" } } */
9805
+/* { dg-require-effective-target hw } */
9806
/* { dg-shouldfail "asan" } */
9809
--- a/src/gcc/testsuite/c-c++-common/asan/stack-overflow-1.c
9810
+++ b/src/gcc/testsuite/c-c++-common/asan/stack-overflow-1.c
9813
/* { dg-output "READ of size 1 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */
9814
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*stack-overflow-1.c:16|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */
9815
-/* { dg-output "Address 0x\[0-9a-f\]+ is\[^\n\r]*frame <main>" } */
9816
+/* { dg-output "\[^\n\r]*Address 0x\[0-9a-f\]+ is\[^\n\r]*frame <main>" } */
9817
--- a/src/gcc/testsuite/c-c++-common/asan/use-after-free-1.c
9818
+++ b/src/gcc/testsuite/c-c++-common/asan/use-after-free-1.c
9821
/* { dg-output "ERROR: AddressSanitizer:? heap-use-after-free on address\[^\n\r]*" } */
9822
/* { dg-output "0x\[0-9a-f\]+ at pc 0x\[0-9a-f\]+ bp 0x\[0-9a-f\]+ sp 0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */
9823
-/* { dg-output "READ of size 1 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */
9824
+/* { dg-output "\[^\n\r]*READ of size 1 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */
9825
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*use-after-free-1.c:9|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */
9826
-/* { dg-output "0x\[0-9a-f\]+ is located 5 bytes inside of 10-byte region .0x\[0-9a-f\]+,0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */
9827
-/* { dg-output "freed by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9828
+/* { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 5 bytes inside of 10-byte region .0x\[0-9a-f\]+,0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */
9829
+/* { dg-output "\[^\n\r]*freed by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9830
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)free|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9831
/* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*use-after-free-1.c:8|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */
9832
-/* { dg-output "previously allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9833
+/* { dg-output "\[^\n\r]*previously allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9834
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9835
/* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*use-after-free-1.c:7|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9836
--- a/src/gcc/testsuite/c-c++-common/asan/clone-test-1.c
9837
+++ b/src/gcc/testsuite/c-c++-common/asan/clone-test-1.c
9840
/* { dg-do run { target { *-*-linux* } } } */
9841
/* { dg-require-effective-target clone } */
9842
+/* { dg-require-effective-target hw } */
9843
/* { dg-options "-D_GNU_SOURCE" } */
9846
--- a/src/gcc/testsuite/c-c++-common/asan/heap-overflow-1.c
9847
+++ b/src/gcc/testsuite/c-c++-common/asan/heap-overflow-1.c
9850
/* { dg-output "READ of size 1 at 0x\[0-9a-f\]+ thread T0.*(\n|\r\n|\r)" } */
9851
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*heap-overflow-1.c:21|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */
9852
-/* { dg-output "0x\[0-9a-f\]+ is located 0 bytes to the right of 10-byte region\[^\n\r]*(\n|\r\n|\r)" } */
9853
-/* { dg-output "allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9854
+/* { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 0 bytes to the right of 10-byte region\[^\n\r]*(\n|\r\n|\r)" } */
9855
+/* { dg-output "\[^\n\r]*allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */
9856
/* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9857
/* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*heap-overflow-1.c:19|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9858
--- a/src/gcc/testsuite/c-c++-common/asan/null-deref-1.c
9859
+++ b/src/gcc/testsuite/c-c++-common/asan/null-deref-1.c
9862
/* { dg-output "ERROR: AddressSanitizer:? SEGV on unknown address\[^\n\r]*" } */
9863
/* { dg-output "0x\[0-9a-f\]+ \[^\n\r]*pc 0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */
9864
-/* { dg-output "AddressSanitizer can not provide additional info.*(\n|\r\n|\r)" } */
9865
+/* { dg-output "\[^\n\r]*AddressSanitizer can not provide additional info.*(\n|\r\n|\r)" } */
9866
/* { dg-output " #0 0x\[0-9a-f\]+ (in \[^\n\r]*NullDeref\[^\n\r]* (\[^\n\r]*null-deref-1.c:10|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9867
/* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*null-deref-1.c:15|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */
9868
--- a/src/gcc/objcp/ChangeLog.linaro
9869
+++ b/src/gcc/objcp/ChangeLog.linaro
9871
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9873
+ GCC Linaro 4.8-2013.07-1 released.
9875
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
9877
+ GCC Linaro 4.8-2013.07 released.
9879
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
9881
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
9883
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9885
+ GCC Linaro 4.8-2013.05 released.
9887
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9889
+ * GCC Linaro 4.8-2013.04 released.
9890
--- a/src/gcc/cp/ChangeLog.linaro
9891
+++ b/src/gcc/cp/ChangeLog.linaro
9893
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9895
+ GCC Linaro 4.8-2013.07-1 released.
9897
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
9899
+ GCC Linaro 4.8-2013.07 released.
9901
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
9903
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
9905
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9907
+ GCC Linaro 4.8-2013.05 released.
9909
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9911
+ * GCC Linaro 4.8-2013.04 released.
9912
--- a/src/gcc/rtl.def
9913
+++ b/src/gcc/rtl.def
9915
relational operator. Operands should have only one alternative.
9916
1: A C expression giving an additional condition for recognizing
9917
the generated pattern.
9918
- 2: A template or C code to produce assembler output. */
9919
-DEF_RTL_EXPR(DEFINE_COND_EXEC, "define_cond_exec", "Ess", RTX_EXTRA)
9920
+ 2: A template or C code to produce assembler output.
9921
+ 3: A vector of attributes to append to the resulting cond_exec insn. */
9922
+DEF_RTL_EXPR(DEFINE_COND_EXEC, "define_cond_exec", "EssV", RTX_EXTRA)
9924
/* Definition of an operand predicate. The difference between
9925
DEFINE_PREDICATE and DEFINE_SPECIAL_PREDICATE is that genrecog will
9926
--- a/src/gcc/go/ChangeLog.linaro
9927
+++ b/src/gcc/go/ChangeLog.linaro
9929
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9931
+ GCC Linaro 4.8-2013.07-1 released.
9933
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
9935
+ GCC Linaro 4.8-2013.07 released.
9937
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
9939
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
9941
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9943
+ GCC Linaro 4.8-2013.05 released.
9945
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9947
+ * GCC Linaro 4.8-2013.04 released.
9948
--- a/src/gcc/ada/ChangeLog.linaro
9949
+++ b/src/gcc/ada/ChangeLog.linaro
9951
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9953
+ GCC Linaro 4.8-2013.07-1 released.
9955
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
9957
+ GCC Linaro 4.8-2013.07 released.
9959
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
9961
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
9963
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9965
+ GCC Linaro 4.8-2013.05 released.
9967
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9969
+ * GCC Linaro 4.8-2013.04 released.
9970
--- a/src/gcc/common/config/aarch64/aarch64-common.c
9971
+++ b/src/gcc/common/config/aarch64/aarch64-common.c
9974
/* Enable section anchors by default at -O1 or higher. */
9975
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
9976
+ /* Enable redundant extension instructions removal at -O2 and higher. */
9977
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
9978
{ OPT_LEVELS_NONE, 0, NULL, 0 }
9981
--- a/src/gcc/fortran/ChangeLog.linaro
9982
+++ b/src/gcc/fortran/ChangeLog.linaro
9984
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9986
+ GCC Linaro 4.8-2013.07-1 released.
9988
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
9990
+ GCC Linaro 4.8-2013.07 released.
9992
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
9994
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
9996
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
9998
+ GCC Linaro 4.8-2013.05 released.
10000
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10002
+ * GCC Linaro 4.8-2013.04 released.
10003
--- a/src/gcc/configure.ac
10004
+++ b/src/gcc/configure.ac
10005
@@ -813,7 +813,7 @@
10007
AC_SUBST(CONFIGURE_SPECS)
10009
-ACX_PKGVERSION([GCC])
10010
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
10011
ACX_BUGURL([http://gcc.gnu.org/bugs.html])
10013
# Sanity check enable_languages in case someone does not run the toplevel
10014
--- a/src/gcc/function.c
10015
+++ b/src/gcc/function.c
10016
@@ -5509,22 +5509,45 @@
10017
except for any part that overlaps SRC (next loop). */
10018
bb_uses = &DF_LR_BB_INFO (bb)->use;
10019
bb_defs = &DF_LR_BB_INFO (bb)->def;
10020
- for (i = dregno; i < end_dregno; i++)
10023
- if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i))
10024
- next_block = NULL;
10025
- CLEAR_REGNO_REG_SET (live_out, i);
10026
- CLEAR_REGNO_REG_SET (live_in, i);
10027
+ for (i = dregno; i < end_dregno; i++)
10029
+ if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i)
10030
+ || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
10031
+ next_block = NULL;
10032
+ CLEAR_REGNO_REG_SET (live_out, i);
10033
+ CLEAR_REGNO_REG_SET (live_in, i);
10036
+ /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
10037
+ Either way, SRC is now live on entry. */
10038
+ for (i = sregno; i < end_sregno; i++)
10040
+ if (REGNO_REG_SET_P (bb_defs, i)
10041
+ || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
10042
+ next_block = NULL;
10043
+ SET_REGNO_REG_SET (live_out, i);
10044
+ SET_REGNO_REG_SET (live_in, i);
10049
+ /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
10050
+ DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e.
10051
+ at -O1, just give up searching NEXT_BLOCK. */
10052
+ next_block = NULL;
10053
+ for (i = dregno; i < end_dregno; i++)
10055
+ CLEAR_REGNO_REG_SET (live_out, i);
10056
+ CLEAR_REGNO_REG_SET (live_in, i);
10059
- /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
10060
- Either way, SRC is now live on entry. */
10061
- for (i = sregno; i < end_sregno; i++)
10063
- if (REGNO_REG_SET_P (bb_defs, i))
10064
- next_block = NULL;
10065
- SET_REGNO_REG_SET (live_out, i);
10066
- SET_REGNO_REG_SET (live_in, i);
10067
+ for (i = sregno; i < end_sregno; i++)
10069
+ SET_REGNO_REG_SET (live_out, i);
10070
+ SET_REGNO_REG_SET (live_in, i);
10074
/* If we don't need to add the move to BB, look for a single
10075
--- a/src/gcc/coretypes.h
10076
+++ b/src/gcc/coretypes.h
10078
typedef union gimple_statement_d *gimple;
10079
typedef const union gimple_statement_d *const_gimple;
10080
typedef gimple gimple_seq;
10081
+struct gimple_stmt_iterator_d;
10082
+typedef struct gimple_stmt_iterator_d gimple_stmt_iterator;
10084
typedef union section section;
10085
struct gcc_options;
10086
--- a/src/gcc/gimple-fold.c
10087
+++ b/src/gcc/gimple-fold.c
10088
@@ -1143,6 +1143,8 @@
10089
gimplify_and_update_call_from_tree (gsi, result);
10092
+ else if (DECL_BUILT_IN_CLASS (callee) == BUILT_IN_MD)
10093
+ changed |= targetm.gimple_fold_builtin (gsi);
10097
--- a/src/gcc/lto/ChangeLog.linaro
10098
+++ b/src/gcc/lto/ChangeLog.linaro
10100
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10102
+ GCC Linaro 4.8-2013.07-1 released.
10104
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
10106
+ GCC Linaro 4.8-2013.07 released.
10108
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
10110
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
10112
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10114
+ GCC Linaro 4.8-2013.05 released.
10116
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10118
+ * GCC Linaro 4.8-2013.04 released.
10119
--- a/src/gcc/po/ChangeLog.linaro
10120
+++ b/src/gcc/po/ChangeLog.linaro
10122
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10124
+ GCC Linaro 4.8-2013.07-1 released.
10126
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
10128
+ GCC Linaro 4.8-2013.07 released.
10130
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
10132
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
10134
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10136
+ GCC Linaro 4.8-2013.05 released.
10138
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
10140
+ * GCC Linaro 4.8-2013.04 released.
10141
--- a/src/gcc/gimple.h
10142
+++ b/src/gcc/gimple.h
10143
@@ -130,7 +130,7 @@
10145
/* Iterator object for GIMPLE statement sequences. */
10148
+struct gimple_stmt_iterator_d
10150
/* Sequence node holding the current statement. */
10151
gimple_seq_node ptr;
10152
@@ -141,9 +141,8 @@
10153
block/sequence is removed. */
10156
-} gimple_stmt_iterator;
10160
/* Data structure definitions for GIMPLE tuples. NOTE: word markers
10161
are for 64 bit hosts. */
10163
--- a/src/gcc/config/aarch64/aarch64-simd.md
10164
+++ b/src/gcc/config/aarch64/aarch64-simd.md
10167
; Main data types used by the insntructions
10169
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
10170
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
10171
(const_string "unknown"))
10175
; simd_dup duplicate element.
10176
; simd_dupgp duplicate general purpose register.
10177
; simd_ext bitwise extract from pair.
10178
+; simd_fabd floating point absolute difference.
10179
; simd_fadd floating point add/sub.
10180
; simd_fcmp floating point compare.
10181
; simd_fcvti floating point convert to integer.
10183
; simd_fmul floating point multiply.
10184
; simd_fmul_elt floating point multiply (by element).
10185
; simd_fnegabs floating point neg/abs.
10186
-; simd_frcpe floating point reciprocal estimate.
10187
-; simd_frcps floating point reciprocal step.
10188
-; simd_frecx floating point reciprocal exponent.
10189
+; simd_frecpe floating point reciprocal estimate.
10190
+; simd_frecps floating point reciprocal step.
10191
+; simd_frecpx floating point reciprocal exponent.
10192
; simd_frint floating point round to integer.
10193
; simd_fsqrt floating point square root.
10194
; simd_icvtf integer convert to floating point.
10195
@@ -147,6 +148,7 @@
10203
@@ -161,9 +163,9 @@
10216
@@ -303,8 +305,8 @@
10217
(eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs")
10218
(eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane")
10219
(eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane")
10220
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
10221
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
10222
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
10223
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
10224
(eq_attr "simd_type" "none") (const_string "none")
10226
(const_string "unknown")))
10227
@@ -355,15 +357,17 @@
10228
(set_attr "simd_mode" "<MODE>")]
10231
-(define_insn "aarch64_dup_lane<mode>"
10232
- [(set (match_operand:SDQ_I 0 "register_operand" "=w")
10233
+(define_insn "aarch64_dup_lane_scalar<mode>"
10234
+ [(set (match_operand:<VEL> 0 "register_operand" "=w, r")
10236
- (match_operand:<VCON> 1 "register_operand" "w")
10237
- (parallel [(match_operand:SI 2 "immediate_operand" "i")])
10238
+ (match_operand:VDQ 1 "register_operand" "w, w")
10239
+ (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])
10242
- "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]"
10243
- [(set_attr "simd_type" "simd_dup")
10245
+ dup\\t%<Vetype>0, %1.<Vetype>[%2]
10246
+ umov\\t%<vw>0, %1.<Vetype>[%2]"
10247
+ [(set_attr "simd_type" "simd_dup, simd_movgp")
10248
(set_attr "simd_mode" "<MODE>")]
10251
@@ -394,7 +398,7 @@
10252
case 4: return "ins\t%0.d[0], %1";
10253
case 5: return "mov\t%0, %1";
10255
- return aarch64_output_simd_mov_immediate (&operands[1],
10256
+ return aarch64_output_simd_mov_immediate (operands[1],
10258
default: gcc_unreachable ();
10260
@@ -414,16 +418,20 @@
10262
switch (which_alternative)
10264
- case 0: return "ld1\t{%0.<Vtype>}, %1";
10265
- case 1: return "st1\t{%1.<Vtype>}, %0";
10266
- case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
10267
- case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
10268
- case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
10269
- case 5: return "#";
10271
+ return "ld1\t{%0.<Vtype>}, %1";
10273
+ return "st1\t{%1.<Vtype>}, %0";
10275
+ return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
10281
- return aarch64_output_simd_mov_immediate (&operands[1],
10282
- <MODE>mode, 128);
10283
- default: gcc_unreachable ();
10284
+ return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
10286
+ gcc_unreachable ();
10289
[(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
10290
@@ -452,6 +460,77 @@
10291
aarch64_simd_disambiguate_copy (operands, dest, src, 2);
10295
+ [(set (match_operand:VQ 0 "register_operand" "")
10296
+ (match_operand:VQ 1 "register_operand" ""))]
10297
+ "TARGET_SIMD && reload_completed
10298
+ && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
10299
+ || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
10302
+ aarch64_split_simd_move (operands[0], operands[1]);
10306
+(define_expand "aarch64_split_simd_mov<mode>"
10307
+ [(set (match_operand:VQ 0)
10308
+ (match_operand:VQ 1))]
10311
+ rtx dst = operands[0];
10312
+ rtx src = operands[1];
10314
+ if (GP_REGNUM_P (REGNO (src)))
10316
+ rtx src_low_part = gen_lowpart (<VHALF>mode, src);
10317
+ rtx src_high_part = gen_highpart (<VHALF>mode, src);
10320
+ (gen_move_lo_quad_<mode> (dst, src_low_part));
10322
+ (gen_move_hi_quad_<mode> (dst, src_high_part));
10327
+ rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
10328
+ rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
10329
+ rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
10330
+ rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
10333
+ (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
10335
+ (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
10341
+(define_insn "aarch64_simd_mov_from_<mode>low"
10342
+ [(set (match_operand:<VHALF> 0 "register_operand" "=r")
10343
+ (vec_select:<VHALF>
10344
+ (match_operand:VQ 1 "register_operand" "w")
10345
+ (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
10346
+ "TARGET_SIMD && reload_completed"
10347
+ "umov\t%0, %1.d[0]"
10348
+ [(set_attr "simd_type" "simd_movgp")
10349
+ (set_attr "simd_mode" "<MODE>")
10350
+ (set_attr "length" "4")
10353
+(define_insn "aarch64_simd_mov_from_<mode>high"
10354
+ [(set (match_operand:<VHALF> 0 "register_operand" "=r")
10355
+ (vec_select:<VHALF>
10356
+ (match_operand:VQ 1 "register_operand" "w")
10357
+ (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
10358
+ "TARGET_SIMD && reload_completed"
10359
+ "umov\t%0, %1.d[1]"
10360
+ [(set_attr "simd_type" "simd_movgp")
10361
+ (set_attr "simd_mode" "<MODE>")
10362
+ (set_attr "length" "4")
10365
(define_insn "orn<mode>3"
10366
[(set (match_operand:VDQ 0 "register_operand" "=w")
10367
(ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
10368
@@ -503,8 +582,8 @@
10371
(define_insn "neg<mode>2"
10372
- [(set (match_operand:VDQM 0 "register_operand" "=w")
10373
- (neg:VDQM (match_operand:VDQM 1 "register_operand" "w")))]
10374
+ [(set (match_operand:VDQ 0 "register_operand" "=w")
10375
+ (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
10377
"neg\t%0.<Vtype>, %1.<Vtype>"
10378
[(set_attr "simd_type" "simd_negabs")
10379
@@ -520,6 +599,51 @@
10380
(set_attr "simd_mode" "<MODE>")]
10383
+(define_insn "abd<mode>_3"
10384
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
10385
+ (abs:VDQ_BHSI (minus:VDQ_BHSI
10386
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
10387
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
10389
+ "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10390
+ [(set_attr "simd_type" "simd_abd")
10391
+ (set_attr "simd_mode" "<MODE>")]
10394
+(define_insn "aba<mode>_3"
10395
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
10396
+ (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
10397
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
10398
+ (match_operand:VDQ_BHSI 2 "register_operand" "w")))
10399
+ (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
10401
+ "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10402
+ [(set_attr "simd_type" "simd_abd")
10403
+ (set_attr "simd_mode" "<MODE>")]
10406
+(define_insn "fabd<mode>_3"
10407
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
10408
+ (abs:VDQF (minus:VDQF
10409
+ (match_operand:VDQF 1 "register_operand" "w")
10410
+ (match_operand:VDQF 2 "register_operand" "w"))))]
10412
+ "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10413
+ [(set_attr "simd_type" "simd_fabd")
10414
+ (set_attr "simd_mode" "<MODE>")]
10417
+(define_insn "*fabd_scalar<mode>3"
10418
+ [(set (match_operand:GPF 0 "register_operand" "=w")
10419
+ (abs:GPF (minus:GPF
10420
+ (match_operand:GPF 1 "register_operand" "w")
10421
+ (match_operand:GPF 2 "register_operand" "w"))))]
10423
+ "fabd\t%<s>0, %<s>1, %<s>2"
10424
+ [(set_attr "simd_type" "simd_fabd")
10425
+ (set_attr "mode" "<MODE>")]
10428
(define_insn "and<mode>3"
10429
[(set (match_operand:VDQ 0 "register_operand" "=w")
10430
(and:VDQ (match_operand:VDQ 1 "register_operand" "w")
10431
@@ -904,12 +1028,12 @@
10434
;; Max/Min operations.
10435
-(define_insn "<maxmin><mode>3"
10436
+(define_insn "<su><maxmin><mode>3"
10437
[(set (match_operand:VQ_S 0 "register_operand" "=w")
10438
(MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w")
10439
(match_operand:VQ_S 2 "register_operand" "w")))]
10441
- "<maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10442
+ "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10443
[(set_attr "simd_type" "simd_minmax")
10444
(set_attr "simd_mode" "<MODE>")]
10446
@@ -917,29 +1041,39 @@
10447
;; Move into low-half clearing high half to 0.
10449
(define_insn "move_lo_quad_<mode>"
10450
- [(set (match_operand:VQ 0 "register_operand" "=w")
10451
+ [(set (match_operand:VQ 0 "register_operand" "=w,w,w")
10453
- (match_operand:<VHALF> 1 "register_operand" "w")
10454
+ (match_operand:<VHALF> 1 "register_operand" "w,r,r")
10455
(vec_duplicate:<VHALF> (const_int 0))))]
10457
- "mov\\t%d0, %d1";
10458
- [(set_attr "simd_type" "simd_dup")
10459
- (set_attr "simd_mode" "<MODE>")]
10461
+ dup\\t%d0, %1.d[0]
10464
+ [(set_attr "v8type" "*,fmov,*")
10465
+ (set_attr "simd_type" "simd_dup,*,simd_dup")
10466
+ (set_attr "simd_mode" "<MODE>")
10467
+ (set_attr "simd" "yes,*,yes")
10468
+ (set_attr "fp" "*,yes,*")
10469
+ (set_attr "length" "4")]
10472
;; Move into high-half.
10474
(define_insn "aarch64_simd_move_hi_quad_<mode>"
10475
- [(set (match_operand:VQ 0 "register_operand" "+w")
10476
+ [(set (match_operand:VQ 0 "register_operand" "+w,w")
10478
(vec_select:<VHALF>
10480
(match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
10481
- (match_operand:<VHALF> 1 "register_operand" "w")))]
10482
+ (match_operand:<VHALF> 1 "register_operand" "w,r")))]
10484
- "ins\\t%0.d[1], %1.d[0]";
10485
- [(set_attr "simd_type" "simd_ins")
10486
- (set_attr "simd_mode" "<MODE>")]
10488
+ ins\\t%0.d[1], %1.d[0]
10489
+ ins\\t%0.d[1], %1"
10490
+ [(set_attr "simd_type" "simd_ins,simd_ins")
10491
+ (set_attr "simd_mode" "<MODE>")
10492
+ (set_attr "length" "4")]
10495
(define_expand "move_hi_quad_<mode>"
10496
@@ -1045,6 +1179,104 @@
10498
;; Widening arithmetic.
10500
+(define_insn "*aarch64_<su>mlal_lo<mode>"
10501
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10504
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10505
+ (match_operand:VQW 2 "register_operand" "w")
10506
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
10507
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10508
+ (match_operand:VQW 4 "register_operand" "w")
10510
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
10512
+ "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
10513
+ [(set_attr "simd_type" "simd_mlal")
10514
+ (set_attr "simd_mode" "<MODE>")]
10517
+(define_insn "*aarch64_<su>mlal_hi<mode>"
10518
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10521
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10522
+ (match_operand:VQW 2 "register_operand" "w")
10523
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
10524
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10525
+ (match_operand:VQW 4 "register_operand" "w")
10527
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
10529
+ "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
10530
+ [(set_attr "simd_type" "simd_mlal")
10531
+ (set_attr "simd_mode" "<MODE>")]
10534
+(define_insn "*aarch64_<su>mlsl_lo<mode>"
10535
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10537
+ (match_operand:<VWIDE> 1 "register_operand" "0")
10539
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10540
+ (match_operand:VQW 2 "register_operand" "w")
10541
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
10542
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10543
+ (match_operand:VQW 4 "register_operand" "w")
10544
+ (match_dup 3))))))]
10546
+ "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
10547
+ [(set_attr "simd_type" "simd_mlal")
10548
+ (set_attr "simd_mode" "<MODE>")]
10551
+(define_insn "*aarch64_<su>mlsl_hi<mode>"
10552
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10554
+ (match_operand:<VWIDE> 1 "register_operand" "0")
10556
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10557
+ (match_operand:VQW 2 "register_operand" "w")
10558
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
10559
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10560
+ (match_operand:VQW 4 "register_operand" "w")
10561
+ (match_dup 3))))))]
10563
+ "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
10564
+ [(set_attr "simd_type" "simd_mlal")
10565
+ (set_attr "simd_mode" "<MODE>")]
10568
+(define_insn "*aarch64_<su>mlal<mode>"
10569
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10572
+ (ANY_EXTEND:<VWIDE>
10573
+ (match_operand:VDW 1 "register_operand" "w"))
10574
+ (ANY_EXTEND:<VWIDE>
10575
+ (match_operand:VDW 2 "register_operand" "w")))
10576
+ (match_operand:<VWIDE> 3 "register_operand" "0")))]
10578
+ "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
10579
+ [(set_attr "simd_type" "simd_mlal")
10580
+ (set_attr "simd_mode" "<MODE>")]
10583
+(define_insn "*aarch64_<su>mlsl<mode>"
10584
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10586
+ (match_operand:<VWIDE> 1 "register_operand" "0")
10588
+ (ANY_EXTEND:<VWIDE>
10589
+ (match_operand:VDW 2 "register_operand" "w"))
10590
+ (ANY_EXTEND:<VWIDE>
10591
+ (match_operand:VDW 3 "register_operand" "w")))))]
10593
+ "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
10594
+ [(set_attr "simd_type" "simd_mlal")
10595
+ (set_attr "simd_mode" "<MODE>")]
10598
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
10599
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
10600
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
10601
@@ -1196,7 +1428,9 @@
10602
(set_attr "simd_mode" "<MODE>")]
10605
-(define_insn "aarch64_frint<frint_suffix><mode>"
10606
+;; Vector versions of the floating-point frint patterns.
10607
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
10608
+(define_insn "<frint_pattern><mode>2"
10609
[(set (match_operand:VDQF 0 "register_operand" "=w")
10610
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
10612
@@ -1206,16 +1440,9 @@
10613
(set_attr "simd_mode" "<MODE>")]
10616
-;; Vector versions of the floating-point frint patterns.
10617
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
10618
-(define_expand "<frint_pattern><mode>2"
10619
- [(set (match_operand:VDQF 0 "register_operand")
10620
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
10625
-(define_insn "aarch64_fcvt<frint_suffix><su><mode>"
10626
+;; Vector versions of the fcvt standard patterns.
10627
+;; Expands to lbtrunc, lround, lceil, lfloor
10628
+(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
10629
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
10630
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
10631
[(match_operand:VDQF 1 "register_operand" "w")]
10632
@@ -1226,16 +1453,141 @@
10633
(set_attr "simd_mode" "<MODE>")]
10636
-;; Vector versions of the fcvt standard patterns.
10637
-;; Expands to lbtrunc, lround, lceil, lfloor
10638
-(define_expand "l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2"
10639
+(define_expand "<optab><VDQF:mode><fcvt_target>2"
10640
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
10641
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
10642
[(match_operand:VDQF 1 "register_operand")]
10644
+ UNSPEC_FRINTZ)))]
10648
+(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
10649
+ [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
10650
+ (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
10651
+ [(match_operand:VDQF 1 "register_operand")]
10652
+ UNSPEC_FRINTZ)))]
10656
+(define_expand "ftrunc<VDQF:mode>2"
10657
+ [(set (match_operand:VDQF 0 "register_operand")
10658
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
10663
+(define_insn "<optab><fcvt_target><VDQF:mode>2"
10664
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
10666
+ (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
10668
+ "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
10669
+ [(set_attr "simd_type" "simd_icvtf")
10670
+ (set_attr "simd_mode" "<MODE>")]
10673
+;; Conversions between vectors of floats and doubles.
10674
+;; Contains a mix of patterns to match standard pattern names
10675
+;; and those for intrinsics.
10677
+;; Float widening operations.
10679
+(define_insn "vec_unpacks_lo_v4sf"
10680
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
10681
+ (float_extend:V2DF
10683
+ (match_operand:V4SF 1 "register_operand" "w")
10684
+ (parallel [(const_int 0) (const_int 1)])
10687
+ "fcvtl\\t%0.2d, %1.2s"
10688
+ [(set_attr "simd_type" "simd_fcvtl")
10689
+ (set_attr "simd_mode" "V2DF")]
10692
+(define_insn "aarch64_float_extend_lo_v2df"
10693
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
10694
+ (float_extend:V2DF
10695
+ (match_operand:V2SF 1 "register_operand" "w")))]
10697
+ "fcvtl\\t%0.2d, %1.2s"
10698
+ [(set_attr "simd_type" "simd_fcvtl")
10699
+ (set_attr "simd_mode" "V2DF")]
10702
+(define_insn "vec_unpacks_hi_v4sf"
10703
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
10704
+ (float_extend:V2DF
10706
+ (match_operand:V4SF 1 "register_operand" "w")
10707
+ (parallel [(const_int 2) (const_int 3)])
10710
+ "fcvtl2\\t%0.2d, %1.4s"
10711
+ [(set_attr "simd_type" "simd_fcvtl")
10712
+ (set_attr "simd_mode" "V2DF")]
10715
+;; Float narrowing operations.
10717
+(define_insn "aarch64_float_truncate_lo_v2sf"
10718
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
10719
+ (float_truncate:V2SF
10720
+ (match_operand:V2DF 1 "register_operand" "w")))]
10722
+ "fcvtn\\t%0.2s, %1.2d"
10723
+ [(set_attr "simd_type" "simd_fcvtl")
10724
+ (set_attr "simd_mode" "V2SF")]
10727
+(define_insn "aarch64_float_truncate_hi_v4sf"
10728
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
10730
+ (match_operand:V2SF 1 "register_operand" "0")
10731
+ (float_truncate:V2SF
10732
+ (match_operand:V2DF 2 "register_operand" "w"))))]
10734
+ "fcvtn2\\t%0.4s, %2.2d"
10735
+ [(set_attr "simd_type" "simd_fcvtl")
10736
+ (set_attr "simd_mode" "V4SF")]
10739
+(define_expand "vec_pack_trunc_v2df"
10740
+ [(set (match_operand:V4SF 0 "register_operand")
10742
+ (float_truncate:V2SF
10743
+ (match_operand:V2DF 1 "register_operand"))
10744
+ (float_truncate:V2SF
10745
+ (match_operand:V2DF 2 "register_operand"))
10749
+ rtx tmp = gen_reg_rtx (V2SFmode);
10750
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[1]));
10751
+ emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
10752
+ tmp, operands[2]));
10757
+(define_expand "vec_pack_trunc_df"
10758
+ [(set (match_operand:V2SF 0 "register_operand")
10760
+ (float_truncate:SF
10761
+ (match_operand:DF 1 "register_operand"))
10762
+ (float_truncate:SF
10763
+ (match_operand:DF 2 "register_operand"))
10767
+ rtx tmp = gen_reg_rtx (V2SFmode);
10768
+ emit_insn (gen_move_lo_quad_v2df (tmp, operands[1]));
10769
+ emit_insn (gen_move_hi_quad_v2df (tmp, operands[2]));
10770
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
10775
(define_insn "aarch64_vmls<mode>"
10776
[(set (match_operand:VDQF 0 "register_operand" "=w")
10777
(minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
10778
@@ -1261,51 +1613,70 @@
10779
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
10782
-(define_insn "smax<mode>3"
10783
+(define_insn "<su><maxmin><mode>3"
10784
[(set (match_operand:VDQF 0 "register_operand" "=w")
10785
- (smax:VDQF (match_operand:VDQF 1 "register_operand" "w")
10786
+ (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
10787
(match_operand:VDQF 2 "register_operand" "w")))]
10789
- "fmaxnm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10790
+ "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10791
[(set_attr "simd_type" "simd_fminmax")
10792
(set_attr "simd_mode" "<MODE>")]
10795
-(define_insn "smin<mode>3"
10796
+(define_insn "<maxmin_uns><mode>3"
10797
[(set (match_operand:VDQF 0 "register_operand" "=w")
10798
- (smin:VDQF (match_operand:VDQF 1 "register_operand" "w")
10799
- (match_operand:VDQF 2 "register_operand" "w")))]
10800
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
10801
+ (match_operand:VDQF 2 "register_operand" "w")]
10804
- "fminnm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10805
+ "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10806
[(set_attr "simd_type" "simd_fminmax")
10807
(set_attr "simd_mode" "<MODE>")]
10810
-;; FP 'across lanes' max and min ops.
10811
+;; 'across lanes' add.
10813
-(define_insn "reduc_s<fmaxminv>_v4sf"
10814
- [(set (match_operand:V4SF 0 "register_operand" "=w")
10815
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
10817
+(define_insn "reduc_<sur>plus_<mode>"
10818
+ [(set (match_operand:VDQV 0 "register_operand" "=w")
10819
+ (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
10822
- "f<fmaxminv>nmv\\t%s0, %1.4s";
10823
- [(set_attr "simd_type" "simd_fminmaxv")
10824
- (set_attr "simd_mode" "V4SF")]
10825
+ "addv\\t%<Vetype>0, %1.<Vtype>"
10826
+ [(set_attr "simd_type" "simd_addv")
10827
+ (set_attr "simd_mode" "<MODE>")]
10830
-(define_insn "reduc_s<fmaxminv>_<mode>"
10831
+(define_insn "reduc_<sur>plus_v2di"
10832
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
10833
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
10836
+ "addp\\t%d0, %1.2d"
10837
+ [(set_attr "simd_type" "simd_addv")
10838
+ (set_attr "simd_mode" "V2DI")]
10841
+(define_insn "reduc_<sur>plus_v2si"
10842
+ [(set (match_operand:V2SI 0 "register_operand" "=w")
10843
+ (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
10846
+ "addp\\t%0.2s, %1.2s, %1.2s"
10847
+ [(set_attr "simd_type" "simd_addv")
10848
+ (set_attr "simd_mode" "V2SI")]
10851
+(define_insn "reduc_<sur>plus_<mode>"
10852
[(set (match_operand:V2F 0 "register_operand" "=w")
10853
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
10857
- "f<fmaxminv>nmp\\t%0.<Vtype>, %1.<Vtype>, %1.<Vtype>";
10858
- [(set_attr "simd_type" "simd_fminmax")
10859
+ "faddp\\t%<Vetype>0, %1.<Vtype>"
10860
+ [(set_attr "simd_type" "simd_fadd")
10861
(set_attr "simd_mode" "<MODE>")]
10864
-;; FP 'across lanes' add.
10866
-(define_insn "aarch64_addvv4sf"
10867
+(define_insn "aarch64_addpv4sf"
10868
[(set (match_operand:V4SF 0 "register_operand" "=w")
10869
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
10871
@@ -1315,169 +1686,106 @@
10872
(set_attr "simd_mode" "V4SF")]
10875
-(define_expand "reduc_uplus_v4sf"
10876
- [(set (match_operand:V4SF 0 "register_operand" "=w")
10877
- (match_operand:V4SF 1 "register_operand" "w"))]
10878
+(define_expand "reduc_<sur>plus_v4sf"
10879
+ [(set (match_operand:V4SF 0 "register_operand")
10880
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
10884
rtx tmp = gen_reg_rtx (V4SFmode);
10885
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
10886
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
10887
+ emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
10888
+ emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
10892
-(define_expand "reduc_splus_v4sf"
10893
- [(set (match_operand:V4SF 0 "register_operand" "=w")
10894
- (match_operand:V4SF 1 "register_operand" "w"))]
10895
+(define_insn "clz<mode>2"
10896
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
10897
+ (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
10900
- rtx tmp = gen_reg_rtx (V4SFmode);
10901
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
10902
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
10906
-(define_insn "aarch64_addv<mode>"
10907
- [(set (match_operand:V2F 0 "register_operand" "=w")
10908
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
10911
- "faddp\\t%<Vetype>0, %1.<Vtype>"
10912
- [(set_attr "simd_type" "simd_fadd")
10913
- (set_attr "simd_mode" "<MODE>")]
10914
+ "clz\\t%0.<Vtype>, %1.<Vtype>"
10915
+ [(set_attr "simd_type" "simd_cls")
10916
+ (set_attr "simd_mode" "<MODE>")]
10919
-(define_expand "reduc_uplus_<mode>"
10920
- [(set (match_operand:V2F 0 "register_operand" "=w")
10921
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
10926
+;; 'across lanes' max and min ops.
10928
-(define_expand "reduc_splus_<mode>"
10929
- [(set (match_operand:V2F 0 "register_operand" "=w")
10930
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
10936
-;; Reduction across lanes.
10938
-(define_insn "aarch64_addv<mode>"
10939
+(define_insn "reduc_<maxmin_uns>_<mode>"
10940
[(set (match_operand:VDQV 0 "register_operand" "=w")
10941
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
10945
- "addv\\t%<Vetype>0, %1.<Vtype>"
10946
- [(set_attr "simd_type" "simd_addv")
10947
+ "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
10948
+ [(set_attr "simd_type" "simd_minmaxv")
10949
(set_attr "simd_mode" "<MODE>")]
10952
-(define_expand "reduc_splus_<mode>"
10953
- [(set (match_operand:VDQV 0 "register_operand" "=w")
10954
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
10960
-(define_expand "reduc_uplus_<mode>"
10961
- [(set (match_operand:VDQV 0 "register_operand" "=w")
10962
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
10968
-(define_insn "aarch64_addvv2di"
10969
+(define_insn "reduc_<maxmin_uns>_v2di"
10970
[(set (match_operand:V2DI 0 "register_operand" "=w")
10971
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
10975
- "addp\\t%d0, %1.2d"
10976
- [(set_attr "simd_type" "simd_add")
10977
+ "<maxmin_uns_op>p\\t%d0, %1.2d"
10978
+ [(set_attr "simd_type" "simd_minmaxv")
10979
(set_attr "simd_mode" "V2DI")]
10982
-(define_expand "reduc_uplus_v2di"
10983
- [(set (match_operand:V2DI 0 "register_operand" "=w")
10984
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
10990
-(define_expand "reduc_splus_v2di"
10991
- [(set (match_operand:V2DI 0 "register_operand" "=w")
10992
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
10998
-(define_insn "aarch64_addvv2si"
10999
+(define_insn "reduc_<maxmin_uns>_v2si"
11000
[(set (match_operand:V2SI 0 "register_operand" "=w")
11001
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
11005
- "addp\\t%0.2s, %1.2s, %1.2s"
11006
- [(set_attr "simd_type" "simd_add")
11007
+ "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
11008
+ [(set_attr "simd_type" "simd_minmaxv")
11009
(set_attr "simd_mode" "V2SI")]
11012
-(define_expand "reduc_uplus_v2si"
11013
- [(set (match_operand:V2SI 0 "register_operand" "=w")
11014
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
11016
+(define_insn "reduc_<maxmin_uns>_<mode>"
11017
+ [(set (match_operand:V2F 0 "register_operand" "=w")
11018
+ (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
11024
-(define_expand "reduc_splus_v2si"
11025
- [(set (match_operand:V2SI 0 "register_operand" "=w")
11026
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
11032
-(define_insn "reduc_<maxminv>_<mode>"
11033
- [(set (match_operand:VDQV 0 "register_operand" "=w")
11034
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
11037
- "<maxminv>v\\t%<Vetype>0, %1.<Vtype>"
11038
- [(set_attr "simd_type" "simd_minmaxv")
11039
+ "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
11040
+ [(set_attr "simd_type" "simd_fminmaxv")
11041
(set_attr "simd_mode" "<MODE>")]
11044
-(define_insn "reduc_<maxminv>_v2si"
11045
- [(set (match_operand:V2SI 0 "register_operand" "=w")
11046
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
11048
+(define_insn "reduc_<maxmin_uns>_v4sf"
11049
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
11050
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
11053
- "<maxminv>p\\t%0.2s, %1.2s, %1.2s"
11054
- [(set_attr "simd_type" "simd_minmax")
11055
- (set_attr "simd_mode" "V2SI")]
11056
+ "<maxmin_uns_op>v\\t%s0, %1.4s"
11057
+ [(set_attr "simd_type" "simd_fminmaxv")
11058
+ (set_attr "simd_mode" "V4SF")]
11061
-;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register
11062
-;; allocation. For an intrinsic of form:
11063
-;; vD = bsl_* (vS, vN, vM)
11064
+;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
11066
+;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
11069
+;; Thus our BSL is of the form:
11070
+;; op0 = bsl (mask, op2, op3)
11071
;; We can use any of:
11072
-;; bsl vS, vN, vM (if D = S)
11073
-;; bit vD, vN, vS (if D = M, so 1-bits in vS choose bits from vN, else vM)
11074
-;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN)
11076
+;; if (op0 = mask)
11077
+;; bsl mask, op1, op2
11078
+;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
11079
+;; bit op0, op2, mask
11080
+;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
11081
+;; bif op0, op1, mask
11083
(define_insn "aarch64_simd_bsl<mode>_internal"
11084
[(set (match_operand:VALL 0 "register_operand" "=w,w,w")
11086
- [(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
11087
- (match_operand:VALL 2 "register_operand" " w,w,0")
11088
- (match_operand:VALL 3 "register_operand" " w,0,w")]
11092
+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
11093
+ (match_operand:VALL 2 "register_operand" " w,w,0"))
11095
+ (not:<V_cmp_result>
11096
+ (match_dup:<V_cmp_result> 1))
11097
+ (match_operand:VALL 3 "register_operand" " w,0,w"))
11101
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
11102
@@ -1486,28 +1794,32 @@
11105
(define_expand "aarch64_simd_bsl<mode>"
11106
- [(set (match_operand:VALL 0 "register_operand")
11107
- (unspec:VALL [(match_operand:<V_cmp_result> 1 "register_operand")
11108
- (match_operand:VALL 2 "register_operand")
11109
- (match_operand:VALL 3 "register_operand")]
11112
+ [(match_operand:VALL 0 "register_operand")
11113
+ (match_operand:<V_cmp_result> 1 "register_operand")
11114
+ (match_operand:VALL 2 "register_operand")
11115
+ (match_operand:VALL 3 "register_operand")]
11118
/* We can't alias operands together if they have different modes. */
11119
operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
11120
+ emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
11121
+ operands[2], operands[3]));
11125
-(define_expand "aarch64_vcond_internal<mode>"
11126
+(define_expand "aarch64_vcond_internal<mode><mode>"
11127
[(set (match_operand:VDQ 0 "register_operand")
11129
(match_operator 3 "comparison_operator"
11130
[(match_operand:VDQ 4 "register_operand")
11131
(match_operand:VDQ 5 "nonmemory_operand")])
11132
- (match_operand:VDQ 1 "register_operand")
11133
- (match_operand:VDQ 2 "register_operand")))]
11134
+ (match_operand:VDQ 1 "nonmemory_operand")
11135
+ (match_operand:VDQ 2 "nonmemory_operand")))]
11138
int inverse = 0, has_zero_imm_form = 0;
11139
+ rtx op1 = operands[1];
11140
+ rtx op2 = operands[2];
11141
rtx mask = gen_reg_rtx (<MODE>mode);
11143
switch (GET_CODE (operands[3]))
11144
@@ -1548,12 +1860,12 @@
11148
- emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
11149
+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
11154
- emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
11155
+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
11159
@@ -1566,30 +1878,47 @@
11163
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
11166
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
11169
+ op1 = operands[2];
11170
+ op2 = operands[1];
11173
+ /* If we have (a = (b CMP c) ? -1 : 0);
11174
+ Then we can simply move the generated mask. */
11176
+ if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
11177
+ && op2 == CONST0_RTX (<V_cmp_result>mode))
11178
+ emit_move_insn (operands[0], mask);
11181
+ if (!REG_P (op1))
11182
+ op1 = force_reg (<MODE>mode, op1);
11183
+ if (!REG_P (op2))
11184
+ op2 = force_reg (<MODE>mode, op2);
11185
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
11192
-(define_expand "aarch64_vcond_internal<mode>"
11193
- [(set (match_operand:VDQF 0 "register_operand")
11194
+(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
11195
+ [(set (match_operand:VDQF_COND 0 "register_operand")
11197
(match_operator 3 "comparison_operator"
11198
[(match_operand:VDQF 4 "register_operand")
11199
(match_operand:VDQF 5 "nonmemory_operand")])
11200
- (match_operand:VDQF 1 "register_operand")
11201
- (match_operand:VDQF 2 "register_operand")))]
11202
+ (match_operand:VDQF_COND 1 "nonmemory_operand")
11203
+ (match_operand:VDQF_COND 2 "nonmemory_operand")))]
11207
int use_zero_form = 0;
11208
int swap_bsl_operands = 0;
11209
- rtx mask = gen_reg_rtx (<V_cmp_result>mode);
11210
- rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
11211
+ rtx op1 = operands[1];
11212
+ rtx op2 = operands[2];
11213
+ rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
11214
+ rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
11216
rtx (*base_comparison) (rtx, rtx, rtx);
11217
rtx (*complimentary_comparison) (rtx, rtx, rtx);
11218
@@ -1609,7 +1938,7 @@
11219
/* Fall through. */
11221
if (!REG_P (operands[5]))
11222
- operands[5] = force_reg (<MODE>mode, operands[5]);
11223
+ operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
11226
switch (GET_CODE (operands[3]))
11227
@@ -1622,8 +1951,8 @@
11231
- base_comparison = gen_aarch64_cmge<mode>;
11232
- complimentary_comparison = gen_aarch64_cmgt<mode>;
11233
+ base_comparison = gen_aarch64_cmge<VDQF:mode>;
11234
+ complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
11238
@@ -1631,14 +1960,14 @@
11239
/* Fall through. */
11242
- base_comparison = gen_aarch64_cmgt<mode>;
11243
- complimentary_comparison = gen_aarch64_cmge<mode>;
11244
+ base_comparison = gen_aarch64_cmgt<VDQF:mode>;
11245
+ complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
11250
- base_comparison = gen_aarch64_cmeq<mode>;
11251
- complimentary_comparison = gen_aarch64_cmeq<mode>;
11252
+ base_comparison = gen_aarch64_cmeq<VDQF:mode>;
11253
+ complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
11256
gcc_unreachable ();
11257
@@ -1666,10 +1995,10 @@
11258
switch (GET_CODE (operands[3]))
11261
- base_comparison = gen_aarch64_cmlt<mode>;
11262
+ base_comparison = gen_aarch64_cmlt<VDQF:mode>;
11265
- base_comparison = gen_aarch64_cmle<mode>;
11266
+ base_comparison = gen_aarch64_cmle<VDQF:mode>;
11269
/* Do nothing, other zero form cases already have the correct
11270
@@ -1712,9 +2041,9 @@
11271
true iff !(a != b && a ORDERED b), swapping the operands to BSL
11272
will then give us (a == b || a UNORDERED b) as intended. */
11274
- emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
11275
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
11276
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
11277
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
11278
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
11279
+ emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
11280
swap_bsl_operands = 1;
11283
@@ -1723,20 +2052,36 @@
11284
swap_bsl_operands = 1;
11285
/* Fall through. */
11287
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
11288
- emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
11289
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
11290
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
11291
+ emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
11292
+ emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
11295
gcc_unreachable ();
11298
if (swap_bsl_operands)
11299
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
11302
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
11305
+ op1 = operands[2];
11306
+ op2 = operands[1];
11309
+ /* If we have (a = (b CMP c) ? -1 : 0);
11310
+ Then we can simply move the generated mask. */
11312
+ if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
11313
+ && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
11314
+ emit_move_insn (operands[0], mask);
11317
+ if (!REG_P (op1))
11318
+ op1 = force_reg (<VDQF_COND:MODE>mode, op1);
11319
+ if (!REG_P (op2))
11320
+ op2 = force_reg (<VDQF_COND:MODE>mode, op2);
11321
+ emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
11328
@@ -1746,16 +2091,32 @@
11329
(match_operator 3 "comparison_operator"
11330
[(match_operand:VALL 4 "register_operand")
11331
(match_operand:VALL 5 "nonmemory_operand")])
11332
- (match_operand:VALL 1 "register_operand")
11333
- (match_operand:VALL 2 "register_operand")))]
11334
+ (match_operand:VALL 1 "nonmemory_operand")
11335
+ (match_operand:VALL 2 "nonmemory_operand")))]
11338
- emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
11339
+ emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
11340
operands[2], operands[3],
11341
operands[4], operands[5]));
11345
+(define_expand "vcond<v_cmp_result><mode>"
11346
+ [(set (match_operand:<V_cmp_result> 0 "register_operand")
11347
+ (if_then_else:<V_cmp_result>
11348
+ (match_operator 3 "comparison_operator"
11349
+ [(match_operand:VDQF 4 "register_operand")
11350
+ (match_operand:VDQF 5 "nonmemory_operand")])
11351
+ (match_operand:<V_cmp_result> 1 "nonmemory_operand")
11352
+ (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
11355
+ emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
11356
+ operands[0], operands[1],
11357
+ operands[2], operands[3],
11358
+ operands[4], operands[5]));
11362
(define_expand "vcondu<mode><mode>"
11363
[(set (match_operand:VDQ 0 "register_operand")
11364
@@ -1763,11 +2124,11 @@
11365
(match_operator 3 "comparison_operator"
11366
[(match_operand:VDQ 4 "register_operand")
11367
(match_operand:VDQ 5 "nonmemory_operand")])
11368
- (match_operand:VDQ 1 "register_operand")
11369
- (match_operand:VDQ 2 "register_operand")))]
11370
+ (match_operand:VDQ 1 "nonmemory_operand")
11371
+ (match_operand:VDQ 2 "nonmemory_operand")))]
11374
- emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
11375
+ emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
11376
operands[2], operands[3],
11377
operands[4], operands[5]));
11379
@@ -2861,28 +3222,6 @@
11380
(set_attr "simd_mode" "<MODE>")]
11385
-(define_expand "aarch64_sshl_n<mode>"
11386
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
11387
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
11388
- (match_operand:SI 2 "immediate_operand" "i")]
11391
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
11395
-(define_expand "aarch64_ushl_n<mode>"
11396
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
11397
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
11398
- (match_operand:SI 2 "immediate_operand" "i")]
11401
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
11407
(define_insn "aarch64_<sur>shll_n<mode>"
11408
@@ -2927,28 +3266,6 @@
11409
(set_attr "simd_mode" "<MODE>")]
11414
-(define_expand "aarch64_sshr_n<mode>"
11415
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
11416
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
11417
- (match_operand:SI 2 "immediate_operand" "i")]
11420
- emit_insn (gen_ashr<mode>3 (operands[0], operands[1], operands[2]));
11424
-(define_expand "aarch64_ushr_n<mode>"
11425
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
11426
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
11427
- (match_operand:SI 2 "immediate_operand" "i")]
11430
- emit_insn (gen_lshr<mode>3 (operands[0], operands[1], operands[2]));
11436
(define_insn "aarch64_<sur>shr_n<mode>"
11437
@@ -3034,52 +3351,180 @@
11441
-;; cm(eq|ge|le|lt|gt)
11442
+;; cm(eq|ge|gt|lt|le)
11443
+;; Note, we have constraints for Dz and Z as different expanders
11444
+;; have different ideas of what should be passed to this pattern.
11446
-(define_insn "aarch64_cm<cmp><mode>"
11447
+(define_insn "aarch64_cm<optab><mode>"
11448
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
11449
- (unspec:<V_cmp_result>
11450
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
11451
- (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
11453
+ (neg:<V_cmp_result>
11454
+ (COMPARISONS:<V_cmp_result>
11455
+ (match_operand:VDQ 1 "register_operand" "w,w")
11456
+ (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
11460
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
11461
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
11462
+ cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
11463
+ cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
11464
[(set_attr "simd_type" "simd_cmp")
11465
(set_attr "simd_mode" "<MODE>")]
11469
+(define_insn_and_split "aarch64_cm<optab>di"
11470
+ [(set (match_operand:DI 0 "register_operand" "=w,w,r")
11473
+ (match_operand:DI 1 "register_operand" "w,w,r")
11474
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
11476
+ (clobber (reg:CC CC_REGNUM))]
11479
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
11480
+ cm<optab>\t%d0, %d1, #0
11482
+ "reload_completed
11483
+ /* We need to prevent the split from
11484
+ happening in the 'w' constraint cases. */
11485
+ && GP_REGNUM_P (REGNO (operands[0]))
11486
+ && GP_REGNUM_P (REGNO (operands[1]))"
11489
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
11490
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
11491
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
11492
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
11495
+ [(set_attr "simd_type" "simd_cmp")
11496
+ (set_attr "simd_mode" "DI")]
11499
-(define_insn "aarch64_cm<cmp><mode>"
11502
+(define_insn "aarch64_cm<optab><mode>"
11503
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
11504
- (unspec:<V_cmp_result>
11505
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
11506
- (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
11508
+ (neg:<V_cmp_result>
11509
+ (UCOMPARISONS:<V_cmp_result>
11510
+ (match_operand:VDQ 1 "register_operand" "w")
11511
+ (match_operand:VDQ 2 "register_operand" "w")
11514
- "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
11515
+ "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
11516
[(set_attr "simd_type" "simd_cmp")
11517
(set_attr "simd_mode" "<MODE>")]
11520
-;; fcm(eq|ge|le|lt|gt)
11521
+(define_insn_and_split "aarch64_cm<optab>di"
11522
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
11525
+ (match_operand:DI 1 "register_operand" "w,r")
11526
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
11528
+ (clobber (reg:CC CC_REGNUM))]
11531
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
11533
+ "reload_completed
11534
+ /* We need to prevent the split from
11535
+ happening in the 'w' constraint cases. */
11536
+ && GP_REGNUM_P (REGNO (operands[0]))
11537
+ && GP_REGNUM_P (REGNO (operands[1]))"
11540
+ enum machine_mode mode = CCmode;
11541
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
11542
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
11543
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
11546
+ [(set_attr "simd_type" "simd_cmp")
11547
+ (set_attr "simd_mode" "DI")]
11550
-(define_insn "aarch64_cm<cmp><mode>"
11553
+(define_insn "aarch64_cmtst<mode>"
11554
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
11555
+ (neg:<V_cmp_result>
11556
+ (ne:<V_cmp_result>
11558
+ (match_operand:VDQ 1 "register_operand" "w")
11559
+ (match_operand:VDQ 2 "register_operand" "w"))
11560
+ (vec_duplicate:<V_cmp_result> (const_int 0)))))]
11562
+ "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
11563
+ [(set_attr "simd_type" "simd_cmp")
11564
+ (set_attr "simd_mode" "<MODE>")]
11567
+(define_insn_and_split "aarch64_cmtstdi"
11568
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
11572
+ (match_operand:DI 1 "register_operand" "w,r")
11573
+ (match_operand:DI 2 "register_operand" "w,r"))
11575
+ (clobber (reg:CC CC_REGNUM))]
11578
+ cmtst\t%d0, %d1, %d2
11580
+ "reload_completed
11581
+ /* We need to prevent the split from
11582
+ happening in the 'w' constraint cases. */
11583
+ && GP_REGNUM_P (REGNO (operands[0]))
11584
+ && GP_REGNUM_P (REGNO (operands[1]))"
11587
+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
11588
+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
11589
+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
11590
+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
11591
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
11594
+ [(set_attr "simd_type" "simd_cmp")
11595
+ (set_attr "simd_mode" "DI")]
11598
+;; fcm(eq|ge|gt|le|lt)
11600
+(define_insn "aarch64_cm<optab><mode>"
11601
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
11602
- (unspec:<V_cmp_result>
11603
- [(match_operand:VDQF 1 "register_operand" "w,w")
11604
- (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
11606
+ (neg:<V_cmp_result>
11607
+ (COMPARISONS:<V_cmp_result>
11608
+ (match_operand:VALLF 1 "register_operand" "w,w")
11609
+ (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
11613
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
11614
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
11615
+ fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
11616
+ fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
11617
[(set_attr "simd_type" "simd_fcmp")
11618
(set_attr "simd_mode" "<MODE>")]
11622
+;; Note we can also handle what would be fac(le|lt) by
11623
+;; generating fac(ge|gt).
11625
+(define_insn "*aarch64_fac<optab><mode>"
11626
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
11627
+ (neg:<V_cmp_result>
11628
+ (FAC_COMPARISONS:<V_cmp_result>
11629
+ (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
11630
+ (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
11633
+ "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
11634
+ [(set_attr "simd_type" "simd_fcmp")
11635
+ (set_attr "simd_mode" "<MODE>")]
11640
(define_insn "aarch64_addp<mode>"
11641
@@ -3105,30 +3550,6 @@
11642
(set_attr "simd_mode" "DI")]
11647
-(define_expand "aarch64_<maxmin><mode>"
11648
- [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
11649
- (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
11650
- (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
11653
- emit_insn (gen_<maxmin><mode>3 (operands[0], operands[1], operands[2]));
11658
-(define_insn "aarch64_<fmaxmin><mode>"
11659
- [(set (match_operand:VDQF 0 "register_operand" "=w")
11660
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
11661
- (match_operand:VDQF 2 "register_operand" "w")]
11664
- "<fmaxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
11665
- [(set_attr "simd_type" "simd_fminmax")
11666
- (set_attr "simd_mode" "<MODE>")]
11671
(define_insn "sqrt<mode>2"
11672
@@ -3140,16 +3561,6 @@
11673
(set_attr "simd_mode" "<MODE>")]
11676
-(define_expand "aarch64_sqrt<mode>"
11677
- [(match_operand:VDQF 0 "register_operand" "=w")
11678
- (match_operand:VDQF 1 "register_operand" "w")]
11681
- emit_insn (gen_sqrt<mode>2 (operands[0], operands[1]));
11686
;; Patterns for vector struct loads and stores.
11688
(define_insn "vec_load_lanesoi<mode>"
11689
@@ -3736,3 +4147,25 @@
11690
"ld1r\\t{%0.<Vtype>}, %1"
11691
[(set_attr "simd_type" "simd_load1r")
11692
(set_attr "simd_mode" "<MODE>")])
11694
+(define_insn "aarch64_frecpe<mode>"
11695
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
11696
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
11699
+ "frecpe\\t%0.<Vtype>, %1.<Vtype>"
11700
+ [(set_attr "simd_type" "simd_frecpe")
11701
+ (set_attr "simd_mode" "<MODE>")]
11704
+(define_insn "aarch64_frecps<mode>"
11705
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
11706
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
11707
+ (match_operand:VDQF 2 "register_operand" "w")]
11710
+ "frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
11711
+ [(set_attr "simd_type" "simd_frecps")
11712
+ (set_attr "simd_mode" "<MODE>")]
11715
--- a/src/gcc/config/aarch64/predicates.md
11716
+++ b/src/gcc/config/aarch64/predicates.md
11718
(ior (match_operand 0 "register_operand")
11719
(match_test "op == const0_rtx"))))
11721
+(define_predicate "aarch64_reg_or_fp_zero"
11722
+ (and (match_code "reg,subreg,const_double")
11723
+ (ior (match_operand 0 "register_operand")
11724
+ (match_test "aarch64_float_const_zero_rtx_p (op)"))))
11726
(define_predicate "aarch64_reg_zero_or_m1_or_1"
11727
(and (match_code "reg,subreg,const_int")
11728
(ior (match_operand 0 "register_operand")
11729
@@ -110,16 +115,11 @@
11730
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
11733
-(define_predicate "aarch64_const_address"
11734
- (and (match_code "symbol_ref")
11735
- (match_test "mode == DImode && CONSTANT_ADDRESS_P (op)")))
11737
(define_predicate "aarch64_valid_symref"
11738
(match_code "const, symbol_ref, label_ref")
11740
- enum aarch64_symbol_type symbol_type;
11741
- return (aarch64_symbolic_constant_p (op, SYMBOL_CONTEXT_ADR, &symbol_type)
11742
- && symbol_type != SYMBOL_FORCE_TO_MEM);
11743
+ return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR)
11744
+ != SYMBOL_FORCE_TO_MEM);
11747
(define_predicate "aarch64_tls_ie_symref"
11748
@@ -165,15 +165,10 @@
11751
(define_predicate "aarch64_mov_operand"
11752
- (and (match_code "reg,subreg,mem,const_int,symbol_ref,high")
11753
+ (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high")
11754
(ior (match_operand 0 "register_operand")
11755
(ior (match_operand 0 "memory_operand")
11756
- (ior (match_test "GET_CODE (op) == HIGH
11757
- && aarch64_valid_symref (XEXP (op, 0),
11758
- GET_MODE (XEXP (op, 0)))")
11759
- (ior (match_test "CONST_INT_P (op)
11760
- && aarch64_move_imm (INTVAL (op), mode)")
11761
- (match_test "aarch64_const_address (op, mode)")))))))
11762
+ (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)")))))
11764
(define_predicate "aarch64_movti_operand"
11765
(and (match_code "reg,subreg,mem,const_int")
11766
--- a/src/gcc/config/aarch64/aarch64-elf.h
11767
+++ b/src/gcc/config/aarch64/aarch64-elf.h
11768
@@ -106,7 +106,6 @@
11770
#define ASM_COMMENT_START "//"
11772
-#define REGISTER_PREFIX ""
11773
#define LOCAL_LABEL_PREFIX "."
11774
#define USER_LABEL_PREFIX ""
11776
--- a/src/gcc/config/aarch64/arm_neon.h
11777
+++ b/src/gcc/config/aarch64/arm_neon.h
11778
@@ -4468,160 +4468,6 @@
11782
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11783
-vabs_f32 (float32x2_t a)
11785
- float32x2_t result;
11786
- __asm__ ("fabs %0.2s,%1.2s"
11789
- : /* No clobbers */);
11793
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11794
-vabs_s8 (int8x8_t a)
11797
- __asm__ ("abs %0.8b,%1.8b"
11800
- : /* No clobbers */);
11804
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11805
-vabs_s16 (int16x4_t a)
11807
- int16x4_t result;
11808
- __asm__ ("abs %0.4h,%1.4h"
11811
- : /* No clobbers */);
11815
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11816
-vabs_s32 (int32x2_t a)
11818
- int32x2_t result;
11819
- __asm__ ("abs %0.2s,%1.2s"
11822
- : /* No clobbers */);
11826
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11827
-vabsq_f32 (float32x4_t a)
11829
- float32x4_t result;
11830
- __asm__ ("fabs %0.4s,%1.4s"
11833
- : /* No clobbers */);
11837
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11838
-vabsq_f64 (float64x2_t a)
11840
- float64x2_t result;
11841
- __asm__ ("fabs %0.2d,%1.2d"
11844
- : /* No clobbers */);
11848
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11849
-vabsq_s8 (int8x16_t a)
11851
- int8x16_t result;
11852
- __asm__ ("abs %0.16b,%1.16b"
11855
- : /* No clobbers */);
11859
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11860
-vabsq_s16 (int16x8_t a)
11862
- int16x8_t result;
11863
- __asm__ ("abs %0.8h,%1.8h"
11866
- : /* No clobbers */);
11870
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11871
-vabsq_s32 (int32x4_t a)
11873
- int32x4_t result;
11874
- __asm__ ("abs %0.4s,%1.4s"
11877
- : /* No clobbers */);
11881
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11882
-vabsq_s64 (int64x2_t a)
11884
- int64x2_t result;
11885
- __asm__ ("abs %0.2d,%1.2d"
11888
- : /* No clobbers */);
11892
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
11893
-vacged_f64 (float64_t a, float64_t b)
11895
- float64_t result;
11896
- __asm__ ("facge %d0,%d1,%d2"
11899
- : /* No clobbers */);
11903
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
11904
-vacges_f32 (float32_t a, float32_t b)
11906
- float32_t result;
11907
- __asm__ ("facge %s0,%s1,%s2"
11910
- : /* No clobbers */);
11914
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
11915
-vacgtd_f64 (float64_t a, float64_t b)
11917
- float64_t result;
11918
- __asm__ ("facgt %d0,%d1,%d2"
11921
- : /* No clobbers */);
11925
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
11926
-vacgts_f32 (float32_t a, float32_t b)
11928
- float32_t result;
11929
- __asm__ ("facgt %s0,%s1,%s2"
11932
- : /* No clobbers */);
11936
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
11937
vaddlv_s8 (int8x8_t a)
11939
@@ -4732,116 +4578,6 @@
11943
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
11944
-vaddv_s8 (int8x8_t a)
11947
- __asm__ ("addv %b0,%1.8b"
11950
- : /* No clobbers */);
11954
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
11955
-vaddv_s16 (int16x4_t a)
11958
- __asm__ ("addv %h0,%1.4h"
11961
- : /* No clobbers */);
11965
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
11966
-vaddv_u8 (uint8x8_t a)
11969
- __asm__ ("addv %b0,%1.8b"
11972
- : /* No clobbers */);
11976
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
11977
-vaddv_u16 (uint16x4_t a)
11980
- __asm__ ("addv %h0,%1.4h"
11983
- : /* No clobbers */);
11987
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
11988
-vaddvq_s8 (int8x16_t a)
11991
- __asm__ ("addv %b0,%1.16b"
11994
- : /* No clobbers */);
11998
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
11999
-vaddvq_s16 (int16x8_t a)
12002
- __asm__ ("addv %h0,%1.8h"
12005
- : /* No clobbers */);
12009
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12010
-vaddvq_s32 (int32x4_t a)
12013
- __asm__ ("addv %s0,%1.4s"
12016
- : /* No clobbers */);
12020
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
12021
-vaddvq_u8 (uint8x16_t a)
12024
- __asm__ ("addv %b0,%1.16b"
12027
- : /* No clobbers */);
12031
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
12032
-vaddvq_u16 (uint16x8_t a)
12035
- __asm__ ("addv %h0,%1.8h"
12038
- : /* No clobbers */);
12042
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12043
-vaddvq_u32 (uint32x4_t a)
12046
- __asm__ ("addv %s0,%1.4s"
12049
- : /* No clobbers */);
12053
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12054
vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
12056
@@ -5095,358 +4831,6 @@
12060
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12061
-vcage_f32 (float32x2_t a, float32x2_t b)
12063
- uint32x2_t result;
12064
- __asm__ ("facge %0.2s, %1.2s, %2.2s"
12067
- : /* No clobbers */);
12071
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12072
-vcageq_f32 (float32x4_t a, float32x4_t b)
12074
- uint32x4_t result;
12075
- __asm__ ("facge %0.4s, %1.4s, %2.4s"
12078
- : /* No clobbers */);
12082
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12083
-vcageq_f64 (float64x2_t a, float64x2_t b)
12085
- uint64x2_t result;
12086
- __asm__ ("facge %0.2d, %1.2d, %2.2d"
12089
- : /* No clobbers */);
12093
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12094
-vcagt_f32 (float32x2_t a, float32x2_t b)
12096
- uint32x2_t result;
12097
- __asm__ ("facgt %0.2s, %1.2s, %2.2s"
12100
- : /* No clobbers */);
12104
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12105
-vcagtq_f32 (float32x4_t a, float32x4_t b)
12107
- uint32x4_t result;
12108
- __asm__ ("facgt %0.4s, %1.4s, %2.4s"
12111
- : /* No clobbers */);
12115
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12116
-vcagtq_f64 (float64x2_t a, float64x2_t b)
12118
- uint64x2_t result;
12119
- __asm__ ("facgt %0.2d, %1.2d, %2.2d"
12122
- : /* No clobbers */);
12126
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12127
-vcale_f32 (float32x2_t a, float32x2_t b)
12129
- uint32x2_t result;
12130
- __asm__ ("facge %0.2s, %2.2s, %1.2s"
12133
- : /* No clobbers */);
12137
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12138
-vcaleq_f32 (float32x4_t a, float32x4_t b)
12140
- uint32x4_t result;
12141
- __asm__ ("facge %0.4s, %2.4s, %1.4s"
12144
- : /* No clobbers */);
12148
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12149
-vcaleq_f64 (float64x2_t a, float64x2_t b)
12151
- uint64x2_t result;
12152
- __asm__ ("facge %0.2d, %2.2d, %1.2d"
12155
- : /* No clobbers */);
12159
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12160
-vcalt_f32 (float32x2_t a, float32x2_t b)
12162
- uint32x2_t result;
12163
- __asm__ ("facgt %0.2s, %2.2s, %1.2s"
12166
- : /* No clobbers */);
12170
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12171
-vcaltq_f32 (float32x4_t a, float32x4_t b)
12173
- uint32x4_t result;
12174
- __asm__ ("facgt %0.4s, %2.4s, %1.4s"
12177
- : /* No clobbers */);
12181
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12182
-vcaltq_f64 (float64x2_t a, float64x2_t b)
12184
- uint64x2_t result;
12185
- __asm__ ("facgt %0.2d, %2.2d, %1.2d"
12188
- : /* No clobbers */);
12192
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12193
-vceq_f32 (float32x2_t a, float32x2_t b)
12195
- uint32x2_t result;
12196
- __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
12199
- : /* No clobbers */);
12203
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12204
-vceq_f64 (float64x1_t a, float64x1_t b)
12206
- uint64x1_t result;
12207
- __asm__ ("fcmeq %d0, %d1, %d2"
12210
- : /* No clobbers */);
12214
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12215
-vceqd_f64 (float64_t a, float64_t b)
12217
- float64_t result;
12218
- __asm__ ("fcmeq %d0,%d1,%d2"
12221
- : /* No clobbers */);
12225
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12226
-vceqq_f32 (float32x4_t a, float32x4_t b)
12228
- uint32x4_t result;
12229
- __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
12232
- : /* No clobbers */);
12236
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12237
-vceqq_f64 (float64x2_t a, float64x2_t b)
12239
- uint64x2_t result;
12240
- __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
12243
- : /* No clobbers */);
12247
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12248
-vceqs_f32 (float32_t a, float32_t b)
12250
- float32_t result;
12251
- __asm__ ("fcmeq %s0,%s1,%s2"
12254
- : /* No clobbers */);
12258
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12259
-vceqzd_f64 (float64_t a)
12261
- float64_t result;
12262
- __asm__ ("fcmeq %d0,%d1,#0"
12265
- : /* No clobbers */);
12269
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12270
-vceqzs_f32 (float32_t a)
12272
- float32_t result;
12273
- __asm__ ("fcmeq %s0,%s1,#0"
12276
- : /* No clobbers */);
12280
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12281
-vcge_f32 (float32x2_t a, float32x2_t b)
12283
- uint32x2_t result;
12284
- __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
12287
- : /* No clobbers */);
12291
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12292
-vcge_f64 (float64x1_t a, float64x1_t b)
12294
- uint64x1_t result;
12295
- __asm__ ("fcmge %d0, %d1, %d2"
12298
- : /* No clobbers */);
12302
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12303
-vcgeq_f32 (float32x4_t a, float32x4_t b)
12305
- uint32x4_t result;
12306
- __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
12309
- : /* No clobbers */);
12313
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12314
-vcgeq_f64 (float64x2_t a, float64x2_t b)
12316
- uint64x2_t result;
12317
- __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
12320
- : /* No clobbers */);
12324
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12325
-vcgt_f32 (float32x2_t a, float32x2_t b)
12327
- uint32x2_t result;
12328
- __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
12331
- : /* No clobbers */);
12335
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12336
-vcgt_f64 (float64x1_t a, float64x1_t b)
12338
- uint64x1_t result;
12339
- __asm__ ("fcmgt %d0, %d1, %d2"
12342
- : /* No clobbers */);
12346
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12347
-vcgtq_f32 (float32x4_t a, float32x4_t b)
12349
- uint32x4_t result;
12350
- __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
12353
- : /* No clobbers */);
12357
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12358
-vcgtq_f64 (float64x2_t a, float64x2_t b)
12360
- uint64x2_t result;
12361
- __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
12364
- : /* No clobbers */);
12368
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12369
-vcle_f32 (float32x2_t a, float32x2_t b)
12371
- uint32x2_t result;
12372
- __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
12375
- : /* No clobbers */);
12379
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12380
-vcle_f64 (float64x1_t a, float64x1_t b)
12382
- uint64x1_t result;
12383
- __asm__ ("fcmge %d0, %d2, %d1"
12386
- : /* No clobbers */);
12390
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12391
-vcleq_f32 (float32x4_t a, float32x4_t b)
12393
- uint32x4_t result;
12394
- __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
12397
- : /* No clobbers */);
12401
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12402
-vcleq_f64 (float64x2_t a, float64x2_t b)
12404
- uint64x2_t result;
12405
- __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
12408
- : /* No clobbers */);
12412
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12413
vcls_s8 (int8x8_t a)
12415
@@ -5513,50 +4897,6 @@
12419
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12420
-vclt_f32 (float32x2_t a, float32x2_t b)
12422
- uint32x2_t result;
12423
- __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
12426
- : /* No clobbers */);
12430
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12431
-vclt_f64 (float64x1_t a, float64x1_t b)
12433
- uint64x1_t result;
12434
- __asm__ ("fcmgt %d0, %d2, %d1"
12437
- : /* No clobbers */);
12441
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12442
-vcltq_f32 (float32x4_t a, float32x4_t b)
12444
- uint32x4_t result;
12445
- __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
12448
- : /* No clobbers */);
12452
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12453
-vcltq_f64 (float64x2_t a, float64x2_t b)
12455
- uint64x2_t result;
12456
- __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
12459
- : /* No clobbers */);
12463
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12464
vclz_s8 (int8x8_t a)
12466
@@ -5915,100 +5255,12 @@
12468
/* vcvt_f32_f16 not supported */
12470
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12471
-vcvt_f32_f64 (float64x2_t a)
12473
- float32x2_t result;
12474
- __asm__ ("fcvtn %0.2s,%1.2d"
12477
- : /* No clobbers */);
12481
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12482
-vcvt_f32_s32 (int32x2_t a)
12484
- float32x2_t result;
12485
- __asm__ ("scvtf %0.2s, %1.2s"
12488
- : /* No clobbers */);
12492
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12493
-vcvt_f32_u32 (uint32x2_t a)
12495
- float32x2_t result;
12496
- __asm__ ("ucvtf %0.2s, %1.2s"
12499
- : /* No clobbers */);
12503
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12504
-vcvt_f64_f32 (float32x2_t a)
12506
- float64x2_t result;
12507
- __asm__ ("fcvtl %0.2d,%1.2s"
12510
- : /* No clobbers */);
12514
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12515
-vcvt_f64_s64 (uint64x1_t a)
12517
- float64x1_t result;
12518
- __asm__ ("scvtf %d0, %d1"
12521
- : /* No clobbers */);
12525
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12526
-vcvt_f64_u64 (uint64x1_t a)
12528
- float64x1_t result;
12529
- __asm__ ("ucvtf %d0, %d1"
12532
- : /* No clobbers */);
12536
/* vcvt_high_f16_f32 not supported */
12538
/* vcvt_high_f32_f16 not supported */
12540
static float32x2_t vdup_n_f32 (float32_t);
12542
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12543
-vcvt_high_f32_f64 (float32x2_t a, float64x2_t b)
12545
- float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f));
12546
- __asm__ ("fcvtn2 %0.4s,%2.2d"
12549
- : /* No clobbers */);
12553
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12554
-vcvt_high_f64_f32 (float32x4_t a)
12556
- float64x2_t result;
12557
- __asm__ ("fcvtl2 %0.2d,%1.4s"
12560
- : /* No clobbers */);
12564
#define vcvt_n_f32_s32(a, b) \
12567
@@ -6057,160 +5309,6 @@
12571
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12572
-vcvt_s32_f32 (float32x2_t a)
12574
- int32x2_t result;
12575
- __asm__ ("fcvtzs %0.2s, %1.2s"
12578
- : /* No clobbers */);
12582
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12583
-vcvt_u32_f32 (float32x2_t a)
12585
- uint32x2_t result;
12586
- __asm__ ("fcvtzu %0.2s, %1.2s"
12589
- : /* No clobbers */);
12593
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12594
-vcvta_s32_f32 (float32x2_t a)
12596
- int32x2_t result;
12597
- __asm__ ("fcvtas %0.2s, %1.2s"
12600
- : /* No clobbers */);
12604
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12605
-vcvta_u32_f32 (float32x2_t a)
12607
- uint32x2_t result;
12608
- __asm__ ("fcvtau %0.2s, %1.2s"
12611
- : /* No clobbers */);
12615
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12616
-vcvtad_s64_f64 (float64_t a)
12618
- float64_t result;
12619
- __asm__ ("fcvtas %d0,%d1"
12622
- : /* No clobbers */);
12626
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12627
-vcvtad_u64_f64 (float64_t a)
12629
- float64_t result;
12630
- __asm__ ("fcvtau %d0,%d1"
12633
- : /* No clobbers */);
12637
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12638
-vcvtaq_s32_f32 (float32x4_t a)
12640
- int32x4_t result;
12641
- __asm__ ("fcvtas %0.4s, %1.4s"
12644
- : /* No clobbers */);
12648
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12649
-vcvtaq_s64_f64 (float64x2_t a)
12651
- int64x2_t result;
12652
- __asm__ ("fcvtas %0.2d, %1.2d"
12655
- : /* No clobbers */);
12659
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12660
-vcvtaq_u32_f32 (float32x4_t a)
12662
- uint32x4_t result;
12663
- __asm__ ("fcvtau %0.4s, %1.4s"
12666
- : /* No clobbers */);
12670
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12671
-vcvtaq_u64_f64 (float64x2_t a)
12673
- uint64x2_t result;
12674
- __asm__ ("fcvtau %0.2d, %1.2d"
12677
- : /* No clobbers */);
12681
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12682
-vcvtas_s64_f64 (float32_t a)
12684
- float32_t result;
12685
- __asm__ ("fcvtas %s0,%s1"
12688
- : /* No clobbers */);
12692
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12693
-vcvtas_u64_f64 (float32_t a)
12695
- float32_t result;
12696
- __asm__ ("fcvtau %s0,%s1"
12699
- : /* No clobbers */);
12703
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
12704
-vcvtd_f64_s64 (int64_t a)
12707
- __asm__ ("scvtf %d0,%d1"
12710
- : /* No clobbers */);
12714
-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12715
-vcvtd_f64_u64 (uint64_t a)
12718
- __asm__ ("ucvtf %d0,%d1"
12721
- : /* No clobbers */);
12725
#define vcvtd_n_f64_s64(a, b) \
12728
@@ -6259,402 +5357,6 @@
12732
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12733
-vcvtd_s64_f64 (float64_t a)
12735
- float64_t result;
12736
- __asm__ ("fcvtzs %d0,%d1"
12739
- : /* No clobbers */);
12743
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12744
-vcvtd_u64_f64 (float64_t a)
12746
- float64_t result;
12747
- __asm__ ("fcvtzu %d0,%d1"
12750
- : /* No clobbers */);
12754
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12755
-vcvtm_s32_f32 (float32x2_t a)
12757
- int32x2_t result;
12758
- __asm__ ("fcvtms %0.2s, %1.2s"
12761
- : /* No clobbers */);
12765
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12766
-vcvtm_u32_f32 (float32x2_t a)
12768
- uint32x2_t result;
12769
- __asm__ ("fcvtmu %0.2s, %1.2s"
12772
- : /* No clobbers */);
12776
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12777
-vcvtmd_s64_f64 (float64_t a)
12779
- float64_t result;
12780
- __asm__ ("fcvtms %d0,%d1"
12783
- : /* No clobbers */);
12787
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12788
-vcvtmd_u64_f64 (float64_t a)
12790
- float64_t result;
12791
- __asm__ ("fcvtmu %d0,%d1"
12794
- : /* No clobbers */);
12798
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12799
-vcvtmq_s32_f32 (float32x4_t a)
12801
- int32x4_t result;
12802
- __asm__ ("fcvtms %0.4s, %1.4s"
12805
- : /* No clobbers */);
12809
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12810
-vcvtmq_s64_f64 (float64x2_t a)
12812
- int64x2_t result;
12813
- __asm__ ("fcvtms %0.2d, %1.2d"
12816
- : /* No clobbers */);
12820
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12821
-vcvtmq_u32_f32 (float32x4_t a)
12823
- uint32x4_t result;
12824
- __asm__ ("fcvtmu %0.4s, %1.4s"
12827
- : /* No clobbers */);
12831
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12832
-vcvtmq_u64_f64 (float64x2_t a)
12834
- uint64x2_t result;
12835
- __asm__ ("fcvtmu %0.2d, %1.2d"
12838
- : /* No clobbers */);
12842
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12843
-vcvtms_s64_f64 (float32_t a)
12845
- float32_t result;
12846
- __asm__ ("fcvtms %s0,%s1"
12849
- : /* No clobbers */);
12853
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12854
-vcvtms_u64_f64 (float32_t a)
12856
- float32_t result;
12857
- __asm__ ("fcvtmu %s0,%s1"
12860
- : /* No clobbers */);
12864
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12865
-vcvtn_s32_f32 (float32x2_t a)
12867
- int32x2_t result;
12868
- __asm__ ("fcvtns %0.2s, %1.2s"
12871
- : /* No clobbers */);
12875
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12876
-vcvtn_u32_f32 (float32x2_t a)
12878
- uint32x2_t result;
12879
- __asm__ ("fcvtnu %0.2s, %1.2s"
12882
- : /* No clobbers */);
12886
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12887
-vcvtnd_s64_f64 (float64_t a)
12889
- float64_t result;
12890
- __asm__ ("fcvtns %d0,%d1"
12893
- : /* No clobbers */);
12897
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12898
-vcvtnd_u64_f64 (float64_t a)
12900
- float64_t result;
12901
- __asm__ ("fcvtnu %d0,%d1"
12904
- : /* No clobbers */);
12908
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12909
-vcvtnq_s32_f32 (float32x4_t a)
12911
- int32x4_t result;
12912
- __asm__ ("fcvtns %0.4s, %1.4s"
12915
- : /* No clobbers */);
12919
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12920
-vcvtnq_s64_f64 (float64x2_t a)
12922
- int64x2_t result;
12923
- __asm__ ("fcvtns %0.2d, %1.2d"
12926
- : /* No clobbers */);
12930
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12931
-vcvtnq_u32_f32 (float32x4_t a)
12933
- uint32x4_t result;
12934
- __asm__ ("fcvtnu %0.4s, %1.4s"
12937
- : /* No clobbers */);
12941
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12942
-vcvtnq_u64_f64 (float64x2_t a)
12944
- uint64x2_t result;
12945
- __asm__ ("fcvtnu %0.2d, %1.2d"
12948
- : /* No clobbers */);
12952
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12953
-vcvtns_s64_f64 (float32_t a)
12955
- float32_t result;
12956
- __asm__ ("fcvtns %s0,%s1"
12959
- : /* No clobbers */);
12963
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12964
-vcvtns_u64_f64 (float32_t a)
12966
- float32_t result;
12967
- __asm__ ("fcvtnu %s0,%s1"
12970
- : /* No clobbers */);
12974
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12975
-vcvtp_s32_f32 (float32x2_t a)
12977
- int32x2_t result;
12978
- __asm__ ("fcvtps %0.2s, %1.2s"
12981
- : /* No clobbers */);
12985
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12986
-vcvtp_u32_f32 (float32x2_t a)
12988
- uint32x2_t result;
12989
- __asm__ ("fcvtpu %0.2s, %1.2s"
12992
- : /* No clobbers */);
12996
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12997
-vcvtpd_s64_f64 (float64_t a)
12999
- float64_t result;
13000
- __asm__ ("fcvtps %d0,%d1"
13003
- : /* No clobbers */);
13007
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13008
-vcvtpd_u64_f64 (float64_t a)
13010
- float64_t result;
13011
- __asm__ ("fcvtpu %d0,%d1"
13014
- : /* No clobbers */);
13018
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13019
-vcvtpq_s32_f32 (float32x4_t a)
13021
- int32x4_t result;
13022
- __asm__ ("fcvtps %0.4s, %1.4s"
13025
- : /* No clobbers */);
13029
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13030
-vcvtpq_s64_f64 (float64x2_t a)
13032
- int64x2_t result;
13033
- __asm__ ("fcvtps %0.2d, %1.2d"
13036
- : /* No clobbers */);
13040
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13041
-vcvtpq_u32_f32 (float32x4_t a)
13043
- uint32x4_t result;
13044
- __asm__ ("fcvtpu %0.4s, %1.4s"
13047
- : /* No clobbers */);
13051
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13052
-vcvtpq_u64_f64 (float64x2_t a)
13054
- uint64x2_t result;
13055
- __asm__ ("fcvtpu %0.2d, %1.2d"
13058
- : /* No clobbers */);
13062
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13063
-vcvtps_s64_f64 (float32_t a)
13065
- float32_t result;
13066
- __asm__ ("fcvtps %s0,%s1"
13069
- : /* No clobbers */);
13073
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13074
-vcvtps_u64_f64 (float32_t a)
13076
- float32_t result;
13077
- __asm__ ("fcvtpu %s0,%s1"
13080
- : /* No clobbers */);
13084
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13085
-vcvtq_f32_s32 (int32x4_t a)
13087
- float32x4_t result;
13088
- __asm__ ("scvtf %0.4s, %1.4s"
13091
- : /* No clobbers */);
13095
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13096
-vcvtq_f32_u32 (uint32x4_t a)
13098
- float32x4_t result;
13099
- __asm__ ("ucvtf %0.4s, %1.4s"
13102
- : /* No clobbers */);
13106
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13107
-vcvtq_f64_s64 (int64x2_t a)
13109
- float64x2_t result;
13110
- __asm__ ("scvtf %0.2d, %1.2d"
13113
- : /* No clobbers */);
13117
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13118
-vcvtq_f64_u64 (uint64x2_t a)
13120
- float64x2_t result;
13121
- __asm__ ("ucvtf %0.2d, %1.2d"
13124
- : /* No clobbers */);
13128
#define vcvtq_n_f32_s32(a, b) \
13131
@@ -6751,72 +5453,6 @@
13135
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13136
-vcvtq_s32_f32 (float32x4_t a)
13138
- int32x4_t result;
13139
- __asm__ ("fcvtzs %0.4s, %1.4s"
13142
- : /* No clobbers */);
13146
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13147
-vcvtq_s64_f64 (float64x2_t a)
13149
- int64x2_t result;
13150
- __asm__ ("fcvtzs %0.2d, %1.2d"
13153
- : /* No clobbers */);
13157
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13158
-vcvtq_u32_f32 (float32x4_t a)
13160
- uint32x4_t result;
13161
- __asm__ ("fcvtzu %0.4s, %1.4s"
13164
- : /* No clobbers */);
13168
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13169
-vcvtq_u64_f64 (float64x2_t a)
13171
- uint64x2_t result;
13172
- __asm__ ("fcvtzu %0.2d, %1.2d"
13175
- : /* No clobbers */);
13179
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13180
-vcvts_f64_s32 (int32_t a)
13183
- __asm__ ("scvtf %s0,%s1"
13186
- : /* No clobbers */);
13190
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13191
-vcvts_f64_u32 (uint32_t a)
13194
- __asm__ ("ucvtf %s0,%s1"
13197
- : /* No clobbers */);
13201
#define vcvts_n_f32_s32(a, b) \
13204
@@ -6865,28 +5501,6 @@
13208
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13209
-vcvts_s64_f64 (float32_t a)
13211
- float32_t result;
13212
- __asm__ ("fcvtzs %s0,%s1"
13215
- : /* No clobbers */);
13219
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13220
-vcvts_u64_f64 (float32_t a)
13222
- float32_t result;
13223
- __asm__ ("fcvtzu %s0,%s1"
13226
- : /* No clobbers */);
13230
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13231
vcvtx_f32_f64 (float64x2_t a)
13233
@@ -8962,303 +7576,6 @@
13237
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13238
-vmaxnm_f32 (float32x2_t a, float32x2_t b)
13240
- float32x2_t result;
13241
- __asm__ ("fmaxnm %0.2s,%1.2s,%2.2s"
13244
- : /* No clobbers */);
13248
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13249
-vmaxnmq_f32 (float32x4_t a, float32x4_t b)
13251
- float32x4_t result;
13252
- __asm__ ("fmaxnm %0.4s,%1.4s,%2.4s"
13255
- : /* No clobbers */);
13259
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13260
-vmaxnmq_f64 (float64x2_t a, float64x2_t b)
13262
- float64x2_t result;
13263
- __asm__ ("fmaxnm %0.2d,%1.2d,%2.2d"
13266
- : /* No clobbers */);
13270
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13271
-vmaxnmvq_f32 (float32x4_t a)
13273
- float32_t result;
13274
- __asm__ ("fmaxnmv %s0,%1.4s"
13277
- : /* No clobbers */);
13281
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13282
-vmaxv_s8 (int8x8_t a)
13285
- __asm__ ("smaxv %b0,%1.8b"
13288
- : /* No clobbers */);
13292
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13293
-vmaxv_s16 (int16x4_t a)
13296
- __asm__ ("smaxv %h0,%1.4h"
13299
- : /* No clobbers */);
13303
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13304
-vmaxv_u8 (uint8x8_t a)
13307
- __asm__ ("umaxv %b0,%1.8b"
13310
- : /* No clobbers */);
13314
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13315
-vmaxv_u16 (uint16x4_t a)
13318
- __asm__ ("umaxv %h0,%1.4h"
13321
- : /* No clobbers */);
13325
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13326
-vmaxvq_f32 (float32x4_t a)
13328
- float32_t result;
13329
- __asm__ ("fmaxv %s0,%1.4s"
13332
- : /* No clobbers */);
13336
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13337
-vmaxvq_s8 (int8x16_t a)
13340
- __asm__ ("smaxv %b0,%1.16b"
13343
- : /* No clobbers */);
13347
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13348
-vmaxvq_s16 (int16x8_t a)
13351
- __asm__ ("smaxv %h0,%1.8h"
13354
- : /* No clobbers */);
13358
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13359
-vmaxvq_s32 (int32x4_t a)
13362
- __asm__ ("smaxv %s0,%1.4s"
13365
- : /* No clobbers */);
13369
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13370
-vmaxvq_u8 (uint8x16_t a)
13373
- __asm__ ("umaxv %b0,%1.16b"
13376
- : /* No clobbers */);
13380
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13381
-vmaxvq_u16 (uint16x8_t a)
13384
- __asm__ ("umaxv %h0,%1.8h"
13387
- : /* No clobbers */);
13391
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13392
-vmaxvq_u32 (uint32x4_t a)
13395
- __asm__ ("umaxv %s0,%1.4s"
13398
- : /* No clobbers */);
13402
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13403
-vminnmvq_f32 (float32x4_t a)
13405
- float32_t result;
13406
- __asm__ ("fminnmv %s0,%1.4s"
13409
- : /* No clobbers */);
13413
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13414
-vminv_s8 (int8x8_t a)
13417
- __asm__ ("sminv %b0,%1.8b"
13420
- : /* No clobbers */);
13424
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13425
-vminv_s16 (int16x4_t a)
13428
- __asm__ ("sminv %h0,%1.4h"
13431
- : /* No clobbers */);
13435
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13436
-vminv_u8 (uint8x8_t a)
13439
- __asm__ ("uminv %b0,%1.8b"
13442
- : /* No clobbers */);
13446
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13447
-vminv_u16 (uint16x4_t a)
13450
- __asm__ ("uminv %h0,%1.4h"
13453
- : /* No clobbers */);
13457
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13458
-vminvq_f32 (float32x4_t a)
13460
- float32_t result;
13461
- __asm__ ("fminv %s0,%1.4s"
13464
- : /* No clobbers */);
13468
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13469
-vminvq_s8 (int8x16_t a)
13472
- __asm__ ("sminv %b0,%1.16b"
13475
- : /* No clobbers */);
13479
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13480
-vminvq_s16 (int16x8_t a)
13483
- __asm__ ("sminv %h0,%1.8h"
13486
- : /* No clobbers */);
13490
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13491
-vminvq_s32 (int32x4_t a)
13494
- __asm__ ("sminv %s0,%1.4s"
13497
- : /* No clobbers */);
13501
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13502
-vminvq_u8 (uint8x16_t a)
13505
- __asm__ ("uminv %b0,%1.16b"
13508
- : /* No clobbers */);
13512
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13513
-vminvq_u16 (uint16x8_t a)
13516
- __asm__ ("uminv %h0,%1.8h"
13519
- : /* No clobbers */);
13523
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13524
-vminvq_u32 (uint32x4_t a)
13527
- __asm__ ("uminv %s0,%1.4s"
13530
- : /* No clobbers */);
13534
#define vmla_lane_f32(a, b, c, d) \
13537
@@ -14292,17 +12609,6 @@
13541
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13542
-vrecpe_f32 (float32x2_t a)
13544
- float32x2_t result;
13545
- __asm__ ("frecpe %0.2s,%1.2s"
13548
- : /* No clobbers */);
13552
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13553
vrecpe_u32 (uint32x2_t a)
13555
@@ -14314,39 +12620,6 @@
13559
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13560
-vrecped_f64 (float64_t a)
13562
- float64_t result;
13563
- __asm__ ("frecpe %d0,%d1"
13566
- : /* No clobbers */);
13570
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13571
-vrecpeq_f32 (float32x4_t a)
13573
- float32x4_t result;
13574
- __asm__ ("frecpe %0.4s,%1.4s"
13577
- : /* No clobbers */);
13581
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13582
-vrecpeq_f64 (float64x2_t a)
13584
- float64x2_t result;
13585
- __asm__ ("frecpe %0.2d,%1.2d"
13588
- : /* No clobbers */);
13592
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13593
vrecpeq_u32 (uint32x4_t a)
13595
@@ -14358,94 +12631,6 @@
13599
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13600
-vrecpes_f32 (float32_t a)
13602
- float32_t result;
13603
- __asm__ ("frecpe %s0,%s1"
13606
- : /* No clobbers */);
13610
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13611
-vrecps_f32 (float32x2_t a, float32x2_t b)
13613
- float32x2_t result;
13614
- __asm__ ("frecps %0.2s,%1.2s,%2.2s"
13617
- : /* No clobbers */);
13621
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13622
-vrecpsd_f64 (float64_t a, float64_t b)
13624
- float64_t result;
13625
- __asm__ ("frecps %d0,%d1,%d2"
13628
- : /* No clobbers */);
13632
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13633
-vrecpsq_f32 (float32x4_t a, float32x4_t b)
13635
- float32x4_t result;
13636
- __asm__ ("frecps %0.4s,%1.4s,%2.4s"
13639
- : /* No clobbers */);
13643
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13644
-vrecpsq_f64 (float64x2_t a, float64x2_t b)
13646
- float64x2_t result;
13647
- __asm__ ("frecps %0.2d,%1.2d,%2.2d"
13650
- : /* No clobbers */);
13654
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13655
-vrecpss_f32 (float32_t a, float32_t b)
13657
- float32_t result;
13658
- __asm__ ("frecps %s0,%s1,%s2"
13661
- : /* No clobbers */);
13665
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13666
-vrecpxd_f64 (float64_t a)
13668
- float64_t result;
13669
- __asm__ ("frecpe %d0,%d1"
13672
- : /* No clobbers */);
13676
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13677
-vrecpxs_f32 (float32_t a)
13679
- float32_t result;
13680
- __asm__ ("frecpe %s0,%s1"
13683
- : /* No clobbers */);
13687
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13688
vrev16_p8 (poly8x8_t a)
13690
@@ -14842,171 +13027,6 @@
13694
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13695
-vrnd_f32 (float32x2_t a)
13697
- float32x2_t result;
13698
- __asm__ ("frintz %0.2s,%1.2s"
13701
- : /* No clobbers */);
13705
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13706
-vrnda_f32 (float32x2_t a)
13708
- float32x2_t result;
13709
- __asm__ ("frinta %0.2s,%1.2s"
13712
- : /* No clobbers */);
13716
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13717
-vrndm_f32 (float32x2_t a)
13719
- float32x2_t result;
13720
- __asm__ ("frintm %0.2s,%1.2s"
13723
- : /* No clobbers */);
13727
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13728
-vrndn_f32 (float32x2_t a)
13730
- float32x2_t result;
13731
- __asm__ ("frintn %0.2s,%1.2s"
13734
- : /* No clobbers */);
13738
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13739
-vrndp_f32 (float32x2_t a)
13741
- float32x2_t result;
13742
- __asm__ ("frintp %0.2s,%1.2s"
13745
- : /* No clobbers */);
13749
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13750
-vrndq_f32 (float32x4_t a)
13752
- float32x4_t result;
13753
- __asm__ ("frintz %0.4s,%1.4s"
13756
- : /* No clobbers */);
13760
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13761
-vrndq_f64 (float64x2_t a)
13763
- float64x2_t result;
13764
- __asm__ ("frintz %0.2d,%1.2d"
13767
- : /* No clobbers */);
13771
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13772
-vrndqa_f32 (float32x4_t a)
13774
- float32x4_t result;
13775
- __asm__ ("frinta %0.4s,%1.4s"
13778
- : /* No clobbers */);
13782
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13783
-vrndqa_f64 (float64x2_t a)
13785
- float64x2_t result;
13786
- __asm__ ("frinta %0.2d,%1.2d"
13789
- : /* No clobbers */);
13793
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13794
-vrndqm_f32 (float32x4_t a)
13796
- float32x4_t result;
13797
- __asm__ ("frintm %0.4s,%1.4s"
13800
- : /* No clobbers */);
13804
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13805
-vrndqm_f64 (float64x2_t a)
13807
- float64x2_t result;
13808
- __asm__ ("frintm %0.2d,%1.2d"
13811
- : /* No clobbers */);
13815
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13816
-vrndqn_f32 (float32x4_t a)
13818
- float32x4_t result;
13819
- __asm__ ("frintn %0.4s,%1.4s"
13822
- : /* No clobbers */);
13826
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13827
-vrndqn_f64 (float64x2_t a)
13829
- float64x2_t result;
13830
- __asm__ ("frintn %0.2d,%1.2d"
13833
- : /* No clobbers */);
13837
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13838
-vrndqp_f32 (float32x4_t a)
13840
- float32x4_t result;
13841
- __asm__ ("frintp %0.4s,%1.4s"
13844
- : /* No clobbers */);
13848
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13849
-vrndqp_f64 (float64x2_t a)
13851
- float64x2_t result;
13852
- __asm__ ("frintp %0.2d,%1.2d"
13855
- : /* No clobbers */);
13859
#define vrshrn_high_n_s16(a, b, c) \
13862
@@ -18309,86 +16329,6 @@
13866
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13867
-vaddv_s32 (int32x2_t a)
13870
- __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13874
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13875
-vaddv_u32 (uint32x2_t a)
13878
- __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13882
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13883
-vmaxnmv_f32 (float32x2_t a)
13885
- float32_t result;
13886
- __asm__ ("fmaxnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13890
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13891
-vminnmv_f32 (float32x2_t a)
13893
- float32_t result;
13894
- __asm__ ("fminnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13898
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13899
-vmaxnmvq_f64 (float64x2_t a)
13901
- float64_t result;
13902
- __asm__ ("fmaxnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
13906
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13907
-vmaxv_s32 (int32x2_t a)
13910
- __asm__ ("smaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13914
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13915
-vmaxv_u32 (uint32x2_t a)
13918
- __asm__ ("umaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13922
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13923
-vminnmvq_f64 (float64x2_t a)
13925
- float64_t result;
13926
- __asm__ ("fminnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
13930
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13931
-vminv_s32 (int32x2_t a)
13934
- __asm__ ("sminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13938
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13939
-vminv_u32 (uint32x2_t a)
13942
- __asm__ ("uminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
13946
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13947
vpaddd_s64 (int64x2_t __a)
13949
@@ -19370,6 +17310,80 @@
13951
/* Start of optimal implementations in approved order. */
13955
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13956
+vabs_f32 (float32x2_t __a)
13958
+ return __builtin_aarch64_absv2sf (__a);
13961
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13962
+vabs_f64 (float64x1_t __a)
13964
+ return __builtin_fabs (__a);
13967
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13968
+vabs_s8 (int8x8_t __a)
13970
+ return __builtin_aarch64_absv8qi (__a);
13973
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13974
+vabs_s16 (int16x4_t __a)
13976
+ return __builtin_aarch64_absv4hi (__a);
13979
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13980
+vabs_s32 (int32x2_t __a)
13982
+ return __builtin_aarch64_absv2si (__a);
13985
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13986
+vabs_s64 (int64x1_t __a)
13988
+ return __builtin_llabs (__a);
13991
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13992
+vabsq_f32 (float32x4_t __a)
13994
+ return __builtin_aarch64_absv4sf (__a);
13997
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13998
+vabsq_f64 (float64x2_t __a)
14000
+ return __builtin_aarch64_absv2df (__a);
14003
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14004
+vabsq_s8 (int8x16_t __a)
14006
+ return __builtin_aarch64_absv16qi (__a);
14009
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14010
+vabsq_s16 (int16x8_t __a)
14012
+ return __builtin_aarch64_absv8hi (__a);
14015
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14016
+vabsq_s32 (int32x4_t __a)
14018
+ return __builtin_aarch64_absv4si (__a);
14021
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14022
+vabsq_s64 (int64x2_t __a)
14024
+ return __builtin_aarch64_absv2di (__a);
14029
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14030
@@ -19384,8 +17398,238 @@
14037
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
14038
+vaddv_s8 (int8x8_t __a)
14040
+ return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
14043
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
14044
+vaddv_s16 (int16x4_t __a)
14046
+ return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
14049
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
14050
+vaddv_s32 (int32x2_t __a)
14052
+ return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
14055
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14056
+vaddv_u8 (uint8x8_t __a)
14058
+ return vget_lane_u8 ((uint8x8_t)
14059
+ __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
14062
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14063
+vaddv_u16 (uint16x4_t __a)
14065
+ return vget_lane_u16 ((uint16x4_t)
14066
+ __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
14069
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14070
+vaddv_u32 (uint32x2_t __a)
14072
+ return vget_lane_u32 ((uint32x2_t)
14073
+ __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
14076
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
14077
+vaddvq_s8 (int8x16_t __a)
14079
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
14082
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
14083
+vaddvq_s16 (int16x8_t __a)
14085
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
14088
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
14089
+vaddvq_s32 (int32x4_t __a)
14091
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
14094
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
14095
+vaddvq_s64 (int64x2_t __a)
14097
+ return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
14100
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14101
+vaddvq_u8 (uint8x16_t __a)
14103
+ return vgetq_lane_u8 ((uint8x16_t)
14104
+ __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
14107
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14108
+vaddvq_u16 (uint16x8_t __a)
14110
+ return vgetq_lane_u16 ((uint16x8_t)
14111
+ __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
14114
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14115
+vaddvq_u32 (uint32x4_t __a)
14117
+ return vgetq_lane_u32 ((uint32x4_t)
14118
+ __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
14121
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14122
+vaddvq_u64 (uint64x2_t __a)
14124
+ return vgetq_lane_u64 ((uint64x2_t)
14125
+ __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
14128
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
14129
+vaddv_f32 (float32x2_t __a)
14131
+ float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
14132
+ return vget_lane_f32 (t, 0);
14135
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
14136
+vaddvq_f32 (float32x4_t __a)
14138
+ float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
14139
+ return vgetq_lane_f32 (t, 0);
14142
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
14143
+vaddvq_f64 (float64x2_t __a)
14145
+ float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
14146
+ return vgetq_lane_f64 (t, 0);
14151
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14152
+vcages_f32 (float32_t __a, float32_t __b)
14154
+ return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
14157
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14158
+vcage_f32 (float32x2_t __a, float32x2_t __b)
14160
+ return vabs_f32 (__a) >= vabs_f32 (__b);
14163
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14164
+vcageq_f32 (float32x4_t __a, float32x4_t __b)
14166
+ return vabsq_f32 (__a) >= vabsq_f32 (__b);
14169
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14170
+vcaged_f64 (float64_t __a, float64_t __b)
14172
+ return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
14175
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14176
+vcageq_f64 (float64x2_t __a, float64x2_t __b)
14178
+ return vabsq_f64 (__a) >= vabsq_f64 (__b);
14183
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14184
+vcagts_f32 (float32_t __a, float32_t __b)
14186
+ return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
14189
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14190
+vcagt_f32 (float32x2_t __a, float32x2_t __b)
14192
+ return vabs_f32 (__a) > vabs_f32 (__b);
14195
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14196
+vcagtq_f32 (float32x4_t __a, float32x4_t __b)
14198
+ return vabsq_f32 (__a) > vabsq_f32 (__b);
14201
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14202
+vcagtd_f64 (float64_t __a, float64_t __b)
14204
+ return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
14207
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14208
+vcagtq_f64 (float64x2_t __a, float64x2_t __b)
14210
+ return vabsq_f64 (__a) > vabsq_f64 (__b);
14215
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14216
+vcale_f32 (float32x2_t __a, float32x2_t __b)
14218
+ return vabs_f32 (__a) <= vabs_f32 (__b);
14221
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14222
+vcaleq_f32 (float32x4_t __a, float32x4_t __b)
14224
+ return vabsq_f32 (__a) <= vabsq_f32 (__b);
14227
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14228
+vcaleq_f64 (float64x2_t __a, float64x2_t __b)
14230
+ return vabsq_f64 (__a) <= vabsq_f64 (__b);
14235
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14236
+vcalt_f32 (float32x2_t __a, float32x2_t __b)
14238
+ return vabs_f32 (__a) < vabs_f32 (__b);
14241
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14242
+vcaltq_f32 (float32x4_t __a, float32x4_t __b)
14244
+ return vabsq_f32 (__a) < vabsq_f32 (__b);
14247
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14248
+vcaltq_f64 (float64x2_t __a, float64x2_t __b)
14250
+ return vabsq_f64 (__a) < vabsq_f64 (__b);
14253
+/* vceq - vector. */
14255
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14256
+vceq_f32 (float32x2_t __a, float32x2_t __b)
14258
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
14261
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14262
+vceq_f64 (float64x1_t __a, float64x1_t __b)
14264
+ return __a == __b ? -1ll : 0ll;
14267
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14268
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
14270
@@ -19414,7 +17658,7 @@
14271
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14272
vceq_s64 (int64x1_t __a, int64x1_t __b)
14274
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
14275
+ return __a == __b ? -1ll : 0ll;
14278
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14279
@@ -19441,10 +17685,21 @@
14280
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14281
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
14283
- return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a,
14284
- (int64x1_t) __b);
14285
+ return __a == __b ? -1ll : 0ll;
14288
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14289
+vceqq_f32 (float32x4_t __a, float32x4_t __b)
14291
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
14294
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14295
+vceqq_f64 (float64x2_t __a, float64x2_t __b)
14297
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
14300
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14301
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
14303
@@ -19504,27 +17759,245 @@
14307
+/* vceq - scalar. */
14309
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14310
+vceqs_f32 (float32_t __a, float32_t __b)
14312
+ return __a == __b ? -1 : 0;
14315
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14316
vceqd_s64 (int64x1_t __a, int64x1_t __b)
14318
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
14319
+ return __a == __b ? -1ll : 0ll;
14322
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14323
vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
14325
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
14326
+ return __a == __b ? -1ll : 0ll;
14329
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14330
+vceqd_f64 (float64_t __a, float64_t __b)
14332
+ return __a == __b ? -1ll : 0ll;
14335
+/* vceqz - vector. */
14337
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14338
+vceqz_f32 (float32x2_t __a)
14340
+ float32x2_t __b = {0.0f, 0.0f};
14341
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
14344
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14345
+vceqz_f64 (float64x1_t __a)
14347
+ return __a == 0.0 ? -1ll : 0ll;
14350
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14351
+vceqz_p8 (poly8x8_t __a)
14353
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14354
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14358
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14359
+vceqz_s8 (int8x8_t __a)
14361
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14362
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
14365
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14366
+vceqz_s16 (int16x4_t __a)
14368
+ int16x4_t __b = {0, 0, 0, 0};
14369
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
14372
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14373
+vceqz_s32 (int32x2_t __a)
14375
+ int32x2_t __b = {0, 0};
14376
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
14379
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14380
+vceqz_s64 (int64x1_t __a)
14382
+ return __a == 0ll ? -1ll : 0ll;
14385
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14386
+vceqz_u8 (uint8x8_t __a)
14388
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14389
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14393
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14394
+vceqz_u16 (uint16x4_t __a)
14396
+ uint16x4_t __b = {0, 0, 0, 0};
14397
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
14398
+ (int16x4_t) __b);
14401
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14402
+vceqz_u32 (uint32x2_t __a)
14404
+ uint32x2_t __b = {0, 0};
14405
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
14406
+ (int32x2_t) __b);
14409
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14410
+vceqz_u64 (uint64x1_t __a)
14412
+ return __a == 0ll ? -1ll : 0ll;
14415
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14416
+vceqzq_f32 (float32x4_t __a)
14418
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14419
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
14422
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14423
+vceqzq_f64 (float64x2_t __a)
14425
+ float64x2_t __b = {0.0, 0.0};
14426
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
14429
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14430
+vceqzq_p8 (poly8x16_t __a)
14432
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14433
+ 0, 0, 0, 0, 0, 0, 0, 0};
14434
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14435
+ (int8x16_t) __b);
14438
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14439
+vceqzq_s8 (int8x16_t __a)
14441
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14442
+ 0, 0, 0, 0, 0, 0, 0, 0};
14443
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
14446
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14447
+vceqzq_s16 (int16x8_t __a)
14449
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14450
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
14453
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14454
+vceqzq_s32 (int32x4_t __a)
14456
+ int32x4_t __b = {0, 0, 0, 0};
14457
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
14460
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14461
+vceqzq_s64 (int64x2_t __a)
14463
+ int64x2_t __b = {0, 0};
14464
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
14467
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14468
+vceqzq_u8 (uint8x16_t __a)
14470
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14471
+ 0, 0, 0, 0, 0, 0, 0, 0};
14472
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14473
+ (int8x16_t) __b);
14476
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14477
+vceqzq_u16 (uint16x8_t __a)
14479
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14480
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
14481
+ (int16x8_t) __b);
14484
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14485
+vceqzq_u32 (uint32x4_t __a)
14487
+ uint32x4_t __b = {0, 0, 0, 0};
14488
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
14489
+ (int32x4_t) __b);
14492
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14493
+vceqzq_u64 (uint64x2_t __a)
14495
+ uint64x2_t __b = {0, 0};
14496
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14497
+ (int64x2_t) __b);
14500
+/* vceqz - scalar. */
14502
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14503
+vceqzs_f32 (float32_t __a)
14505
+ return __a == 0.0f ? -1 : 0;
14508
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14509
vceqzd_s64 (int64x1_t __a)
14511
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0);
14512
+ return __a == 0 ? -1ll : 0ll;
14516
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14517
+vceqzd_u64 (int64x1_t __a)
14519
+ return __a == 0 ? -1ll : 0ll;
14522
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14523
+vceqzd_f64 (float64_t __a)
14525
+ return __a == 0.0 ? -1ll : 0ll;
14528
+/* vcge - vector. */
14530
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14531
+vcge_f32 (float32x2_t __a, float32x2_t __b)
14533
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14536
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14537
+vcge_f64 (float64x1_t __a, float64x1_t __b)
14539
+ return __a >= __b ? -1ll : 0ll;
14542
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14543
+vcge_p8 (poly8x8_t __a, poly8x8_t __b)
14545
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
14549
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14550
vcge_s8 (int8x8_t __a, int8x8_t __b)
14552
return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14553
@@ -19545,38 +18018,56 @@
14554
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14555
vcge_s64 (int64x1_t __a, int64x1_t __b)
14557
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
14558
+ return __a >= __b ? -1ll : 0ll;
14561
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14562
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
14564
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
14565
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
14569
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14570
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
14572
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
14573
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
14577
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14578
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
14580
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
14581
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
14585
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14586
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
14588
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
14589
- (int64x1_t) __b);
14590
+ return __a >= __b ? -1ll : 0ll;
14593
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14594
+vcgeq_f32 (float32x4_t __a, float32x4_t __b)
14596
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14599
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14600
+vcgeq_f64 (float64x2_t __a, float64x2_t __b)
14602
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14605
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14606
+vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
14608
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
14609
+ (int8x16_t) __b);
14612
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14613
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
14615
return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14616
@@ -19603,53 +18094,270 @@
14617
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14618
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
14620
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
14621
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
14625
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14626
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
14628
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
14629
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
14633
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14634
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
14636
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
14637
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
14641
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14642
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
14644
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
14645
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
14649
+/* vcge - scalar. */
14651
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14652
+vcges_f32 (float32_t __a, float32_t __b)
14654
+ return __a >= __b ? -1 : 0;
14657
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14658
vcged_s64 (int64x1_t __a, int64x1_t __b)
14660
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
14661
+ return __a >= __b ? -1ll : 0ll;
14664
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14665
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
14667
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
14668
- (int64x1_t) __b);
14669
+ return __a >= __b ? -1ll : 0ll;
14672
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14673
+vcged_f64 (float64_t __a, float64_t __b)
14675
+ return __a >= __b ? -1ll : 0ll;
14678
+/* vcgez - vector. */
14680
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14681
+vcgez_f32 (float32x2_t __a)
14683
+ float32x2_t __b = {0.0f, 0.0f};
14684
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14687
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14688
+vcgez_f64 (float64x1_t __a)
14690
+ return __a >= 0.0 ? -1ll : 0ll;
14693
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14694
+vcgez_p8 (poly8x8_t __a)
14696
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14697
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
14701
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14702
+vcgez_s8 (int8x8_t __a)
14704
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14705
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14708
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14709
+vcgez_s16 (int16x4_t __a)
14711
+ int16x4_t __b = {0, 0, 0, 0};
14712
+ return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14715
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14716
+vcgez_s32 (int32x2_t __a)
14718
+ int32x2_t __b = {0, 0};
14719
+ return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14722
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14723
+vcgez_s64 (int64x1_t __a)
14725
+ return __a >= 0ll ? -1ll : 0ll;
14728
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14729
+vcgez_u8 (uint8x8_t __a)
14731
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14732
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
14736
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14737
+vcgez_u16 (uint16x4_t __a)
14739
+ uint16x4_t __b = {0, 0, 0, 0};
14740
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
14741
+ (int16x4_t) __b);
14744
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14745
+vcgez_u32 (uint32x2_t __a)
14747
+ uint32x2_t __b = {0, 0};
14748
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
14749
+ (int32x2_t) __b);
14752
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14753
+vcgez_u64 (uint64x1_t __a)
14755
+ return __a >= 0ll ? -1ll : 0ll;
14758
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14759
+vcgezq_f32 (float32x4_t __a)
14761
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14762
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14765
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14766
+vcgezq_f64 (float64x2_t __a)
14768
+ float64x2_t __b = {0.0, 0.0};
14769
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14772
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14773
+vcgezq_p8 (poly8x16_t __a)
14775
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14776
+ 0, 0, 0, 0, 0, 0, 0, 0};
14777
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
14778
+ (int8x16_t) __b);
14781
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14782
+vcgezq_s8 (int8x16_t __a)
14784
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14785
+ 0, 0, 0, 0, 0, 0, 0, 0};
14786
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14789
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14790
+vcgezq_s16 (int16x8_t __a)
14792
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14793
+ return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14796
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14797
+vcgezq_s32 (int32x4_t __a)
14799
+ int32x4_t __b = {0, 0, 0, 0};
14800
+ return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14803
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14804
+vcgezq_s64 (int64x2_t __a)
14806
+ int64x2_t __b = {0, 0};
14807
+ return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14810
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14811
+vcgezq_u8 (uint8x16_t __a)
14813
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14814
+ 0, 0, 0, 0, 0, 0, 0, 0};
14815
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
14816
+ (int8x16_t) __b);
14819
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14820
+vcgezq_u16 (uint16x8_t __a)
14822
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14823
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
14824
+ (int16x8_t) __b);
14827
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14828
+vcgezq_u32 (uint32x4_t __a)
14830
+ uint32x4_t __b = {0, 0, 0, 0};
14831
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
14832
+ (int32x4_t) __b);
14835
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14836
+vcgezq_u64 (uint64x2_t __a)
14838
+ uint64x2_t __b = {0, 0};
14839
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
14840
+ (int64x2_t) __b);
14843
+/* vcgez - scalar. */
14845
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14846
+vcgezs_f32 (float32_t __a)
14848
+ return __a >= 0.0f ? -1 : 0;
14851
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14852
vcgezd_s64 (int64x1_t __a)
14854
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0);
14855
+ return __a >= 0 ? -1ll : 0ll;
14859
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14860
+vcgezd_u64 (int64x1_t __a)
14862
+ return __a >= 0 ? -1ll : 0ll;
14865
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14866
+vcgezd_f64 (float64_t __a)
14868
+ return __a >= 0.0 ? -1ll : 0ll;
14871
+/* vcgt - vector. */
14873
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14874
+vcgt_f32 (float32x2_t __a, float32x2_t __b)
14876
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14879
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14880
+vcgt_f64 (float64x1_t __a, float64x1_t __b)
14882
+ return __a > __b ? -1ll : 0ll;
14885
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14886
+vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
14888
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
14892
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14893
vcgt_s8 (int8x8_t __a, int8x8_t __b)
14895
return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14896
@@ -19670,38 +18378,56 @@
14897
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14898
vcgt_s64 (int64x1_t __a, int64x1_t __b)
14900
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
14901
+ return __a > __b ? -1ll : 0ll;
14904
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14905
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
14907
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
14908
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
14912
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14913
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
14915
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
14916
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
14920
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14921
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
14923
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
14924
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
14928
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14929
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
14931
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
14932
- (int64x1_t) __b);
14933
+ return __a > __b ? -1ll : 0ll;
14936
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14937
+vcgtq_f32 (float32x4_t __a, float32x4_t __b)
14939
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14942
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14943
+vcgtq_f64 (float64x2_t __a, float64x2_t __b)
14945
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14948
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14949
+vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
14951
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
14952
+ (int8x16_t) __b);
14955
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14956
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
14958
return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14959
@@ -19728,53 +18454,270 @@
14960
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14961
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
14963
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
14964
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
14968
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14969
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
14971
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
14972
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
14976
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14977
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
14979
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
14980
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
14984
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14985
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
14987
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
14988
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
14992
+/* vcgt - scalar. */
14994
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14995
+vcgts_f32 (float32_t __a, float32_t __b)
14997
+ return __a > __b ? -1 : 0;
15000
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15001
vcgtd_s64 (int64x1_t __a, int64x1_t __b)
15003
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
15004
+ return __a > __b ? -1ll : 0ll;
15007
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15008
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
15010
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
15011
- (int64x1_t) __b);
15012
+ return __a > __b ? -1ll : 0ll;
15015
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15016
+vcgtd_f64 (float64_t __a, float64_t __b)
15018
+ return __a > __b ? -1ll : 0ll;
15021
+/* vcgtz - vector. */
15023
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15024
+vcgtz_f32 (float32x2_t __a)
15026
+ float32x2_t __b = {0.0f, 0.0f};
15027
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
15030
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15031
+vcgtz_f64 (float64x1_t __a)
15033
+ return __a > 0.0 ? -1ll : 0ll;
15036
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15037
+vcgtz_p8 (poly8x8_t __a)
15039
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15040
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
15044
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15045
+vcgtz_s8 (int8x8_t __a)
15047
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15048
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
15051
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15052
+vcgtz_s16 (int16x4_t __a)
15054
+ int16x4_t __b = {0, 0, 0, 0};
15055
+ return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
15058
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15059
+vcgtz_s32 (int32x2_t __a)
15061
+ int32x2_t __b = {0, 0};
15062
+ return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
15065
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15066
+vcgtz_s64 (int64x1_t __a)
15068
+ return __a > 0ll ? -1ll : 0ll;
15071
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15072
+vcgtz_u8 (uint8x8_t __a)
15074
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15075
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
15079
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15080
+vcgtz_u16 (uint16x4_t __a)
15082
+ uint16x4_t __b = {0, 0, 0, 0};
15083
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
15084
+ (int16x4_t) __b);
15087
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15088
+vcgtz_u32 (uint32x2_t __a)
15090
+ uint32x2_t __b = {0, 0};
15091
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
15092
+ (int32x2_t) __b);
15095
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15096
+vcgtz_u64 (uint64x1_t __a)
15098
+ return __a > 0ll ? -1ll : 0ll;
15101
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15102
+vcgtzq_f32 (float32x4_t __a)
15104
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15105
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
15108
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15109
+vcgtzq_f64 (float64x2_t __a)
15111
+ float64x2_t __b = {0.0, 0.0};
15112
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
15115
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15116
+vcgtzq_p8 (poly8x16_t __a)
15118
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15119
+ 0, 0, 0, 0, 0, 0, 0, 0};
15120
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
15121
+ (int8x16_t) __b);
15124
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15125
+vcgtzq_s8 (int8x16_t __a)
15127
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15128
+ 0, 0, 0, 0, 0, 0, 0, 0};
15129
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
15132
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15133
+vcgtzq_s16 (int16x8_t __a)
15135
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15136
+ return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
15139
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15140
+vcgtzq_s32 (int32x4_t __a)
15142
+ int32x4_t __b = {0, 0, 0, 0};
15143
+ return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
15146
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15147
+vcgtzq_s64 (int64x2_t __a)
15149
+ int64x2_t __b = {0, 0};
15150
+ return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
15153
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15154
+vcgtzq_u8 (uint8x16_t __a)
15156
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15157
+ 0, 0, 0, 0, 0, 0, 0, 0};
15158
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
15159
+ (int8x16_t) __b);
15162
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15163
+vcgtzq_u16 (uint16x8_t __a)
15165
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15166
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
15167
+ (int16x8_t) __b);
15170
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15171
+vcgtzq_u32 (uint32x4_t __a)
15173
+ uint32x4_t __b = {0, 0, 0, 0};
15174
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
15175
+ (int32x4_t) __b);
15178
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15179
+vcgtzq_u64 (uint64x2_t __a)
15181
+ uint64x2_t __b = {0, 0};
15182
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
15183
+ (int64x2_t) __b);
15186
+/* vcgtz - scalar. */
15188
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15189
+vcgtzs_f32 (float32_t __a)
15191
+ return __a > 0.0f ? -1 : 0;
15194
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15195
vcgtzd_s64 (int64x1_t __a)
15197
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0);
15198
+ return __a > 0 ? -1ll : 0ll;
15202
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15203
+vcgtzd_u64 (int64x1_t __a)
15205
+ return __a > 0 ? -1ll : 0ll;
15208
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15209
+vcgtzd_f64 (float64_t __a)
15211
+ return __a > 0.0 ? -1ll : 0ll;
15214
+/* vcle - vector. */
15216
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15217
+vcle_f32 (float32x2_t __a, float32x2_t __b)
15219
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
15222
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15223
+vcle_f64 (float64x1_t __a, float64x1_t __b)
15225
+ return __a <= __b ? -1ll : 0ll;
15228
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15229
+vcle_p8 (poly8x8_t __a, poly8x8_t __b)
15231
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
15235
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15236
vcle_s8 (int8x8_t __a, int8x8_t __b)
15238
return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
15239
@@ -19795,38 +18738,56 @@
15240
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15241
vcle_s64 (int64x1_t __a, int64x1_t __b)
15243
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
15244
+ return __a <= __b ? -1ll : 0ll;
15247
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15248
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
15250
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
15251
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
15255
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15256
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
15258
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
15259
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
15263
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15264
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
15266
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
15267
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
15271
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15272
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
15274
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
15275
- (int64x1_t) __a);
15276
+ return __a <= __b ? -1ll : 0ll;
15279
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15280
+vcleq_f32 (float32x4_t __a, float32x4_t __b)
15282
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
15285
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15286
+vcleq_f64 (float64x2_t __a, float64x2_t __b)
15288
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
15291
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15292
+vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
15294
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
15295
+ (int8x16_t) __a);
15298
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15299
vcleq_s8 (int8x16_t __a, int8x16_t __b)
15301
return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
15302
@@ -19853,46 +18814,213 @@
15303
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15304
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
15306
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
15307
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
15311
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15312
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
15314
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
15315
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
15319
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15320
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
15322
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
15323
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
15327
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15328
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
15330
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
15331
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
15335
+/* vcle - scalar. */
15337
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15338
+vcles_f32 (float32_t __a, float32_t __b)
15340
+ return __a <= __b ? -1 : 0;
15343
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15344
vcled_s64 (int64x1_t __a, int64x1_t __b)
15346
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
15347
+ return __a <= __b ? -1ll : 0ll;
15350
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15351
+vcled_u64 (uint64x1_t __a, uint64x1_t __b)
15353
+ return __a <= __b ? -1ll : 0ll;
15356
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15357
+vcled_f64 (float64_t __a, float64_t __b)
15359
+ return __a <= __b ? -1ll : 0ll;
15362
+/* vclez - vector. */
15364
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15365
+vclez_f32 (float32x2_t __a)
15367
+ float32x2_t __b = {0.0f, 0.0f};
15368
+ return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
15371
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15372
+vclez_f64 (float64x1_t __a)
15374
+ return __a <= 0.0 ? -1ll : 0ll;
15377
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15378
+vclez_p8 (poly8x8_t __a)
15380
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15381
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
15385
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15386
+vclez_s8 (int8x8_t __a)
15388
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15389
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
15392
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15393
+vclez_s16 (int16x4_t __a)
15395
+ int16x4_t __b = {0, 0, 0, 0};
15396
+ return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
15399
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15400
+vclez_s32 (int32x2_t __a)
15402
+ int32x2_t __b = {0, 0};
15403
+ return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
15406
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15407
+vclez_s64 (int64x1_t __a)
15409
+ return __a <= 0ll ? -1ll : 0ll;
15412
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15413
+vclez_u64 (uint64x1_t __a)
15415
+ return __a <= 0ll ? -1ll : 0ll;
15418
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15419
+vclezq_f32 (float32x4_t __a)
15421
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15422
+ return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
15425
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15426
+vclezq_f64 (float64x2_t __a)
15428
+ float64x2_t __b = {0.0, 0.0};
15429
+ return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
15432
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15433
+vclezq_p8 (poly8x16_t __a)
15435
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15436
+ 0, 0, 0, 0, 0, 0, 0, 0};
15437
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
15438
+ (int8x16_t) __b);
15441
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15442
+vclezq_s8 (int8x16_t __a)
15444
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15445
+ 0, 0, 0, 0, 0, 0, 0, 0};
15446
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
15449
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15450
+vclezq_s16 (int16x8_t __a)
15452
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15453
+ return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
15456
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15457
+vclezq_s32 (int32x4_t __a)
15459
+ int32x4_t __b = {0, 0, 0, 0};
15460
+ return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
15463
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15464
+vclezq_s64 (int64x2_t __a)
15466
+ int64x2_t __b = {0, 0};
15467
+ return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
15470
+/* vclez - scalar. */
15472
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15473
+vclezs_f32 (float32_t __a)
15475
+ return __a <= 0.0f ? -1 : 0;
15478
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15479
vclezd_s64 (int64x1_t __a)
15481
- return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0);
15482
+ return __a <= 0 ? -1ll : 0ll;
15486
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15487
+vclezd_u64 (int64x1_t __a)
15489
+ return __a <= 0 ? -1ll : 0ll;
15492
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15493
+vclezd_f64 (float64_t __a)
15495
+ return __a <= 0.0 ? -1ll : 0ll;
15498
+/* vclt - vector. */
15500
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15501
+vclt_f32 (float32x2_t __a, float32x2_t __b)
15503
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
15506
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15507
+vclt_f64 (float64x1_t __a, float64x1_t __b)
15509
+ return __a < __b ? -1ll : 0ll;
15512
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15513
+vclt_p8 (poly8x8_t __a, poly8x8_t __b)
15515
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
15519
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15520
vclt_s8 (int8x8_t __a, int8x8_t __b)
15522
return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
15523
@@ -19913,38 +19041,56 @@
15524
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15525
vclt_s64 (int64x1_t __a, int64x1_t __b)
15527
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
15528
+ return __a < __b ? -1ll : 0ll;
15531
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15532
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
15534
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
15535
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
15539
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15540
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
15542
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
15543
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
15547
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15548
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
15550
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
15551
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
15555
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15556
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
15558
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
15559
- (int64x1_t) __a);
15560
+ return __a < __b ? -1ll : 0ll;
15563
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15564
+vcltq_f32 (float32x4_t __a, float32x4_t __b)
15566
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
15569
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15570
+vcltq_f64 (float64x2_t __a, float64x2_t __b)
15572
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
15575
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15576
+vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
15578
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
15579
+ (int8x16_t) __a);
15582
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15583
vcltq_s8 (int8x16_t __a, int8x16_t __b)
15585
return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
15586
@@ -19971,91 +19117,664 @@
15587
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15588
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
15590
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
15591
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
15595
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15596
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
15598
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
15599
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
15603
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15604
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
15606
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
15607
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
15611
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15612
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
15614
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
15615
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
15619
+/* vclt - scalar. */
15621
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15622
+vclts_f32 (float32_t __a, float32_t __b)
15624
+ return __a < __b ? -1 : 0;
15627
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15628
vcltd_s64 (int64x1_t __a, int64x1_t __b)
15630
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
15631
+ return __a < __b ? -1ll : 0ll;
15634
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15635
+vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
15637
+ return __a < __b ? -1ll : 0ll;
15640
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15641
+vcltd_f64 (float64_t __a, float64_t __b)
15643
+ return __a < __b ? -1ll : 0ll;
15646
+/* vcltz - vector. */
15648
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15649
+vcltz_f32 (float32x2_t __a)
15651
+ float32x2_t __b = {0.0f, 0.0f};
15652
+ return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
15655
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15656
+vcltz_f64 (float64x1_t __a)
15658
+ return __a < 0.0 ? -1ll : 0ll;
15661
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15662
+vcltz_p8 (poly8x8_t __a)
15664
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15665
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
15669
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15670
+vcltz_s8 (int8x8_t __a)
15672
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15673
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
15676
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15677
+vcltz_s16 (int16x4_t __a)
15679
+ int16x4_t __b = {0, 0, 0, 0};
15680
+ return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
15683
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15684
+vcltz_s32 (int32x2_t __a)
15686
+ int32x2_t __b = {0, 0};
15687
+ return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
15690
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15691
+vcltz_s64 (int64x1_t __a)
15693
+ return __a < 0ll ? -1ll : 0ll;
15696
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15697
+vcltzq_f32 (float32x4_t __a)
15699
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15700
+ return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
15703
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15704
+vcltzq_f64 (float64x2_t __a)
15706
+ float64x2_t __b = {0.0, 0.0};
15707
+ return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
15710
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15711
+vcltzq_p8 (poly8x16_t __a)
15713
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15714
+ 0, 0, 0, 0, 0, 0, 0, 0};
15715
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
15716
+ (int8x16_t) __b);
15719
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15720
+vcltzq_s8 (int8x16_t __a)
15722
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15723
+ 0, 0, 0, 0, 0, 0, 0, 0};
15724
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
15727
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15728
+vcltzq_s16 (int16x8_t __a)
15730
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15731
+ return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
15734
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15735
+vcltzq_s32 (int32x4_t __a)
15737
+ int32x4_t __b = {0, 0, 0, 0};
15738
+ return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
15741
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15742
+vcltzq_s64 (int64x2_t __a)
15744
+ int64x2_t __b = {0, 0};
15745
+ return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
15748
+/* vcltz - scalar. */
15750
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15751
+vcltzs_f32 (float32_t __a)
15753
+ return __a < 0.0f ? -1 : 0;
15756
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15757
vcltzd_s64 (int64x1_t __a)
15759
- return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
15760
+ return __a < 0 ? -1ll : 0ll;
15763
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15764
+vcltzd_u64 (int64x1_t __a)
15766
+ return __a < 0 ? -1ll : 0ll;
15769
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15770
+vcltzd_f64 (float64_t __a)
15772
+ return __a < 0.0 ? -1ll : 0ll;
15775
+/* vcvt (double -> float). */
15777
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15778
+vcvt_f32_f64 (float64x2_t __a)
15780
+ return __builtin_aarch64_float_truncate_lo_v2sf (__a);
15783
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15784
+vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
15786
+ return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
15789
+/* vcvt (float -> double). */
15791
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15792
+vcvt_f64_f32 (float32x2_t __a)
15795
+ return __builtin_aarch64_float_extend_lo_v2df (__a);
15798
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15799
+vcvt_high_f64_f32 (float32x4_t __a)
15801
+ return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
15804
+/* vcvt (<u>int -> float) */
15806
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
15807
+vcvtd_f64_s64 (int64_t __a)
15809
+ return (float64_t) __a;
15812
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
15813
+vcvtd_f64_u64 (uint64_t __a)
15815
+ return (float64_t) __a;
15818
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
15819
+vcvts_f32_s32 (int32_t __a)
15821
+ return (float32_t) __a;
15824
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
15825
+vcvts_f32_u32 (uint32_t __a)
15827
+ return (float32_t) __a;
15830
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15831
+vcvt_f32_s32 (int32x2_t __a)
15833
+ return __builtin_aarch64_floatv2siv2sf (__a);
15836
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15837
+vcvt_f32_u32 (uint32x2_t __a)
15839
+ return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
15842
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15843
+vcvtq_f32_s32 (int32x4_t __a)
15845
+ return __builtin_aarch64_floatv4siv4sf (__a);
15848
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15849
+vcvtq_f32_u32 (uint32x4_t __a)
15851
+ return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
15854
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15855
+vcvtq_f64_s64 (int64x2_t __a)
15857
+ return __builtin_aarch64_floatv2div2df (__a);
15860
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15861
+vcvtq_f64_u64 (uint64x2_t __a)
15863
+ return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
15866
+/* vcvt (float -> <u>int) */
15868
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
15869
+vcvtd_s64_f64 (float64_t __a)
15871
+ return (int64_t) __a;
15874
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15875
+vcvtd_u64_f64 (float64_t __a)
15877
+ return (uint64_t) __a;
15880
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
15881
+vcvts_s32_f32 (float32_t __a)
15883
+ return (int32_t) __a;
15886
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15887
+vcvts_u32_f32 (float32_t __a)
15889
+ return (uint32_t) __a;
15892
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15893
+vcvt_s32_f32 (float32x2_t __a)
15895
+ return __builtin_aarch64_lbtruncv2sfv2si (__a);
15898
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15899
+vcvt_u32_f32 (float32x2_t __a)
15901
+ /* TODO: This cast should go away when builtins have
15902
+ their correct types. */
15903
+ return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
15906
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15907
+vcvtq_s32_f32 (float32x4_t __a)
15909
+ return __builtin_aarch64_lbtruncv4sfv4si (__a);
15912
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15913
+vcvtq_u32_f32 (float32x4_t __a)
15915
+ /* TODO: This cast should go away when builtins have
15916
+ their correct types. */
15917
+ return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
15920
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15921
+vcvtq_s64_f64 (float64x2_t __a)
15923
+ return __builtin_aarch64_lbtruncv2dfv2di (__a);
15926
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15927
+vcvtq_u64_f64 (float64x2_t __a)
15929
+ /* TODO: This cast should go away when builtins have
15930
+ their correct types. */
15931
+ return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
15936
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
15937
+vcvtad_s64_f64 (float64_t __a)
15939
+ return __builtin_aarch64_lrounddfdi (__a);
15942
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15943
+vcvtad_u64_f64 (float64_t __a)
15945
+ return __builtin_aarch64_lroundudfdi (__a);
15948
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
15949
+vcvtas_s32_f32 (float32_t __a)
15951
+ return __builtin_aarch64_lroundsfsi (__a);
15954
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15955
+vcvtas_u32_f32 (float32_t __a)
15957
+ return __builtin_aarch64_lroundusfsi (__a);
15960
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15961
+vcvta_s32_f32 (float32x2_t __a)
15963
+ return __builtin_aarch64_lroundv2sfv2si (__a);
15966
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15967
+vcvta_u32_f32 (float32x2_t __a)
15969
+ /* TODO: This cast should go away when builtins have
15970
+ their correct types. */
15971
+ return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
15974
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15975
+vcvtaq_s32_f32 (float32x4_t __a)
15977
+ return __builtin_aarch64_lroundv4sfv4si (__a);
15980
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15981
+vcvtaq_u32_f32 (float32x4_t __a)
15983
+ /* TODO: This cast should go away when builtins have
15984
+ their correct types. */
15985
+ return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
15988
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15989
+vcvtaq_s64_f64 (float64x2_t __a)
15991
+ return __builtin_aarch64_lroundv2dfv2di (__a);
15994
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15995
+vcvtaq_u64_f64 (float64x2_t __a)
15997
+ /* TODO: This cast should go away when builtins have
15998
+ their correct types. */
15999
+ return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
16004
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
16005
+vcvtmd_s64_f64 (float64_t __a)
16007
+ return __builtin_lfloor (__a);
16010
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16011
+vcvtmd_u64_f64 (float64_t __a)
16013
+ return __builtin_aarch64_lfloorudfdi (__a);
16016
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16017
+vcvtms_s32_f32 (float32_t __a)
16019
+ return __builtin_ifloorf (__a);
16022
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16023
+vcvtms_u32_f32 (float32_t __a)
16025
+ return __builtin_aarch64_lfloorusfsi (__a);
16028
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16029
+vcvtm_s32_f32 (float32x2_t __a)
16031
+ return __builtin_aarch64_lfloorv2sfv2si (__a);
16034
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16035
+vcvtm_u32_f32 (float32x2_t __a)
16037
+ /* TODO: This cast should go away when builtins have
16038
+ their correct types. */
16039
+ return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
16042
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16043
+vcvtmq_s32_f32 (float32x4_t __a)
16045
+ return __builtin_aarch64_lfloorv4sfv4si (__a);
16048
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16049
+vcvtmq_u32_f32 (float32x4_t __a)
16051
+ /* TODO: This cast should go away when builtins have
16052
+ their correct types. */
16053
+ return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
16056
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16057
+vcvtmq_s64_f64 (float64x2_t __a)
16059
+ return __builtin_aarch64_lfloorv2dfv2di (__a);
16062
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16063
+vcvtmq_u64_f64 (float64x2_t __a)
16065
+ /* TODO: This cast should go away when builtins have
16066
+ their correct types. */
16067
+ return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
16072
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
16073
+vcvtnd_s64_f64 (float64_t __a)
16075
+ return __builtin_aarch64_lfrintndfdi (__a);
16078
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16079
+vcvtnd_u64_f64 (float64_t __a)
16081
+ return __builtin_aarch64_lfrintnudfdi (__a);
16084
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16085
+vcvtns_s32_f32 (float32_t __a)
16087
+ return __builtin_aarch64_lfrintnsfsi (__a);
16090
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16091
+vcvtns_u32_f32 (float32_t __a)
16093
+ return __builtin_aarch64_lfrintnusfsi (__a);
16096
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16097
+vcvtn_s32_f32 (float32x2_t __a)
16099
+ return __builtin_aarch64_lfrintnv2sfv2si (__a);
16102
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16103
+vcvtn_u32_f32 (float32x2_t __a)
16105
+ /* TODO: This cast should go away when builtins have
16106
+ their correct types. */
16107
+ return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
16110
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16111
+vcvtnq_s32_f32 (float32x4_t __a)
16113
+ return __builtin_aarch64_lfrintnv4sfv4si (__a);
16116
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16117
+vcvtnq_u32_f32 (float32x4_t __a)
16119
+ /* TODO: This cast should go away when builtins have
16120
+ their correct types. */
16121
+ return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
16124
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16125
+vcvtnq_s64_f64 (float64x2_t __a)
16127
+ return __builtin_aarch64_lfrintnv2dfv2di (__a);
16130
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16131
+vcvtnq_u64_f64 (float64x2_t __a)
16133
+ /* TODO: This cast should go away when builtins have
16134
+ their correct types. */
16135
+ return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
16140
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
16141
+vcvtpd_s64_f64 (float64_t __a)
16143
+ return __builtin_lceil (__a);
16146
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16147
+vcvtpd_u64_f64 (float64_t __a)
16149
+ return __builtin_aarch64_lceiludfdi (__a);
16152
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16153
+vcvtps_s32_f32 (float32_t __a)
16155
+ return __builtin_iceilf (__a);
16158
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16159
+vcvtps_u32_f32 (float32_t __a)
16161
+ return __builtin_aarch64_lceilusfsi (__a);
16164
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16165
+vcvtp_s32_f32 (float32x2_t __a)
16167
+ return __builtin_aarch64_lceilv2sfv2si (__a);
16170
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16171
+vcvtp_u32_f32 (float32x2_t __a)
16173
+ /* TODO: This cast should go away when builtins have
16174
+ their correct types. */
16175
+ return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
16178
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16179
+vcvtpq_s32_f32 (float32x4_t __a)
16181
+ return __builtin_aarch64_lceilv4sfv4si (__a);
16184
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16185
+vcvtpq_u32_f32 (float32x4_t __a)
16187
+ /* TODO: This cast should go away when builtins have
16188
+ their correct types. */
16189
+ return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
16192
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16193
+vcvtpq_s64_f64 (float64x2_t __a)
16195
+ return __builtin_aarch64_lceilv2dfv2di (__a);
16198
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16199
+vcvtpq_u64_f64 (float64x2_t __a)
16201
+ /* TODO: This cast should go away when builtins have
16202
+ their correct types. */
16203
+ return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
16208
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
16209
vdupb_lane_s8 (int8x16_t a, int const b)
16211
- return __builtin_aarch64_dup_laneqi (a, b);
16212
+ return __builtin_aarch64_dup_lane_scalarv16qi (a, b);
16215
__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
16216
vdupb_lane_u8 (uint8x16_t a, int const b)
16218
- return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b);
16219
+ return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b);
16222
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
16223
vduph_lane_s16 (int16x8_t a, int const b)
16225
- return __builtin_aarch64_dup_lanehi (a, b);
16226
+ return __builtin_aarch64_dup_lane_scalarv8hi (a, b);
16229
__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
16230
vduph_lane_u16 (uint16x8_t a, int const b)
16232
- return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b);
16233
+ return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b);
16236
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
16237
vdups_lane_s32 (int32x4_t a, int const b)
16239
- return __builtin_aarch64_dup_lanesi (a, b);
16240
+ return __builtin_aarch64_dup_lane_scalarv4si (a, b);
16243
__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
16244
vdups_lane_u32 (uint32x4_t a, int const b)
16246
- return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b);
16247
+ return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b);
16250
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16251
vdupd_lane_s64 (int64x2_t a, int const b)
16253
- return __builtin_aarch64_dup_lanedi (a, b);
16254
+ return __builtin_aarch64_dup_lane_scalarv2di (a, b);
16257
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16258
vdupd_lane_u64 (uint64x2_t a, int const b)
16260
- return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b);
16261
+ return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
16265
@@ -21088,7 +20807,7 @@
16266
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16267
vmax_f32 (float32x2_t __a, float32x2_t __b)
16269
- return __builtin_aarch64_fmaxv2sf (__a, __b);
16270
+ return __builtin_aarch64_smax_nanv2sf (__a, __b);
16273
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16274
@@ -21133,13 +20852,13 @@
16275
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16276
vmaxq_f32 (float32x4_t __a, float32x4_t __b)
16278
- return __builtin_aarch64_fmaxv4sf (__a, __b);
16279
+ return __builtin_aarch64_smax_nanv4sf (__a, __b);
16282
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16283
vmaxq_f64 (float64x2_t __a, float64x2_t __b)
16285
- return __builtin_aarch64_fmaxv2df (__a, __b);
16286
+ return __builtin_aarch64_smax_nanv2df (__a, __b);
16289
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16290
@@ -21181,12 +20900,150 @@
16297
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16298
+vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
16300
+ return __builtin_aarch64_smaxv2sf (__a, __b);
16303
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16304
+vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
16306
+ return __builtin_aarch64_smaxv4sf (__a, __b);
16309
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16310
+vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
16312
+ return __builtin_aarch64_smaxv2df (__a, __b);
16317
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16318
+vmaxv_f32 (float32x2_t __a)
16320
+ return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
16323
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
16324
+vmaxv_s8 (int8x8_t __a)
16326
+ return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
16329
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
16330
+vmaxv_s16 (int16x4_t __a)
16332
+ return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
16335
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16336
+vmaxv_s32 (int32x2_t __a)
16338
+ return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
16341
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16342
+vmaxv_u8 (uint8x8_t __a)
16344
+ return vget_lane_u8 ((uint8x8_t)
16345
+ __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
16348
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16349
+vmaxv_u16 (uint16x4_t __a)
16351
+ return vget_lane_u16 ((uint16x4_t)
16352
+ __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
16355
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16356
+vmaxv_u32 (uint32x2_t __a)
16358
+ return vget_lane_u32 ((uint32x2_t)
16359
+ __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
16362
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16363
+vmaxvq_f32 (float32x4_t __a)
16365
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
16368
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16369
+vmaxvq_f64 (float64x2_t __a)
16371
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
16374
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
16375
+vmaxvq_s8 (int8x16_t __a)
16377
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
16380
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
16381
+vmaxvq_s16 (int16x8_t __a)
16383
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
16386
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16387
+vmaxvq_s32 (int32x4_t __a)
16389
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
16392
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16393
+vmaxvq_u8 (uint8x16_t __a)
16395
+ return vgetq_lane_u8 ((uint8x16_t)
16396
+ __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
16399
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16400
+vmaxvq_u16 (uint16x8_t __a)
16402
+ return vgetq_lane_u16 ((uint16x8_t)
16403
+ __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
16406
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16407
+vmaxvq_u32 (uint32x4_t __a)
16409
+ return vgetq_lane_u32 ((uint32x4_t)
16410
+ __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
16415
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16416
+vmaxnmv_f32 (float32x2_t __a)
16418
+ return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
16421
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16422
+vmaxnmvq_f32 (float32x4_t __a)
16424
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
16427
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16428
+vmaxnmvq_f64 (float64x2_t __a)
16430
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
16435
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16436
vmin_f32 (float32x2_t __a, float32x2_t __b)
16438
- return __builtin_aarch64_fminv2sf (__a, __b);
16439
+ return __builtin_aarch64_smin_nanv2sf (__a, __b);
16442
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16443
@@ -21231,13 +21088,13 @@
16444
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16445
vminq_f32 (float32x4_t __a, float32x4_t __b)
16447
- return __builtin_aarch64_fminv4sf (__a, __b);
16448
+ return __builtin_aarch64_smin_nanv4sf (__a, __b);
16451
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16452
vminq_f64 (float64x2_t __a, float64x2_t __b)
16454
- return __builtin_aarch64_fminv2df (__a, __b);
16455
+ return __builtin_aarch64_smin_nanv2df (__a, __b);
16458
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16459
@@ -21279,6 +21136,144 @@
16465
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16466
+vminnm_f32 (float32x2_t __a, float32x2_t __b)
16468
+ return __builtin_aarch64_sminv2sf (__a, __b);
16471
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16472
+vminnmq_f32 (float32x4_t __a, float32x4_t __b)
16474
+ return __builtin_aarch64_sminv4sf (__a, __b);
16477
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16478
+vminnmq_f64 (float64x2_t __a, float64x2_t __b)
16480
+ return __builtin_aarch64_sminv2df (__a, __b);
16485
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16486
+vminv_f32 (float32x2_t __a)
16488
+ return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
16491
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
16492
+vminv_s8 (int8x8_t __a)
16494
+ return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
16497
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
16498
+vminv_s16 (int16x4_t __a)
16500
+ return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
16503
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16504
+vminv_s32 (int32x2_t __a)
16506
+ return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
16509
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16510
+vminv_u8 (uint8x8_t __a)
16512
+ return vget_lane_u8 ((uint8x8_t)
16513
+ __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
16516
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16517
+vminv_u16 (uint16x4_t __a)
16519
+ return vget_lane_u16 ((uint16x4_t)
16520
+ __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
16523
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16524
+vminv_u32 (uint32x2_t __a)
16526
+ return vget_lane_u32 ((uint32x2_t)
16527
+ __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
16530
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16531
+vminvq_f32 (float32x4_t __a)
16533
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
16536
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16537
+vminvq_f64 (float64x2_t __a)
16539
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
16542
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
16543
+vminvq_s8 (int8x16_t __a)
16545
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
16548
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
16549
+vminvq_s16 (int16x8_t __a)
16551
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
16554
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
16555
+vminvq_s32 (int32x4_t __a)
16557
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
16560
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16561
+vminvq_u8 (uint8x16_t __a)
16563
+ return vgetq_lane_u8 ((uint8x16_t)
16564
+ __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
16567
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16568
+vminvq_u16 (uint16x8_t __a)
16570
+ return vgetq_lane_u16 ((uint16x8_t)
16571
+ __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
16574
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16575
+vminvq_u32 (uint32x4_t __a)
16577
+ return vgetq_lane_u32 ((uint32x4_t)
16578
+ __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
16583
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16584
+vminnmv_f32 (float32x2_t __a)
16586
+ return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
16589
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16590
+vminnmvq_f32 (float32x4_t __a)
16592
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
16595
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16596
+vminnmvq_f64 (float64x2_t __a)
16598
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
16603
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16604
@@ -22795,6 +22790,223 @@
16605
return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
16610
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16611
+vrecpes_f32 (float32_t __a)
16613
+ return __builtin_aarch64_frecpesf (__a);
16616
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16617
+vrecped_f64 (float64_t __a)
16619
+ return __builtin_aarch64_frecpedf (__a);
16622
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16623
+vrecpe_f32 (float32x2_t __a)
16625
+ return __builtin_aarch64_frecpev2sf (__a);
16628
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16629
+vrecpeq_f32 (float32x4_t __a)
16631
+ return __builtin_aarch64_frecpev4sf (__a);
16634
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16635
+vrecpeq_f64 (float64x2_t __a)
16637
+ return __builtin_aarch64_frecpev2df (__a);
16642
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16643
+vrecpss_f32 (float32_t __a, float32_t __b)
16645
+ return __builtin_aarch64_frecpssf (__a, __b);
16648
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16649
+vrecpsd_f64 (float64_t __a, float64_t __b)
16651
+ return __builtin_aarch64_frecpsdf (__a, __b);
16654
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16655
+vrecps_f32 (float32x2_t __a, float32x2_t __b)
16657
+ return __builtin_aarch64_frecpsv2sf (__a, __b);
16660
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16661
+vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
16663
+ return __builtin_aarch64_frecpsv4sf (__a, __b);
16666
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16667
+vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
16669
+ return __builtin_aarch64_frecpsv2df (__a, __b);
16674
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
16675
+vrecpxs_f32 (float32_t __a)
16677
+ return __builtin_aarch64_frecpxsf (__a);
16680
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
16681
+vrecpxd_f64 (float64_t __a)
16683
+ return __builtin_aarch64_frecpxdf (__a);
16688
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16689
+vrnd_f32 (float32x2_t __a)
16691
+ return __builtin_aarch64_btruncv2sf (__a);
16694
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16695
+vrndq_f32 (float32x4_t __a)
16697
+ return __builtin_aarch64_btruncv4sf (__a);
16700
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16701
+vrndq_f64 (float64x2_t __a)
16703
+ return __builtin_aarch64_btruncv2df (__a);
16708
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16709
+vrnda_f32 (float32x2_t __a)
16711
+ return __builtin_aarch64_roundv2sf (__a);
16714
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16715
+vrndaq_f32 (float32x4_t __a)
16717
+ return __builtin_aarch64_roundv4sf (__a);
16720
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16721
+vrndaq_f64 (float64x2_t __a)
16723
+ return __builtin_aarch64_roundv2df (__a);
16728
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16729
+vrndi_f32 (float32x2_t __a)
16731
+ return __builtin_aarch64_nearbyintv2sf (__a);
16734
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16735
+vrndiq_f32 (float32x4_t __a)
16737
+ return __builtin_aarch64_nearbyintv4sf (__a);
16740
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16741
+vrndiq_f64 (float64x2_t __a)
16743
+ return __builtin_aarch64_nearbyintv2df (__a);
16748
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16749
+vrndm_f32 (float32x2_t __a)
16751
+ return __builtin_aarch64_floorv2sf (__a);
16754
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16755
+vrndmq_f32 (float32x4_t __a)
16757
+ return __builtin_aarch64_floorv4sf (__a);
16760
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16761
+vrndmq_f64 (float64x2_t __a)
16763
+ return __builtin_aarch64_floorv2df (__a);
16768
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16769
+vrndn_f32 (float32x2_t __a)
16771
+ return __builtin_aarch64_frintnv2sf (__a);
16773
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16774
+vrndnq_f32 (float32x4_t __a)
16776
+ return __builtin_aarch64_frintnv4sf (__a);
16779
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16780
+vrndnq_f64 (float64x2_t __a)
16782
+ return __builtin_aarch64_frintnv2df (__a);
16787
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16788
+vrndp_f32 (float32x2_t __a)
16790
+ return __builtin_aarch64_ceilv2sf (__a);
16793
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16794
+vrndpq_f32 (float32x4_t __a)
16796
+ return __builtin_aarch64_ceilv4sf (__a);
16799
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16800
+vrndpq_f64 (float64x2_t __a)
16802
+ return __builtin_aarch64_ceilv2df (__a);
16807
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16808
+vrndx_f32 (float32x2_t __a)
16810
+ return __builtin_aarch64_rintv2sf (__a);
16813
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16814
+vrndxq_f32 (float32x4_t __a)
16816
+ return __builtin_aarch64_rintv4sf (__a);
16819
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16820
+vrndxq_f64 (float64x2_t __a)
16822
+ return __builtin_aarch64_rintv2df (__a);
16827
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16828
@@ -23138,109 +23350,109 @@
16829
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16830
vshl_n_s8 (int8x8_t __a, const int __b)
16832
- return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b);
16833
+ return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
16836
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16837
vshl_n_s16 (int16x4_t __a, const int __b)
16839
- return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b);
16840
+ return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
16843
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16844
vshl_n_s32 (int32x2_t __a, const int __b)
16846
- return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b);
16847
+ return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
16850
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16851
vshl_n_s64 (int64x1_t __a, const int __b)
16853
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
16854
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
16857
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16858
vshl_n_u8 (uint8x8_t __a, const int __b)
16860
- return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b);
16861
+ return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
16864
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16865
vshl_n_u16 (uint16x4_t __a, const int __b)
16867
- return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b);
16868
+ return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
16871
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16872
vshl_n_u32 (uint32x2_t __a, const int __b)
16874
- return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b);
16875
+ return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
16878
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16879
vshl_n_u64 (uint64x1_t __a, const int __b)
16881
- return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b);
16882
+ return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
16885
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16886
vshlq_n_s8 (int8x16_t __a, const int __b)
16888
- return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b);
16889
+ return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
16892
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16893
vshlq_n_s16 (int16x8_t __a, const int __b)
16895
- return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b);
16896
+ return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
16899
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16900
vshlq_n_s32 (int32x4_t __a, const int __b)
16902
- return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b);
16903
+ return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
16906
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16907
vshlq_n_s64 (int64x2_t __a, const int __b)
16909
- return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b);
16910
+ return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
16913
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16914
vshlq_n_u8 (uint8x16_t __a, const int __b)
16916
- return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b);
16917
+ return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
16920
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16921
vshlq_n_u16 (uint16x8_t __a, const int __b)
16923
- return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b);
16924
+ return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
16927
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16928
vshlq_n_u32 (uint32x4_t __a, const int __b)
16930
- return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b);
16931
+ return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
16934
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16935
vshlq_n_u64 (uint64x2_t __a, const int __b)
16937
- return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b);
16938
+ return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
16941
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16942
vshld_n_s64 (int64x1_t __a, const int __b)
16944
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
16945
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
16948
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16949
vshld_n_u64 (uint64x1_t __a, const int __b)
16951
- return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b);
16952
+ return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
16955
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16956
@@ -23428,109 +23640,109 @@
16957
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16958
vshr_n_s8 (int8x8_t __a, const int __b)
16960
- return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b);
16961
+ return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
16964
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16965
vshr_n_s16 (int16x4_t __a, const int __b)
16967
- return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b);
16968
+ return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
16971
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16972
vshr_n_s32 (int32x2_t __a, const int __b)
16974
- return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b);
16975
+ return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
16978
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16979
vshr_n_s64 (int64x1_t __a, const int __b)
16981
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
16982
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
16985
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16986
vshr_n_u8 (uint8x8_t __a, const int __b)
16988
- return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b);
16989
+ return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
16992
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16993
vshr_n_u16 (uint16x4_t __a, const int __b)
16995
- return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b);
16996
+ return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
16999
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17000
vshr_n_u32 (uint32x2_t __a, const int __b)
17002
- return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b);
17003
+ return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
17006
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17007
vshr_n_u64 (uint64x1_t __a, const int __b)
17009
- return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b);
17010
+ return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
17013
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17014
vshrq_n_s8 (int8x16_t __a, const int __b)
17016
- return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b);
17017
+ return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
17020
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17021
vshrq_n_s16 (int16x8_t __a, const int __b)
17023
- return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b);
17024
+ return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
17027
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17028
vshrq_n_s32 (int32x4_t __a, const int __b)
17030
- return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b);
17031
+ return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
17034
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17035
vshrq_n_s64 (int64x2_t __a, const int __b)
17037
- return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b);
17038
+ return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
17041
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17042
vshrq_n_u8 (uint8x16_t __a, const int __b)
17044
- return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b);
17045
+ return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
17048
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17049
vshrq_n_u16 (uint16x8_t __a, const int __b)
17051
- return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b);
17052
+ return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
17055
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17056
vshrq_n_u32 (uint32x4_t __a, const int __b)
17058
- return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b);
17059
+ return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
17062
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17063
vshrq_n_u64 (uint64x2_t __a, const int __b)
17065
- return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b);
17066
+ return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
17069
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17070
vshrd_n_s64 (int64x1_t __a, const int __b)
17072
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
17073
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
17076
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17077
vshrd_n_u64 (uint64x1_t __a, const int __b)
17079
- return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b);
17080
+ return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
17084
@@ -25159,7 +25371,7 @@
17085
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17086
vtst_s64 (int64x1_t __a, int64x1_t __b)
17088
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
17089
+ return (__a & __b) ? -1ll : 0ll;
17092
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17093
@@ -25186,8 +25398,7 @@
17094
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17095
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
17097
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
17098
- (int64x1_t) __b);
17099
+ return (__a & __b) ? -1ll : 0ll;
17102
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17103
@@ -25245,14 +25456,13 @@
17104
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17105
vtstd_s64 (int64x1_t __a, int64x1_t __b)
17107
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
17108
+ return (__a & __b) ? -1ll : 0ll;
17111
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17112
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
17114
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
17115
- (int64x1_t) __b);
17116
+ return (__a & __b) ? -1ll : 0ll;
17120
--- a/src/gcc/config/aarch64/aarch64.md
17121
+++ b/src/gcc/config/aarch64/aarch64.md
17123
(define_c_enum "unspec" [
17136
@@ -230,6 +234,9 @@
17146
@@ -763,19 +770,41 @@
17149
(define_insn "*mov<mode>_aarch64"
17150
- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,r,m, r,*w")
17151
- (match_operand:SHORT 1 "general_operand" " r,M,m,rZ,*w,r"))]
17152
+ [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w")
17153
+ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
17154
"(register_operand (operands[0], <MODE>mode)
17155
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
17159
- ldr<size>\\t%w0, %1
17160
- str<size>\\t%w1, %0
17161
- umov\\t%w0, %1.<v>[0]
17162
- dup\\t%0.<Vallxd>, %w1"
17163
- [(set_attr "v8type" "move,alu,load1,store1,*,*")
17164
- (set_attr "simd_type" "*,*,*,*,simd_movgp,simd_dupgp")
17166
+ switch (which_alternative)
17169
+ return "mov\t%w0, %w1";
17171
+ return "mov\t%w0, %1";
17173
+ return aarch64_output_scalar_simd_mov_immediate (operands[1],
17176
+ return "ldr<size>\t%w0, %1";
17178
+ return "ldr\t%<size>0, %1";
17180
+ return "str<size>\t%w1, %0";
17182
+ return "str\t%<size>1, %0";
17184
+ return "umov\t%w0, %1.<v>[0]";
17186
+ return "dup\t%0.<Vallxd>, %w1";
17188
+ return "dup\t%0, %1.<v>[0]";
17190
+ gcc_unreachable ();
17193
+ [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
17194
+ (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
17195
+ (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
17196
(set_attr "mode" "<MODE>")
17197
(set_attr "simd_mode" "<MODE>")]
17199
@@ -797,26 +826,28 @@
17202
(define_insn "*movsi_aarch64"
17203
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m, *w, r,*w")
17204
- (match_operand:SI 1 "aarch64_mov_operand" " r,M,m,rZ,rZ,*w,*w"))]
17205
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,*w,m, m,*w, r,*w")
17206
+ (match_operand:SI 1 "aarch64_mov_operand" " r,M,m, m,rZ,*w,rZ,*w,*w"))]
17207
"(register_operand (operands[0], SImode)
17208
|| aarch64_reg_or_zero (operands[1], SImode))"
17219
- [(set_attr "v8type" "move,alu,load1,store1,fmov,fmov,fmov")
17220
+ [(set_attr "v8type" "move,alu,load1,load1,store1,store1,fmov,fmov,fmov")
17221
(set_attr "mode" "SI")
17222
- (set_attr "fp" "*,*,*,*,yes,yes,yes")]
17223
+ (set_attr "fp" "*,*,*,*,*,*,yes,yes,yes")]
17226
(define_insn "*movdi_aarch64"
17227
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,m, r, r, *w, r,*w,w")
17228
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m,rZ,Usa,Ush,rZ,*w,*w,Dd"))]
17229
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
17230
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
17231
"(register_operand (operands[0], DImode)
17232
|| aarch64_reg_or_zero (operands[1], DImode))"
17234
@@ -825,17 +856,19 @@
17247
- [(set_attr "v8type" "move,move,move,alu,load1,store1,adr,adr,fmov,fmov,fmov,fmov")
17248
+ [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
17249
(set_attr "mode" "DI")
17250
- (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,*")
17251
- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,yes")]
17252
+ (set_attr "fp" "*,*,*,*,*,*,*,*,*,*,yes,yes,yes,*")
17253
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
17256
(define_insn "insv_imm<mode>"
17257
@@ -843,9 +876,8 @@
17259
(match_operand:GPI 1 "const_int_operand" "n"))
17260
(match_operand:GPI 2 "const_int_operand" "n"))]
17261
- "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
17262
- && INTVAL (operands[1]) % 16 == 0
17263
- && UINTVAL (operands[2]) <= 0xffff"
17264
+ "UINTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
17265
+ && UINTVAL (operands[1]) % 16 == 0"
17266
"movk\\t%<w>0, %X2, lsl %1"
17267
[(set_attr "v8type" "movk")
17268
(set_attr "mode" "<MODE>")]
17269
@@ -982,9 +1014,9 @@
17270
|| register_operand (operands[1], TFmode))"
17272
orr\\t%0.16b, %1.16b, %1.16b
17273
- mov\\t%0, %1\;mov\\t%H0, %H1
17274
- fmov\\t%d0, %Q1\;fmov\\t%0.d[1], %R1
17275
- fmov\\t%Q0, %d1\;fmov\\t%R0, %1.d[1]
17282
@@ -998,6 +1030,17 @@
17283
(set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")]
17287
+ [(set (match_operand:TF 0 "register_operand" "")
17288
+ (match_operand:TF 1 "aarch64_reg_or_imm" ""))]
17289
+ "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
17292
+ aarch64_split_128bit_move (operands[0], operands[1]);
17297
;; Operands 1 and 3 are tied together by the final condition; so we allow
17298
;; fairly lax checking on the second memory operation.
17299
(define_insn "load_pair<mode>"
17300
@@ -1150,13 +1193,14 @@
17303
(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
17304
- [(set (match_operand:GPI 0 "register_operand" "=r,r")
17305
- (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))]
17306
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,*w")
17307
+ (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))]
17310
uxt<SHORT:size>\t%<GPI:w>0, %w1
17311
- ldr<SHORT:size>\t%w0, %1"
17312
- [(set_attr "v8type" "extend,load1")
17313
+ ldr<SHORT:size>\t%w0, %1
17314
+ ldr\t%<SHORT:size>0, %1"
17315
+ [(set_attr "v8type" "extend,load1,load1")
17316
(set_attr "mode" "<GPI:MODE>")]
17319
@@ -1287,6 +1331,112 @@
17320
(set_attr "mode" "SI")]
17323
+(define_insn "*adds_mul_imm_<mode>"
17324
+ [(set (reg:CC_NZ CC_REGNUM)
17326
+ (plus:GPI (mult:GPI
17327
+ (match_operand:GPI 1 "register_operand" "r")
17328
+ (match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))
17329
+ (match_operand:GPI 3 "register_operand" "rk"))
17331
+ (set (match_operand:GPI 0 "register_operand" "=r")
17332
+ (plus:GPI (mult:GPI (match_dup 1) (match_dup 2))
17335
+ "adds\\t%<w>0, %<w>3, %<w>1, lsl %p2"
17336
+ [(set_attr "v8type" "alus_shift")
17337
+ (set_attr "mode" "<MODE>")]
17340
+(define_insn "*subs_mul_imm_<mode>"
17341
+ [(set (reg:CC_NZ CC_REGNUM)
17343
+ (minus:GPI (match_operand:GPI 1 "register_operand" "rk")
17345
+ (match_operand:GPI 2 "register_operand" "r")
17346
+ (match_operand:QI 3 "aarch64_pwr_2_<mode>" "n")))
17348
+ (set (match_operand:GPI 0 "register_operand" "=r")
17349
+ (minus:GPI (match_dup 1)
17350
+ (mult:GPI (match_dup 2) (match_dup 3))))]
17352
+ "subs\\t%<w>0, %<w>1, %<w>2, lsl %p3"
17353
+ [(set_attr "v8type" "alus_shift")
17354
+ (set_attr "mode" "<MODE>")]
17357
+(define_insn "*adds_<optab><ALLX:mode>_<GPI:mode>"
17358
+ [(set (reg:CC_NZ CC_REGNUM)
17361
+ (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r"))
17362
+ (match_operand:GPI 2 "register_operand" "r"))
17364
+ (set (match_operand:GPI 0 "register_operand" "=r")
17365
+ (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))]
17367
+ "adds\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>"
17368
+ [(set_attr "v8type" "alus_ext")
17369
+ (set_attr "mode" "<GPI:MODE>")]
17372
+(define_insn "*subs_<optab><ALLX:mode>_<GPI:mode>"
17373
+ [(set (reg:CC_NZ CC_REGNUM)
17375
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
17377
+ (match_operand:ALLX 2 "register_operand" "r")))
17379
+ (set (match_operand:GPI 0 "register_operand" "=r")
17380
+ (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))]
17382
+ "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>"
17383
+ [(set_attr "v8type" "alus_ext")
17384
+ (set_attr "mode" "<GPI:MODE>")]
17387
+(define_insn "*adds_<optab><mode>_multp2"
17388
+ [(set (reg:CC_NZ CC_REGNUM)
17390
+ (plus:GPI (ANY_EXTRACT:GPI
17391
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
17392
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
17393
+ (match_operand 3 "const_int_operand" "n")
17395
+ (match_operand:GPI 4 "register_operand" "r"))
17397
+ (set (match_operand:GPI 0 "register_operand" "=r")
17398
+ (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2))
17402
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
17403
+ "adds\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
17404
+ [(set_attr "v8type" "alus_ext")
17405
+ (set_attr "mode" "<MODE>")]
17408
+(define_insn "*subs_<optab><mode>_multp2"
17409
+ [(set (reg:CC_NZ CC_REGNUM)
17411
+ (minus:GPI (match_operand:GPI 4 "register_operand" "r")
17413
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
17414
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
17415
+ (match_operand 3 "const_int_operand" "n")
17418
+ (set (match_operand:GPI 0 "register_operand" "=r")
17419
+ (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI
17420
+ (mult:GPI (match_dup 1) (match_dup 2))
17422
+ (const_int 0))))]
17423
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
17424
+ "subs\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
17425
+ [(set_attr "v8type" "alus_ext")
17426
+ (set_attr "mode" "<MODE>")]
17429
(define_insn "*add<mode>3nr_compare0"
17430
[(set (reg:CC_NZ CC_REGNUM)
17432
@@ -1791,6 +1941,34 @@
17433
(set_attr "mode" "SI")]
17436
+(define_insn "*sub<mode>3_carryin"
17438
+ (match_operand:GPI 0 "register_operand" "=r")
17439
+ (minus:GPI (minus:GPI
17440
+ (match_operand:GPI 1 "register_operand" "r")
17441
+ (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
17442
+ (match_operand:GPI 2 "register_operand" "r")))]
17444
+ "sbc\\t%<w>0, %<w>1, %<w>2"
17445
+ [(set_attr "v8type" "adc")
17446
+ (set_attr "mode" "<MODE>")]
17449
+;; zero_extend version of the above
17450
+(define_insn "*subsi3_carryin_uxtw"
17452
+ (match_operand:DI 0 "register_operand" "=r")
17454
+ (minus:SI (minus:SI
17455
+ (match_operand:SI 1 "register_operand" "r")
17456
+ (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
17457
+ (match_operand:SI 2 "register_operand" "r"))))]
17459
+ "sbc\\t%w0, %w1, %w2"
17460
+ [(set_attr "v8type" "adc")
17461
+ (set_attr "mode" "SI")]
17464
(define_insn "*sub_uxt<mode>_multp2"
17465
[(set (match_operand:GPI 0 "register_operand" "=rk")
17466
(minus:GPI (match_operand:GPI 4 "register_operand" "r")
17467
@@ -1825,6 +2003,38 @@
17468
(set_attr "mode" "SI")]
17471
+(define_insn_and_split "absdi2"
17472
+ [(set (match_operand:DI 0 "register_operand" "=r,w")
17473
+ (abs:DI (match_operand:DI 1 "register_operand" "r,w")))
17474
+ (clobber (match_scratch:DI 2 "=&r,X"))]
17479
+ "reload_completed
17480
+ && GP_REGNUM_P (REGNO (operands[0]))
17481
+ && GP_REGNUM_P (REGNO (operands[1]))"
17484
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2],
17485
+ gen_rtx_XOR (DImode,
17486
+ gen_rtx_ASHIFTRT (DImode,
17490
+ emit_insn (gen_rtx_SET (VOIDmode,
17492
+ gen_rtx_MINUS (DImode,
17494
+ gen_rtx_ASHIFTRT (DImode,
17496
+ GEN_INT (63)))));
17499
+ [(set_attr "v8type" "alu")
17500
+ (set_attr "mode" "DI")]
17503
(define_insn "neg<mode>2"
17504
[(set (match_operand:GPI 0 "register_operand" "=r")
17505
(neg:GPI (match_operand:GPI 1 "register_operand" "r")))]
17506
@@ -1844,6 +2054,27 @@
17507
(set_attr "mode" "SI")]
17510
+(define_insn "*ngc<mode>"
17511
+ [(set (match_operand:GPI 0 "register_operand" "=r")
17512
+ (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
17513
+ (match_operand:GPI 1 "register_operand" "r")))]
17515
+ "ngc\\t%<w>0, %<w>1"
17516
+ [(set_attr "v8type" "adc")
17517
+ (set_attr "mode" "<MODE>")]
17520
+(define_insn "*ngcsi_uxtw"
17521
+ [(set (match_operand:DI 0 "register_operand" "=r")
17523
+ (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
17524
+ (match_operand:SI 1 "register_operand" "r"))))]
17527
+ [(set_attr "v8type" "adc")
17528
+ (set_attr "mode" "SI")]
17531
(define_insn "*neg<mode>2_compare0"
17532
[(set (reg:CC_NZ CC_REGNUM)
17533
(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
17534
@@ -1869,6 +2100,21 @@
17535
(set_attr "mode" "SI")]
17538
+(define_insn "*neg_<shift><mode>3_compare0"
17539
+ [(set (reg:CC_NZ CC_REGNUM)
17541
+ (neg:GPI (ASHIFT:GPI
17542
+ (match_operand:GPI 1 "register_operand" "r")
17543
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")))
17545
+ (set (match_operand:GPI 0 "register_operand" "=r")
17546
+ (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))]
17548
+ "negs\\t%<w>0, %<w>1, <shift> %2"
17549
+ [(set_attr "v8type" "alus_shift")
17550
+ (set_attr "mode" "<MODE>")]
17553
(define_insn "*neg_<shift>_<mode>2"
17554
[(set (match_operand:GPI 0 "register_operand" "=r")
17555
(neg:GPI (ASHIFT:GPI
17556
@@ -2158,6 +2404,18 @@
17557
(set_attr "mode" "<GPI:MODE>")]
17560
+(define_insn "*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>"
17561
+ [(set (reg:CC_SWP CC_REGNUM)
17562
+ (compare:CC_SWP (ashift:GPI
17564
+ (match_operand:ALLX 0 "register_operand" "r"))
17565
+ (match_operand 1 "aarch64_imm3" "Ui3"))
17566
+ (match_operand:GPI 2 "register_operand" "r")))]
17568
+ "cmp\\t%<GPI:w>2, %<GPI:w>0, <su>xt<ALLX:size> %1"
17569
+ [(set_attr "v8type" "alus_ext")
17570
+ (set_attr "mode" "<GPI:MODE>")]
17573
;; -------------------------------------------------------------------
17574
;; Store-flag and conditional select insns
17575
@@ -2211,7 +2469,7 @@
17576
(set_attr "mode" "SI")]
17579
-(define_insn "*cstore<mode>_neg"
17580
+(define_insn "cstore<mode>_neg"
17581
[(set (match_operand:ALLI 0 "register_operand" "=r")
17582
(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
17583
[(match_operand 2 "cc_register" "") (const_int 0)])))]
17584
@@ -2434,6 +2692,69 @@
17585
[(set_attr "v8type" "logic,logic_imm")
17586
(set_attr "mode" "SI")])
17588
+(define_insn "*and<mode>3_compare0"
17589
+ [(set (reg:CC_NZ CC_REGNUM)
17591
+ (and:GPI (match_operand:GPI 1 "register_operand" "%r,r")
17592
+ (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>"))
17594
+ (set (match_operand:GPI 0 "register_operand" "=r,r")
17595
+ (and:GPI (match_dup 1) (match_dup 2)))]
17597
+ "ands\\t%<w>0, %<w>1, %<w>2"
17598
+ [(set_attr "v8type" "logics,logics_imm")
17599
+ (set_attr "mode" "<MODE>")]
17602
+;; zero_extend version of above
17603
+(define_insn "*andsi3_compare0_uxtw"
17604
+ [(set (reg:CC_NZ CC_REGNUM)
17606
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
17607
+ (match_operand:SI 2 "aarch64_logical_operand" "r,K"))
17609
+ (set (match_operand:DI 0 "register_operand" "=r,r")
17610
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
17612
+ "ands\\t%w0, %w1, %w2"
17613
+ [(set_attr "v8type" "logics,logics_imm")
17614
+ (set_attr "mode" "SI")]
17617
+(define_insn "*and_<SHIFT:optab><mode>3_compare0"
17618
+ [(set (reg:CC_NZ CC_REGNUM)
17620
+ (and:GPI (SHIFT:GPI
17621
+ (match_operand:GPI 1 "register_operand" "r")
17622
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
17623
+ (match_operand:GPI 3 "register_operand" "r"))
17625
+ (set (match_operand:GPI 0 "register_operand" "=r")
17626
+ (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))]
17628
+ "ands\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
17629
+ [(set_attr "v8type" "logics_shift")
17630
+ (set_attr "mode" "<MODE>")]
17633
+;; zero_extend version of above
17634
+(define_insn "*and_<SHIFT:optab>si3_compare0_uxtw"
17635
+ [(set (reg:CC_NZ CC_REGNUM)
17637
+ (and:SI (SHIFT:SI
17638
+ (match_operand:SI 1 "register_operand" "r")
17639
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
17640
+ (match_operand:SI 3 "register_operand" "r"))
17642
+ (set (match_operand:DI 0 "register_operand" "=r")
17643
+ (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2))
17644
+ (match_dup 3))))]
17646
+ "ands\\t%w0, %w3, %w1, <SHIFT:shift> %2"
17647
+ [(set_attr "v8type" "logics_shift")
17648
+ (set_attr "mode" "SI")]
17651
(define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
17652
[(set (match_operand:GPI 0 "register_operand" "=r")
17653
(LOGICAL:GPI (SHIFT:GPI
17654
@@ -2704,6 +3025,62 @@
17655
(set_attr "mode" "<MODE>")]
17658
+(define_insn "*extr<mode>5_insn"
17659
+ [(set (match_operand:GPI 0 "register_operand" "=r")
17660
+ (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
17661
+ (match_operand 3 "const_int_operand" "n"))
17662
+ (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r")
17663
+ (match_operand 4 "const_int_operand" "n"))))]
17664
+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) &&
17665
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))"
17666
+ "extr\\t%<w>0, %<w>1, %<w>2, %4"
17667
+ [(set_attr "v8type" "shift")
17668
+ (set_attr "mode" "<MODE>")]
17671
+;; zero_extend version of the above
17672
+(define_insn "*extrsi5_insn_uxtw"
17673
+ [(set (match_operand:DI 0 "register_operand" "=r")
17675
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
17676
+ (match_operand 3 "const_int_operand" "n"))
17677
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
17678
+ (match_operand 4 "const_int_operand" "n")))))]
17679
+ "UINTVAL (operands[3]) < 32 &&
17680
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
17681
+ "extr\\t%w0, %w1, %w2, %4"
17682
+ [(set_attr "v8type" "shift")
17683
+ (set_attr "mode" "SI")]
17686
+(define_insn "*ror<mode>3_insn"
17687
+ [(set (match_operand:GPI 0 "register_operand" "=r")
17688
+ (rotate:GPI (match_operand:GPI 1 "register_operand" "r")
17689
+ (match_operand 2 "const_int_operand" "n")))]
17690
+ "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
17692
+ operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
17693
+ return "ror\\t%<w>0, %<w>1, %3";
17695
+ [(set_attr "v8type" "shift")
17696
+ (set_attr "mode" "<MODE>")]
17699
+;; zero_extend version of the above
17700
+(define_insn "*rorsi3_insn_uxtw"
17701
+ [(set (match_operand:DI 0 "register_operand" "=r")
17703
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
17704
+ (match_operand 2 "const_int_operand" "n"))))]
17705
+ "UINTVAL (operands[2]) < 32"
17707
+ operands[3] = GEN_INT (32 - UINTVAL (operands[2]));
17708
+ return "ror\\t%w0, %w1, %3";
17710
+ [(set_attr "v8type" "shift")
17711
+ (set_attr "mode" "SI")]
17714
(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
17715
[(set (match_operand:GPI 0 "register_operand" "=r")
17717
@@ -2770,6 +3147,50 @@
17718
(set_attr "mode" "<MODE>")]
17721
+;; Bitfield Insert (insv)
17722
+(define_expand "insv<mode>"
17723
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
17724
+ (match_operand 1 "const_int_operand")
17725
+ (match_operand 2 "const_int_operand"))
17726
+ (match_operand:GPI 3 "general_operand"))]
17729
+ unsigned HOST_WIDE_INT width = UINTVAL (operands[1]);
17730
+ unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]);
17731
+ rtx value = operands[3];
17733
+ if (width == 0 || (pos + width) > GET_MODE_BITSIZE (<MODE>mode))
17736
+ if (CONST_INT_P (value))
17738
+ unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1;
17740
+ /* Prefer AND/OR for inserting all zeros or all ones. */
17741
+ if ((UINTVAL (value) & mask) == 0
17742
+ || (UINTVAL (value) & mask) == mask)
17745
+ /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */
17746
+ if (width == 16 && (pos % 16) == 0)
17749
+ operands[3] = force_reg (<MODE>mode, value);
17752
+(define_insn "*insv_reg<mode>"
17753
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
17754
+ (match_operand 1 "const_int_operand" "n")
17755
+ (match_operand 2 "const_int_operand" "n"))
17756
+ (match_operand:GPI 3 "register_operand" "r"))]
17757
+ "!(UINTVAL (operands[1]) == 0
17758
+ || (UINTVAL (operands[2]) + UINTVAL (operands[1])
17759
+ > GET_MODE_BITSIZE (<MODE>mode)))"
17760
+ "bfi\\t%<w>0, %<w>3, %2, %1"
17761
+ [(set_attr "v8type" "bfm")
17762
+ (set_attr "mode" "<MODE>")]
17765
(define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>"
17766
[(set (match_operand:GPI 0 "register_operand" "=r")
17767
(ashift:GPI (ANY_EXTEND:GPI
17768
@@ -3090,6 +3511,27 @@
17769
(set_attr "mode" "<MODE>")]
17772
+(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
17773
+ [(set (match_operand:GPF 0 "register_operand" "=w")
17774
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
17777
+ "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
17778
+ [(set_attr "v8type" "frecp<FRECP:frecp_suffix>")
17779
+ (set_attr "mode" "<MODE>")]
17782
+(define_insn "aarch64_frecps<mode>"
17783
+ [(set (match_operand:GPF 0 "register_operand" "=w")
17784
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
17785
+ (match_operand:GPF 2 "register_operand" "w")]
17788
+ "frecps\\t%<s>0, %<s>1, %<s>2"
17789
+ [(set_attr "v8type" "frecps")
17790
+ (set_attr "mode" "<MODE>")]
17793
;; -------------------------------------------------------------------
17795
;; -------------------------------------------------------------------
17796
@@ -3146,9 +3588,9 @@
17797
;; after or during reload as we don't want these patterns to start
17798
;; kicking in during the combiner.
17800
-(define_insn "aarch64_movdi_tilow"
17801
+(define_insn "aarch64_movdi_<mode>low"
17802
[(set (match_operand:DI 0 "register_operand" "=r")
17803
- (truncate:DI (match_operand:TI 1 "register_operand" "w")))]
17804
+ (truncate:DI (match_operand:TX 1 "register_operand" "w")))]
17805
"reload_completed || reload_in_progress"
17807
[(set_attr "v8type" "fmovf2i")
17808
@@ -3156,10 +3598,10 @@
17809
(set_attr "length" "4")
17812
-(define_insn "aarch64_movdi_tihigh"
17813
+(define_insn "aarch64_movdi_<mode>high"
17814
[(set (match_operand:DI 0 "register_operand" "=r")
17816
- (lshiftrt:TI (match_operand:TI 1 "register_operand" "w")
17817
+ (lshiftrt:TX (match_operand:TX 1 "register_operand" "w")
17819
"reload_completed || reload_in_progress"
17820
"fmov\\t%x0, %1.d[1]"
17821
@@ -3168,24 +3610,22 @@
17822
(set_attr "length" "4")
17825
-(define_insn "aarch64_movtihigh_di"
17826
- [(set (zero_extract:TI (match_operand:TI 0 "register_operand" "+w")
17827
+(define_insn "aarch64_mov<mode>high_di"
17828
+ [(set (zero_extract:TX (match_operand:TX 0 "register_operand" "+w")
17829
(const_int 64) (const_int 64))
17830
- (zero_extend:TI (match_operand:DI 1 "register_operand" "r")))]
17831
+ (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))]
17832
"reload_completed || reload_in_progress"
17833
"fmov\\t%0.d[1], %x1"
17835
[(set_attr "v8type" "fmovi2f")
17836
(set_attr "mode" "DI")
17837
(set_attr "length" "4")
17840
-(define_insn "aarch64_movtilow_di"
17841
- [(set (match_operand:TI 0 "register_operand" "=w")
17842
- (zero_extend:TI (match_operand:DI 1 "register_operand" "r")))]
17843
+(define_insn "aarch64_mov<mode>low_di"
17844
+ [(set (match_operand:TX 0 "register_operand" "=w")
17845
+ (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))]
17846
"reload_completed || reload_in_progress"
17849
[(set_attr "v8type" "fmovi2f")
17850
(set_attr "mode" "DI")
17851
(set_attr "length" "4")
17852
@@ -3197,7 +3637,6 @@
17853
(truncate:DI (match_operand:TI 1 "register_operand" "w"))))]
17854
"reload_completed || reload_in_progress"
17857
[(set_attr "v8type" "fmovi2f")
17858
(set_attr "mode" "DI")
17859
(set_attr "length" "4")
17860
--- a/src/gcc/config/aarch64/aarch64-builtins.c
17861
+++ b/src/gcc/config/aarch64/aarch64-builtins.c
17863
#include "langhooks.h"
17864
#include "diagnostic-core.h"
17865
#include "optabs.h"
17866
+#include "gimple.h"
17868
enum aarch64_simd_builtin_type_mode
17882
+#define sf_UP T_SF
17886
@@ -128,123 +131,136 @@
17887
unsigned int fcode;
17888
} aarch64_simd_builtin_datum;
17890
-#define CF(N, X) CODE_FOR_aarch64_##N##X
17891
+#define CF0(N, X) CODE_FOR_aarch64_##N##X
17892
+#define CF1(N, X) CODE_FOR_##N##X##1
17893
+#define CF2(N, X) CODE_FOR_##N##X##2
17894
+#define CF3(N, X) CODE_FOR_##N##X##3
17895
+#define CF4(N, X) CODE_FOR_##N##X##4
17896
+#define CF10(N, X) CODE_FOR_##N##X
17898
-#define VAR1(T, N, A) \
17899
- {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0},
17900
-#define VAR2(T, N, A, B) \
17903
-#define VAR3(T, N, A, B, C) \
17904
- VAR2 (T, N, A, B) \
17906
-#define VAR4(T, N, A, B, C, D) \
17907
- VAR3 (T, N, A, B, C) \
17909
-#define VAR5(T, N, A, B, C, D, E) \
17910
- VAR4 (T, N, A, B, C, D) \
17912
-#define VAR6(T, N, A, B, C, D, E, F) \
17913
- VAR5 (T, N, A, B, C, D, E) \
17915
-#define VAR7(T, N, A, B, C, D, E, F, G) \
17916
- VAR6 (T, N, A, B, C, D, E, F) \
17918
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
17919
- VAR7 (T, N, A, B, C, D, E, F, G) \
17921
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17922
- VAR8 (T, N, A, B, C, D, E, F, G, H) \
17924
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17925
- VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
17927
-#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
17928
- VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
17930
-#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
17931
- VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
17933
+#define VAR1(T, N, MAP, A) \
17934
+ {#N, AARCH64_SIMD_##T, UP (A), CF##MAP (N, A), 0},
17935
+#define VAR2(T, N, MAP, A, B) \
17936
+ VAR1 (T, N, MAP, A) \
17937
+ VAR1 (T, N, MAP, B)
17938
+#define VAR3(T, N, MAP, A, B, C) \
17939
+ VAR2 (T, N, MAP, A, B) \
17940
+ VAR1 (T, N, MAP, C)
17941
+#define VAR4(T, N, MAP, A, B, C, D) \
17942
+ VAR3 (T, N, MAP, A, B, C) \
17943
+ VAR1 (T, N, MAP, D)
17944
+#define VAR5(T, N, MAP, A, B, C, D, E) \
17945
+ VAR4 (T, N, MAP, A, B, C, D) \
17946
+ VAR1 (T, N, MAP, E)
17947
+#define VAR6(T, N, MAP, A, B, C, D, E, F) \
17948
+ VAR5 (T, N, MAP, A, B, C, D, E) \
17949
+ VAR1 (T, N, MAP, F)
17950
+#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
17951
+ VAR6 (T, N, MAP, A, B, C, D, E, F) \
17952
+ VAR1 (T, N, MAP, G)
17953
+#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
17954
+ VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
17955
+ VAR1 (T, N, MAP, H)
17956
+#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
17957
+ VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
17958
+ VAR1 (T, N, MAP, I)
17959
+#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
17960
+ VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
17961
+ VAR1 (T, N, MAP, J)
17962
+#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
17963
+ VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
17964
+ VAR1 (T, N, MAP, K)
17965
+#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
17966
+ VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
17967
+ VAR1 (T, N, MAP, L)
17969
/* BUILTIN_<ITERATOR> macros should expand to cover the same range of
17970
modes as is given for each define_mode_iterator in
17971
config/aarch64/iterators.md. */
17973
-#define BUILTIN_DX(T, N) \
17974
- VAR2 (T, N, di, df)
17975
-#define BUILTIN_SDQ_I(T, N) \
17976
- VAR4 (T, N, qi, hi, si, di)
17977
-#define BUILTIN_SD_HSI(T, N) \
17978
- VAR2 (T, N, hi, si)
17979
-#define BUILTIN_V2F(T, N) \
17980
- VAR2 (T, N, v2sf, v2df)
17981
-#define BUILTIN_VALL(T, N) \
17982
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, v2sf, v4sf, v2df)
17983
-#define BUILTIN_VB(T, N) \
17984
- VAR2 (T, N, v8qi, v16qi)
17985
-#define BUILTIN_VD(T, N) \
17986
- VAR4 (T, N, v8qi, v4hi, v2si, v2sf)
17987
-#define BUILTIN_VDC(T, N) \
17988
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
17989
-#define BUILTIN_VDIC(T, N) \
17990
- VAR3 (T, N, v8qi, v4hi, v2si)
17991
-#define BUILTIN_VDN(T, N) \
17992
- VAR3 (T, N, v4hi, v2si, di)
17993
-#define BUILTIN_VDQ(T, N) \
17994
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
17995
-#define BUILTIN_VDQF(T, N) \
17996
- VAR3 (T, N, v2sf, v4sf, v2df)
17997
-#define BUILTIN_VDQHS(T, N) \
17998
- VAR4 (T, N, v4hi, v8hi, v2si, v4si)
17999
-#define BUILTIN_VDQIF(T, N) \
18000
- VAR9 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
18001
-#define BUILTIN_VDQM(T, N) \
18002
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
18003
-#define BUILTIN_VDQV(T, N) \
18004
- VAR5 (T, N, v8qi, v16qi, v4hi, v8hi, v4si)
18005
-#define BUILTIN_VDQ_BHSI(T, N) \
18006
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
18007
-#define BUILTIN_VDQ_I(T, N) \
18008
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
18009
-#define BUILTIN_VDW(T, N) \
18010
- VAR3 (T, N, v8qi, v4hi, v2si)
18011
-#define BUILTIN_VD_BHSI(T, N) \
18012
- VAR3 (T, N, v8qi, v4hi, v2si)
18013
-#define BUILTIN_VD_HSI(T, N) \
18014
- VAR2 (T, N, v4hi, v2si)
18015
-#define BUILTIN_VD_RE(T, N) \
18016
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
18017
-#define BUILTIN_VQ(T, N) \
18018
- VAR6 (T, N, v16qi, v8hi, v4si, v2di, v4sf, v2df)
18019
-#define BUILTIN_VQN(T, N) \
18020
- VAR3 (T, N, v8hi, v4si, v2di)
18021
-#define BUILTIN_VQW(T, N) \
18022
- VAR3 (T, N, v16qi, v8hi, v4si)
18023
-#define BUILTIN_VQ_HSI(T, N) \
18024
- VAR2 (T, N, v8hi, v4si)
18025
-#define BUILTIN_VQ_S(T, N) \
18026
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
18027
-#define BUILTIN_VSDQ_HSI(T, N) \
18028
- VAR6 (T, N, v4hi, v8hi, v2si, v4si, hi, si)
18029
-#define BUILTIN_VSDQ_I(T, N) \
18030
- VAR11 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
18031
-#define BUILTIN_VSDQ_I_BHSI(T, N) \
18032
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
18033
-#define BUILTIN_VSDQ_I_DI(T, N) \
18034
- VAR8 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
18035
-#define BUILTIN_VSD_HSI(T, N) \
18036
- VAR4 (T, N, v4hi, v2si, hi, si)
18037
-#define BUILTIN_VSQN_HSDI(T, N) \
18038
- VAR6 (T, N, v8hi, v4si, v2di, hi, si, di)
18039
-#define BUILTIN_VSTRUCT(T, N) \
18040
- VAR3 (T, N, oi, ci, xi)
18041
+#define BUILTIN_DX(T, N, MAP) \
18042
+ VAR2 (T, N, MAP, di, df)
18043
+#define BUILTIN_GPF(T, N, MAP) \
18044
+ VAR2 (T, N, MAP, sf, df)
18045
+#define BUILTIN_SDQ_I(T, N, MAP) \
18046
+ VAR4 (T, N, MAP, qi, hi, si, di)
18047
+#define BUILTIN_SD_HSI(T, N, MAP) \
18048
+ VAR2 (T, N, MAP, hi, si)
18049
+#define BUILTIN_V2F(T, N, MAP) \
18050
+ VAR2 (T, N, MAP, v2sf, v2df)
18051
+#define BUILTIN_VALL(T, N, MAP) \
18052
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
18053
+ v4si, v2di, v2sf, v4sf, v2df)
18054
+#define BUILTIN_VALLDI(T, N, MAP) \
18055
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
18056
+ v4si, v2di, v2sf, v4sf, v2df, di)
18057
+#define BUILTIN_VB(T, N, MAP) \
18058
+ VAR2 (T, N, MAP, v8qi, v16qi)
18059
+#define BUILTIN_VD(T, N, MAP) \
18060
+ VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
18061
+#define BUILTIN_VDC(T, N, MAP) \
18062
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
18063
+#define BUILTIN_VDIC(T, N, MAP) \
18064
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
18065
+#define BUILTIN_VDN(T, N, MAP) \
18066
+ VAR3 (T, N, MAP, v4hi, v2si, di)
18067
+#define BUILTIN_VDQ(T, N, MAP) \
18068
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
18069
+#define BUILTIN_VDQF(T, N, MAP) \
18070
+ VAR3 (T, N, MAP, v2sf, v4sf, v2df)
18071
+#define BUILTIN_VDQH(T, N, MAP) \
18072
+ VAR2 (T, N, MAP, v4hi, v8hi)
18073
+#define BUILTIN_VDQHS(T, N, MAP) \
18074
+ VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si)
18075
+#define BUILTIN_VDQIF(T, N, MAP) \
18076
+ VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
18077
+#define BUILTIN_VDQM(T, N, MAP) \
18078
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
18079
+#define BUILTIN_VDQV(T, N, MAP) \
18080
+ VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si)
18081
+#define BUILTIN_VDQ_BHSI(T, N, MAP) \
18082
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
18083
+#define BUILTIN_VDQ_I(T, N, MAP) \
18084
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
18085
+#define BUILTIN_VDW(T, N, MAP) \
18086
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
18087
+#define BUILTIN_VD_BHSI(T, N, MAP) \
18088
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
18089
+#define BUILTIN_VD_HSI(T, N, MAP) \
18090
+ VAR2 (T, N, MAP, v4hi, v2si)
18091
+#define BUILTIN_VD_RE(T, N, MAP) \
18092
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
18093
+#define BUILTIN_VQ(T, N, MAP) \
18094
+ VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
18095
+#define BUILTIN_VQN(T, N, MAP) \
18096
+ VAR3 (T, N, MAP, v8hi, v4si, v2di)
18097
+#define BUILTIN_VQW(T, N, MAP) \
18098
+ VAR3 (T, N, MAP, v16qi, v8hi, v4si)
18099
+#define BUILTIN_VQ_HSI(T, N, MAP) \
18100
+ VAR2 (T, N, MAP, v8hi, v4si)
18101
+#define BUILTIN_VQ_S(T, N, MAP) \
18102
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
18103
+#define BUILTIN_VSDQ_HSI(T, N, MAP) \
18104
+ VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si)
18105
+#define BUILTIN_VSDQ_I(T, N, MAP) \
18106
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
18107
+#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \
18108
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
18109
+#define BUILTIN_VSDQ_I_DI(T, N, MAP) \
18110
+ VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
18111
+#define BUILTIN_VSD_HSI(T, N, MAP) \
18112
+ VAR4 (T, N, MAP, v4hi, v2si, hi, si)
18113
+#define BUILTIN_VSQN_HSDI(T, N, MAP) \
18114
+ VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di)
18115
+#define BUILTIN_VSTRUCT(T, N, MAP) \
18116
+ VAR3 (T, N, MAP, oi, ci, xi)
18118
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
18119
#include "aarch64-simd-builtins.def"
18123
-#define VAR1(T, N, A) \
18124
+#define VAR1(T, N, MAP, A) \
18125
AARCH64_SIMD_BUILTIN_##N##A,
18127
enum aarch64_builtins
18128
@@ -257,53 +273,6 @@
18129
AARCH64_BUILTIN_MAX
18133
-#undef BUILTIN_SDQ_I
18134
-#undef BUILTIN_SD_HSI
18135
-#undef BUILTIN_V2F
18136
-#undef BUILTIN_VALL
18139
-#undef BUILTIN_VDC
18140
-#undef BUILTIN_VDIC
18141
-#undef BUILTIN_VDN
18142
-#undef BUILTIN_VDQ
18143
-#undef BUILTIN_VDQF
18144
-#undef BUILTIN_VDQHS
18145
-#undef BUILTIN_VDQIF
18146
-#undef BUILTIN_VDQM
18147
-#undef BUILTIN_VDQV
18148
-#undef BUILTIN_VDQ_BHSI
18149
-#undef BUILTIN_VDQ_I
18150
-#undef BUILTIN_VDW
18151
-#undef BUILTIN_VD_BHSI
18152
-#undef BUILTIN_VD_HSI
18153
-#undef BUILTIN_VD_RE
18155
-#undef BUILTIN_VQN
18156
-#undef BUILTIN_VQW
18157
-#undef BUILTIN_VQ_HSI
18158
-#undef BUILTIN_VQ_S
18159
-#undef BUILTIN_VSDQ_HSI
18160
-#undef BUILTIN_VSDQ_I
18161
-#undef BUILTIN_VSDQ_I_BHSI
18162
-#undef BUILTIN_VSDQ_I_DI
18163
-#undef BUILTIN_VSD_HSI
18164
-#undef BUILTIN_VSQN_HSDI
18165
-#undef BUILTIN_VSTRUCT
18179
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
18181
#define NUM_DREG_TYPES 6
18182
@@ -609,7 +578,7 @@
18184
"v8qi", "v4hi", "v2si", "v2sf", "di", "df",
18185
"v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
18186
- "ti", "ei", "oi", "xi", "si", "hi", "qi"
18187
+ "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
18191
@@ -1259,30 +1228,82 @@
18192
&& in_mode == N##Fmode && in_n == C)
18193
case BUILT_IN_FLOOR:
18194
case BUILT_IN_FLOORF:
18195
- return AARCH64_FIND_FRINT_VARIANT (frintm);
18196
+ return AARCH64_FIND_FRINT_VARIANT (floor);
18197
case BUILT_IN_CEIL:
18198
case BUILT_IN_CEILF:
18199
- return AARCH64_FIND_FRINT_VARIANT (frintp);
18200
+ return AARCH64_FIND_FRINT_VARIANT (ceil);
18201
case BUILT_IN_TRUNC:
18202
case BUILT_IN_TRUNCF:
18203
- return AARCH64_FIND_FRINT_VARIANT (frintz);
18204
+ return AARCH64_FIND_FRINT_VARIANT (btrunc);
18205
case BUILT_IN_ROUND:
18206
case BUILT_IN_ROUNDF:
18207
- return AARCH64_FIND_FRINT_VARIANT (frinta);
18208
+ return AARCH64_FIND_FRINT_VARIANT (round);
18209
case BUILT_IN_NEARBYINT:
18210
case BUILT_IN_NEARBYINTF:
18211
- return AARCH64_FIND_FRINT_VARIANT (frinti);
18212
+ return AARCH64_FIND_FRINT_VARIANT (nearbyint);
18213
case BUILT_IN_SQRT:
18214
case BUILT_IN_SQRTF:
18215
return AARCH64_FIND_FRINT_VARIANT (sqrt);
18216
#undef AARCH64_CHECK_BUILTIN_MODE
18217
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
18218
+ (out_mode == SImode && out_n == C \
18219
+ && in_mode == N##Imode && in_n == C)
18220
+ case BUILT_IN_CLZ:
18222
+ if (AARCH64_CHECK_BUILTIN_MODE (4, S))
18223
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_clzv4si];
18224
+ return NULL_TREE;
18226
+#undef AARCH64_CHECK_BUILTIN_MODE
18227
+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
18228
(out_mode == N##Imode && out_n == C \
18229
&& in_mode == N##Fmode && in_n == C)
18230
case BUILT_IN_LFLOOR:
18231
- return AARCH64_FIND_FRINT_VARIANT (fcvtms);
18232
+ case BUILT_IN_IFLOORF:
18234
+ tree new_tree = NULL_TREE;
18235
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
18237
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2dfv2di];
18238
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
18240
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv4sfv4si];
18241
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
18243
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2sfv2si];
18246
case BUILT_IN_LCEIL:
18247
- return AARCH64_FIND_FRINT_VARIANT (fcvtps);
18248
+ case BUILT_IN_ICEILF:
18250
+ tree new_tree = NULL_TREE;
18251
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
18253
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2dfv2di];
18254
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
18256
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv4sfv4si];
18257
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
18259
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2sfv2si];
18262
+ case BUILT_IN_LROUND:
18263
+ case BUILT_IN_IROUNDF:
18265
+ tree new_tree = NULL_TREE;
18266
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
18268
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2dfv2di];
18269
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
18271
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv4sfv4si];
18272
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
18274
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2sfv2si];
18281
@@ -1290,5 +1311,160 @@
18287
+#define VAR1(T, N, MAP, A) \
18288
+ case AARCH64_SIMD_BUILTIN_##N##A:
18291
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
18292
+ bool ignore ATTRIBUTE_UNUSED)
18294
+ int fcode = DECL_FUNCTION_CODE (fndecl);
18295
+ tree type = TREE_TYPE (TREE_TYPE (fndecl));
18299
+ BUILTIN_VALLDI (UNOP, abs, 2)
18300
+ return fold_build1 (ABS_EXPR, type, args[0]);
18302
+ BUILTIN_VALLDI (BINOP, cmge, 0)
18303
+ return fold_build2 (GE_EXPR, type, args[0], args[1]);
18305
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
18306
+ return fold_build2 (GT_EXPR, type, args[0], args[1]);
18308
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
18309
+ return fold_build2 (EQ_EXPR, type, args[0], args[1]);
18311
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
18313
+ tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
18314
+ tree vec_zero_node = build_zero_cst (type);
18315
+ return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
18318
+ VAR1 (UNOP, floatv2si, 2, v2sf)
18319
+ VAR1 (UNOP, floatv4si, 2, v4sf)
18320
+ VAR1 (UNOP, floatv2di, 2, v2df)
18321
+ return fold_build1 (FLOAT_EXPR, type, args[0]);
18326
+ return NULL_TREE;
18330
+aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
18332
+ bool changed = false;
18333
+ gimple stmt = gsi_stmt (*gsi);
18334
+ tree call = gimple_call_fn (stmt);
18336
+ gimple new_stmt = NULL;
18339
+ fndecl = gimple_call_fndecl (stmt);
18342
+ int fcode = DECL_FUNCTION_CODE (fndecl);
18343
+ int nargs = gimple_call_num_args (stmt);
18344
+ tree *args = (nargs > 0
18345
+ ? gimple_call_arg_ptr (stmt, 0)
18346
+ : &error_mark_node);
18350
+ BUILTIN_VALL (UNOP, reduc_splus_, 10)
18351
+ new_stmt = gimple_build_assign_with_ops (
18353
+ gimple_call_lhs (stmt),
18357
+ BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
18358
+ new_stmt = gimple_build_assign_with_ops (
18360
+ gimple_call_lhs (stmt),
18364
+ BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
18365
+ new_stmt = gimple_build_assign_with_ops (
18367
+ gimple_call_lhs (stmt),
18380
+ gsi_replace (gsi, new_stmt, true);
18387
#undef AARCH64_CHECK_BUILTIN_MODE
18388
#undef AARCH64_FIND_FRINT_VARIANT
18390
+#undef BUILTIN_SDQ_I
18391
+#undef BUILTIN_SD_HSI
18392
+#undef BUILTIN_V2F
18393
+#undef BUILTIN_VALL
18396
+#undef BUILTIN_VDC
18397
+#undef BUILTIN_VDIC
18398
+#undef BUILTIN_VDN
18399
+#undef BUILTIN_VDQ
18400
+#undef BUILTIN_VDQF
18401
+#undef BUILTIN_VDQH
18402
+#undef BUILTIN_VDQHS
18403
+#undef BUILTIN_VDQIF
18404
+#undef BUILTIN_VDQM
18405
+#undef BUILTIN_VDQV
18406
+#undef BUILTIN_VDQ_BHSI
18407
+#undef BUILTIN_VDQ_I
18408
+#undef BUILTIN_VDW
18409
+#undef BUILTIN_VD_BHSI
18410
+#undef BUILTIN_VD_HSI
18411
+#undef BUILTIN_VD_RE
18413
+#undef BUILTIN_VQN
18414
+#undef BUILTIN_VQW
18415
+#undef BUILTIN_VQ_HSI
18416
+#undef BUILTIN_VQ_S
18417
+#undef BUILTIN_VSDQ_HSI
18418
+#undef BUILTIN_VSDQ_I
18419
+#undef BUILTIN_VSDQ_I_BHSI
18420
+#undef BUILTIN_VSDQ_I_DI
18421
+#undef BUILTIN_VSD_HSI
18422
+#undef BUILTIN_VSQN_HSDI
18423
+#undef BUILTIN_VSTRUCT
18442
--- a/src/gcc/config/aarch64/aarch64-protos.h
18443
+++ b/src/gcc/config/aarch64/aarch64-protos.h
18445
Each of of these represents a thread-local symbol, and corresponds to the
18446
thread local storage relocation operator for the symbol being referred to.
18448
+ SYMBOL_TINY_ABSOLUTE
18450
+ Generate symbol accesses as a PC relative address using a single
18451
+ instruction. To compute the address of symbol foo, we generate:
18455
SYMBOL_FORCE_TO_MEM : Global variables are addressed using
18456
constant pool. All variable addresses are spilled into constant
18457
pools. The constant pools themselves are addressed using PC
18459
SYMBOL_SMALL_TLSDESC,
18460
SYMBOL_SMALL_GOTTPREL,
18461
SYMBOL_SMALL_TPREL,
18462
+ SYMBOL_TINY_ABSOLUTE,
18463
SYMBOL_FORCE_TO_MEM
18466
@@ -126,35 +134,66 @@
18470
+/* Cost for vector insn classes. */
18471
+struct cpu_vector_cost
18473
+ const int scalar_stmt_cost; /* Cost of any scalar operation,
18474
+ excluding load and store. */
18475
+ const int scalar_load_cost; /* Cost of scalar load. */
18476
+ const int scalar_store_cost; /* Cost of scalar store. */
18477
+ const int vec_stmt_cost; /* Cost of any vector operation,
18478
+ excluding load, store,
18479
+ vector-to-scalar and
18480
+ scalar-to-vector operation. */
18481
+ const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */
18482
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector
18484
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
18485
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
18486
+ const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
18487
+ const int vec_store_cost; /* Cost of vector store. */
18488
+ const int cond_taken_branch_cost; /* Cost of taken branch. */
18489
+ const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
18494
const struct cpu_rtx_cost_table *const insn_extra_cost;
18495
const struct cpu_addrcost_table *const addr_cost;
18496
const struct cpu_regmove_cost *const regmove_cost;
18497
+ const struct cpu_vector_cost *const vec_costs;
18498
const int memmov_cost;
18501
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
18502
bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
18503
+enum aarch64_symbol_type
18504
+aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
18505
bool aarch64_constant_address_p (rtx);
18506
bool aarch64_float_const_zero_rtx_p (rtx);
18507
bool aarch64_function_arg_regno_p (unsigned);
18508
bool aarch64_gen_movmemqi (rtx *);
18509
+bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
18510
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
18511
bool aarch64_is_long_call_p (rtx);
18512
bool aarch64_label_mentioned_p (rtx);
18513
bool aarch64_legitimate_pic_operand_p (rtx);
18514
bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
18515
+bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
18516
+ enum machine_mode);
18517
+char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
18518
+char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
18519
bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
18520
bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
18521
bool aarch64_regno_ok_for_base_p (int, bool);
18522
bool aarch64_regno_ok_for_index_p (int, bool);
18523
bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
18524
bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
18525
+bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
18526
bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
18527
+bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool,
18528
+ struct simd_immediate_info *);
18529
bool aarch64_symbolic_address_p (rtx);
18530
-bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
18531
- enum aarch64_symbol_type *);
18532
bool aarch64_uimm12_shift (HOST_WIDE_INT);
18533
const char *aarch64_output_casesi (rtx *);
18534
enum aarch64_symbol_type aarch64_classify_symbol (rtx,
18535
@@ -165,9 +204,6 @@
18536
int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode);
18537
int aarch64_hard_regno_nregs (unsigned, enum machine_mode);
18538
int aarch64_simd_attr_length_move (rtx);
18539
-int aarch64_simd_immediate_valid_for_move (rtx, enum machine_mode, rtx *,
18540
- int *, unsigned char *, int *,
18542
int aarch64_uxt_size (int, HOST_WIDE_INT);
18543
rtx aarch64_final_eh_return_addr (void);
18544
rtx aarch64_legitimize_reload_address (rtx *, enum machine_mode, int, int, int);
18545
@@ -177,6 +213,7 @@
18546
bool aarch64_simd_mem_operand_p (rtx);
18547
rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
18548
rtx aarch64_tls_get_addr (void);
18549
+tree aarch64_fold_builtin (tree, int, tree *, bool);
18550
unsigned aarch64_dbx_register_number (unsigned);
18551
unsigned aarch64_trampoline_size (void);
18552
void aarch64_asm_output_labelref (FILE *, const char *);
18553
@@ -216,6 +253,8 @@
18555
bool aarch64_split_128bit_move_p (rtx, rtx);
18557
+void aarch64_split_simd_move (rtx, rtx);
18559
/* Check for a legitimate floating point constant for FMOV. */
18560
bool aarch64_float_const_representable_p (rtx);
18562
@@ -249,6 +288,4 @@
18563
extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
18565
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
18567
-char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
18568
#endif /* GCC_AARCH64_PROTOS_H */
18569
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
18570
+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
18571
@@ -18,248 +18,346 @@
18572
along with GCC; see the file COPYING3. If not see
18573
<http://www.gnu.org/licenses/>. */
18575
-/* In the list below, the BUILTIN_<ITERATOR> macros should
18576
- correspond to the iterator used to construct the instruction's
18577
- patterns in aarch64-simd.md. A helpful idiom to follow when
18578
- adding new builtins is to add a line for each pattern in the md
18579
- file. Thus, ADDP, which has one pattern defined for the VD_BHSI
18580
- iterator, and one for DImode, has two entries below. */
18581
+/* In the list below, the BUILTIN_<ITERATOR> macros expand to create
18582
+ builtins for each of the modes described by <ITERATOR>. When adding
18583
+ new builtins to this list, a helpful idiom to follow is to add
18584
+ a line for each pattern in the md file. Thus, ADDP, which has one
18585
+ pattern defined for the VD_BHSI iterator, and one for DImode, has two
18588
- BUILTIN_VD_RE (CREATE, create)
18589
- BUILTIN_VQ_S (GETLANE, get_lane_signed)
18590
- BUILTIN_VDQ (GETLANE, get_lane_unsigned)
18591
- BUILTIN_VDQF (GETLANE, get_lane)
18592
- VAR1 (GETLANE, get_lane, di)
18593
- BUILTIN_VDC (COMBINE, combine)
18594
- BUILTIN_VB (BINOP, pmul)
18595
- BUILTIN_VDQF (UNOP, sqrt)
18596
- BUILTIN_VD_BHSI (BINOP, addp)
18597
- VAR1 (UNOP, addp, di)
18598
+ Parameter 1 is the 'type' of the intrinsic. This is used to
18599
+ describe the type modifiers (for example; unsigned) applied to
18600
+ each of the parameters to the intrinsic function.
18602
- BUILTIN_VD_RE (REINTERP, reinterpretdi)
18603
- BUILTIN_VDC (REINTERP, reinterpretv8qi)
18604
- BUILTIN_VDC (REINTERP, reinterpretv4hi)
18605
- BUILTIN_VDC (REINTERP, reinterpretv2si)
18606
- BUILTIN_VDC (REINTERP, reinterpretv2sf)
18607
- BUILTIN_VQ (REINTERP, reinterpretv16qi)
18608
- BUILTIN_VQ (REINTERP, reinterpretv8hi)
18609
- BUILTIN_VQ (REINTERP, reinterpretv4si)
18610
- BUILTIN_VQ (REINTERP, reinterpretv4sf)
18611
- BUILTIN_VQ (REINTERP, reinterpretv2di)
18612
- BUILTIN_VQ (REINTERP, reinterpretv2df)
18613
+ Parameter 2 is the name of the intrinsic. This is appended
18614
+ to `__builtin_aarch64_<name><mode>` to give the intrinsic name
18615
+ as exported to the front-ends.
18617
- BUILTIN_VDQ_I (BINOP, dup_lane)
18618
- BUILTIN_SDQ_I (BINOP, dup_lane)
18619
+ Parameter 3 describes how to map from the name to the CODE_FOR_
18620
+ macro holding the RTL pattern for the intrinsic. This mapping is:
18621
+ 0 - CODE_FOR_aarch64_<name><mode>
18622
+ 1-9 - CODE_FOR_<name><mode><1-9>
18623
+ 10 - CODE_FOR_<name><mode>. */
18625
+ BUILTIN_VD_RE (CREATE, create, 0)
18626
+ BUILTIN_VQ_S (GETLANE, get_lane_signed, 0)
18627
+ BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0)
18628
+ BUILTIN_VDQF (GETLANE, get_lane, 0)
18629
+ VAR1 (GETLANE, get_lane, 0, di)
18630
+ BUILTIN_VDC (COMBINE, combine, 0)
18631
+ BUILTIN_VB (BINOP, pmul, 0)
18632
+ BUILTIN_VDQF (UNOP, sqrt, 2)
18633
+ BUILTIN_VD_BHSI (BINOP, addp, 0)
18634
+ VAR1 (UNOP, addp, 0, di)
18635
+ VAR1 (UNOP, clz, 2, v4si)
18637
+ BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
18638
+ BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
18639
+ BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
18640
+ BUILTIN_VDC (REINTERP, reinterpretv2si, 0)
18641
+ BUILTIN_VDC (REINTERP, reinterpretv2sf, 0)
18642
+ BUILTIN_VQ (REINTERP, reinterpretv16qi, 0)
18643
+ BUILTIN_VQ (REINTERP, reinterpretv8hi, 0)
18644
+ BUILTIN_VQ (REINTERP, reinterpretv4si, 0)
18645
+ BUILTIN_VQ (REINTERP, reinterpretv4sf, 0)
18646
+ BUILTIN_VQ (REINTERP, reinterpretv2di, 0)
18647
+ BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
18649
+ BUILTIN_VDQ_I (BINOP, dup_lane, 0)
18650
+ BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0)
18651
/* Implemented by aarch64_<sur>q<r>shl<mode>. */
18652
- BUILTIN_VSDQ_I (BINOP, sqshl)
18653
- BUILTIN_VSDQ_I (BINOP, uqshl)
18654
- BUILTIN_VSDQ_I (BINOP, sqrshl)
18655
- BUILTIN_VSDQ_I (BINOP, uqrshl)
18656
+ BUILTIN_VSDQ_I (BINOP, sqshl, 0)
18657
+ BUILTIN_VSDQ_I (BINOP, uqshl, 0)
18658
+ BUILTIN_VSDQ_I (BINOP, sqrshl, 0)
18659
+ BUILTIN_VSDQ_I (BINOP, uqrshl, 0)
18660
/* Implemented by aarch64_<su_optab><optab><mode>. */
18661
- BUILTIN_VSDQ_I (BINOP, sqadd)
18662
- BUILTIN_VSDQ_I (BINOP, uqadd)
18663
- BUILTIN_VSDQ_I (BINOP, sqsub)
18664
- BUILTIN_VSDQ_I (BINOP, uqsub)
18665
+ BUILTIN_VSDQ_I (BINOP, sqadd, 0)
18666
+ BUILTIN_VSDQ_I (BINOP, uqadd, 0)
18667
+ BUILTIN_VSDQ_I (BINOP, sqsub, 0)
18668
+ BUILTIN_VSDQ_I (BINOP, uqsub, 0)
18669
/* Implemented by aarch64_<sur>qadd<mode>. */
18670
- BUILTIN_VSDQ_I (BINOP, suqadd)
18671
- BUILTIN_VSDQ_I (BINOP, usqadd)
18672
+ BUILTIN_VSDQ_I (BINOP, suqadd, 0)
18673
+ BUILTIN_VSDQ_I (BINOP, usqadd, 0)
18675
/* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */
18676
- BUILTIN_VDC (GETLANE, get_dregoi)
18677
- BUILTIN_VDC (GETLANE, get_dregci)
18678
- BUILTIN_VDC (GETLANE, get_dregxi)
18679
+ BUILTIN_VDC (GETLANE, get_dregoi, 0)
18680
+ BUILTIN_VDC (GETLANE, get_dregci, 0)
18681
+ BUILTIN_VDC (GETLANE, get_dregxi, 0)
18682
/* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
18683
- BUILTIN_VQ (GETLANE, get_qregoi)
18684
- BUILTIN_VQ (GETLANE, get_qregci)
18685
- BUILTIN_VQ (GETLANE, get_qregxi)
18686
+ BUILTIN_VQ (GETLANE, get_qregoi, 0)
18687
+ BUILTIN_VQ (GETLANE, get_qregci, 0)
18688
+ BUILTIN_VQ (GETLANE, get_qregxi, 0)
18689
/* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
18690
- BUILTIN_VQ (SETLANE, set_qregoi)
18691
- BUILTIN_VQ (SETLANE, set_qregci)
18692
- BUILTIN_VQ (SETLANE, set_qregxi)
18693
+ BUILTIN_VQ (SETLANE, set_qregoi, 0)
18694
+ BUILTIN_VQ (SETLANE, set_qregci, 0)
18695
+ BUILTIN_VQ (SETLANE, set_qregxi, 0)
18696
/* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
18697
- BUILTIN_VDC (LOADSTRUCT, ld2)
18698
- BUILTIN_VDC (LOADSTRUCT, ld3)
18699
- BUILTIN_VDC (LOADSTRUCT, ld4)
18700
+ BUILTIN_VDC (LOADSTRUCT, ld2, 0)
18701
+ BUILTIN_VDC (LOADSTRUCT, ld3, 0)
18702
+ BUILTIN_VDC (LOADSTRUCT, ld4, 0)
18703
/* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */
18704
- BUILTIN_VQ (LOADSTRUCT, ld2)
18705
- BUILTIN_VQ (LOADSTRUCT, ld3)
18706
- BUILTIN_VQ (LOADSTRUCT, ld4)
18707
+ BUILTIN_VQ (LOADSTRUCT, ld2, 0)
18708
+ BUILTIN_VQ (LOADSTRUCT, ld3, 0)
18709
+ BUILTIN_VQ (LOADSTRUCT, ld4, 0)
18710
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
18711
- BUILTIN_VDC (STORESTRUCT, st2)
18712
- BUILTIN_VDC (STORESTRUCT, st3)
18713
- BUILTIN_VDC (STORESTRUCT, st4)
18714
+ BUILTIN_VDC (STORESTRUCT, st2, 0)
18715
+ BUILTIN_VDC (STORESTRUCT, st3, 0)
18716
+ BUILTIN_VDC (STORESTRUCT, st4, 0)
18717
/* Implemented by aarch64_st<VSTRUCT:nregs><VQ:mode>. */
18718
- BUILTIN_VQ (STORESTRUCT, st2)
18719
- BUILTIN_VQ (STORESTRUCT, st3)
18720
- BUILTIN_VQ (STORESTRUCT, st4)
18721
+ BUILTIN_VQ (STORESTRUCT, st2, 0)
18722
+ BUILTIN_VQ (STORESTRUCT, st3, 0)
18723
+ BUILTIN_VQ (STORESTRUCT, st4, 0)
18725
- BUILTIN_VQW (BINOP, saddl2)
18726
- BUILTIN_VQW (BINOP, uaddl2)
18727
- BUILTIN_VQW (BINOP, ssubl2)
18728
- BUILTIN_VQW (BINOP, usubl2)
18729
- BUILTIN_VQW (BINOP, saddw2)
18730
- BUILTIN_VQW (BINOP, uaddw2)
18731
- BUILTIN_VQW (BINOP, ssubw2)
18732
- BUILTIN_VQW (BINOP, usubw2)
18733
+ BUILTIN_VQW (BINOP, saddl2, 0)
18734
+ BUILTIN_VQW (BINOP, uaddl2, 0)
18735
+ BUILTIN_VQW (BINOP, ssubl2, 0)
18736
+ BUILTIN_VQW (BINOP, usubl2, 0)
18737
+ BUILTIN_VQW (BINOP, saddw2, 0)
18738
+ BUILTIN_VQW (BINOP, uaddw2, 0)
18739
+ BUILTIN_VQW (BINOP, ssubw2, 0)
18740
+ BUILTIN_VQW (BINOP, usubw2, 0)
18741
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>. */
18742
- BUILTIN_VDW (BINOP, saddl)
18743
- BUILTIN_VDW (BINOP, uaddl)
18744
- BUILTIN_VDW (BINOP, ssubl)
18745
- BUILTIN_VDW (BINOP, usubl)
18746
+ BUILTIN_VDW (BINOP, saddl, 0)
18747
+ BUILTIN_VDW (BINOP, uaddl, 0)
18748
+ BUILTIN_VDW (BINOP, ssubl, 0)
18749
+ BUILTIN_VDW (BINOP, usubl, 0)
18750
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>. */
18751
- BUILTIN_VDW (BINOP, saddw)
18752
- BUILTIN_VDW (BINOP, uaddw)
18753
- BUILTIN_VDW (BINOP, ssubw)
18754
- BUILTIN_VDW (BINOP, usubw)
18755
+ BUILTIN_VDW (BINOP, saddw, 0)
18756
+ BUILTIN_VDW (BINOP, uaddw, 0)
18757
+ BUILTIN_VDW (BINOP, ssubw, 0)
18758
+ BUILTIN_VDW (BINOP, usubw, 0)
18759
/* Implemented by aarch64_<sur>h<addsub><mode>. */
18760
- BUILTIN_VQ_S (BINOP, shadd)
18761
- BUILTIN_VQ_S (BINOP, uhadd)
18762
- BUILTIN_VQ_S (BINOP, srhadd)
18763
- BUILTIN_VQ_S (BINOP, urhadd)
18764
+ BUILTIN_VQ_S (BINOP, shadd, 0)
18765
+ BUILTIN_VQ_S (BINOP, uhadd, 0)
18766
+ BUILTIN_VQ_S (BINOP, srhadd, 0)
18767
+ BUILTIN_VQ_S (BINOP, urhadd, 0)
18768
/* Implemented by aarch64_<sur><addsub>hn<mode>. */
18769
- BUILTIN_VQN (BINOP, addhn)
18770
- BUILTIN_VQN (BINOP, raddhn)
18771
+ BUILTIN_VQN (BINOP, addhn, 0)
18772
+ BUILTIN_VQN (BINOP, raddhn, 0)
18773
/* Implemented by aarch64_<sur><addsub>hn2<mode>. */
18774
- BUILTIN_VQN (TERNOP, addhn2)
18775
- BUILTIN_VQN (TERNOP, raddhn2)
18776
+ BUILTIN_VQN (TERNOP, addhn2, 0)
18777
+ BUILTIN_VQN (TERNOP, raddhn2, 0)
18779
- BUILTIN_VSQN_HSDI (UNOP, sqmovun)
18780
+ BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
18781
/* Implemented by aarch64_<sur>qmovn<mode>. */
18782
- BUILTIN_VSQN_HSDI (UNOP, sqmovn)
18783
- BUILTIN_VSQN_HSDI (UNOP, uqmovn)
18784
+ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
18785
+ BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
18786
/* Implemented by aarch64_s<optab><mode>. */
18787
- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs)
18788
- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg)
18789
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
18790
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
18792
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane)
18793
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane)
18794
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq)
18795
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq)
18796
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2)
18797
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2)
18798
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane)
18799
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane)
18800
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq)
18801
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq)
18802
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n)
18803
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n)
18804
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
18805
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
18806
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0)
18807
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0)
18808
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0)
18809
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0)
18810
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0)
18811
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0)
18812
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0)
18813
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0)
18814
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0)
18815
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0)
18816
/* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */
18817
- BUILTIN_VSD_HSI (TERNOP, sqdmlal)
18818
- BUILTIN_VSD_HSI (TERNOP, sqdmlsl)
18819
+ BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0)
18820
+ BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0)
18821
/* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>. */
18822
- BUILTIN_VD_HSI (TERNOP, sqdmlal_n)
18823
- BUILTIN_VD_HSI (TERNOP, sqdmlsl_n)
18824
+ BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0)
18825
+ BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0)
18827
- BUILTIN_VSD_HSI (BINOP, sqdmull)
18828
- BUILTIN_VSD_HSI (TERNOP, sqdmull_lane)
18829
- BUILTIN_VD_HSI (TERNOP, sqdmull_laneq)
18830
- BUILTIN_VD_HSI (BINOP, sqdmull_n)
18831
- BUILTIN_VQ_HSI (BINOP, sqdmull2)
18832
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane)
18833
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq)
18834
- BUILTIN_VQ_HSI (BINOP, sqdmull2_n)
18835
+ BUILTIN_VSD_HSI (BINOP, sqdmull, 0)
18836
+ BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0)
18837
+ BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0)
18838
+ BUILTIN_VD_HSI (BINOP, sqdmull_n, 0)
18839
+ BUILTIN_VQ_HSI (BINOP, sqdmull2, 0)
18840
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0)
18841
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0)
18842
+ BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0)
18843
/* Implemented by aarch64_sq<r>dmulh<mode>. */
18844
- BUILTIN_VSDQ_HSI (BINOP, sqdmulh)
18845
- BUILTIN_VSDQ_HSI (BINOP, sqrdmulh)
18846
+ BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0)
18847
+ BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0)
18848
/* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */
18849
- BUILTIN_VDQHS (TERNOP, sqdmulh_lane)
18850
- BUILTIN_VDQHS (TERNOP, sqdmulh_laneq)
18851
- BUILTIN_VDQHS (TERNOP, sqrdmulh_lane)
18852
- BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq)
18853
- BUILTIN_SD_HSI (TERNOP, sqdmulh_lane)
18854
- BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane)
18855
+ BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0)
18856
+ BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0)
18857
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0)
18858
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0)
18859
+ BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0)
18860
+ BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0)
18862
- BUILTIN_VSDQ_I_DI (BINOP, sshl_n)
18863
- BUILTIN_VSDQ_I_DI (BINOP, ushl_n)
18864
+ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
18865
/* Implemented by aarch64_<sur>shl<mode>. */
18866
- BUILTIN_VSDQ_I_DI (BINOP, sshl)
18867
- BUILTIN_VSDQ_I_DI (BINOP, ushl)
18868
- BUILTIN_VSDQ_I_DI (BINOP, srshl)
18869
- BUILTIN_VSDQ_I_DI (BINOP, urshl)
18870
+ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
18871
+ BUILTIN_VSDQ_I_DI (BINOP, ushl, 0)
18872
+ BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
18873
+ BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
18875
- BUILTIN_VSDQ_I_DI (SHIFTIMM, sshr_n)
18876
- BUILTIN_VSDQ_I_DI (SHIFTIMM, ushr_n)
18877
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3)
18878
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
18879
/* Implemented by aarch64_<sur>shr_n<mode>. */
18880
- BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n)
18881
- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n)
18882
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
18883
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
18884
/* Implemented by aarch64_<sur>sra_n<mode>. */
18885
- BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n)
18886
- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n)
18887
- BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n)
18888
- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n)
18889
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0)
18890
+ BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0)
18891
+ BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0)
18892
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0)
18893
/* Implemented by aarch64_<sur>shll_n<mode>. */
18894
- BUILTIN_VDW (SHIFTIMM, sshll_n)
18895
- BUILTIN_VDW (SHIFTIMM, ushll_n)
18896
+ BUILTIN_VDW (SHIFTIMM, sshll_n, 0)
18897
+ BUILTIN_VDW (SHIFTIMM, ushll_n, 0)
18898
/* Implemented by aarch64_<sur>shll2_n<mode>. */
18899
- BUILTIN_VQW (SHIFTIMM, sshll2_n)
18900
- BUILTIN_VQW (SHIFTIMM, ushll2_n)
18901
+ BUILTIN_VQW (SHIFTIMM, sshll2_n, 0)
18902
+ BUILTIN_VQW (SHIFTIMM, ushll2_n, 0)
18903
/* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
18904
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n)
18905
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n)
18906
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n)
18907
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n)
18908
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n)
18909
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n)
18910
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0)
18911
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0)
18912
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0)
18913
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0)
18914
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0)
18915
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0)
18916
/* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
18917
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n)
18918
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n)
18919
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n)
18920
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n)
18921
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
18922
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0)
18923
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
18924
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0)
18925
/* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
18926
- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n)
18927
- BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n)
18928
- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n)
18929
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0)
18930
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
18931
+ BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
18933
/* Implemented by aarch64_cm<cmp><mode>. */
18934
- BUILTIN_VSDQ_I_DI (BINOP, cmeq)
18935
- BUILTIN_VSDQ_I_DI (BINOP, cmge)
18936
- BUILTIN_VSDQ_I_DI (BINOP, cmgt)
18937
- BUILTIN_VSDQ_I_DI (BINOP, cmle)
18938
- BUILTIN_VSDQ_I_DI (BINOP, cmlt)
18939
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
18940
+ BUILTIN_VALLDI (BINOP, cmge, 0)
18941
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
18942
+ BUILTIN_VALLDI (BINOP, cmle, 0)
18943
+ BUILTIN_VALLDI (BINOP, cmlt, 0)
18944
/* Implemented by aarch64_cm<cmp><mode>. */
18945
- BUILTIN_VSDQ_I_DI (BINOP, cmhs)
18946
- BUILTIN_VSDQ_I_DI (BINOP, cmhi)
18947
- BUILTIN_VSDQ_I_DI (BINOP, cmtst)
18948
+ BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
18949
+ BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
18950
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
18952
- /* Implemented by aarch64_<fmaxmin><mode>. */
18953
- BUILTIN_VDQF (BINOP, fmax)
18954
- BUILTIN_VDQF (BINOP, fmin)
18955
- /* Implemented by aarch64_<maxmin><mode>. */
18956
- BUILTIN_VDQ_BHSI (BINOP, smax)
18957
- BUILTIN_VDQ_BHSI (BINOP, smin)
18958
- BUILTIN_VDQ_BHSI (BINOP, umax)
18959
- BUILTIN_VDQ_BHSI (BINOP, umin)
18960
+ /* Implemented by reduc_<sur>plus_<mode>. */
18961
+ BUILTIN_VALL (UNOP, reduc_splus_, 10)
18962
+ BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
18964
- /* Implemented by aarch64_frint<frint_suffix><mode>. */
18965
- BUILTIN_VDQF (UNOP, frintz)
18966
- BUILTIN_VDQF (UNOP, frintp)
18967
- BUILTIN_VDQF (UNOP, frintm)
18968
- BUILTIN_VDQF (UNOP, frinti)
18969
- BUILTIN_VDQF (UNOP, frintx)
18970
- BUILTIN_VDQF (UNOP, frinta)
18971
+ /* Implemented by reduc_<maxmin_uns>_<mode>. */
18972
+ BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
18973
+ BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
18974
+ BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10)
18975
+ BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10)
18976
+ BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10)
18977
+ BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10)
18979
- /* Implemented by aarch64_fcvt<frint_suffix><su><mode>. */
18980
- BUILTIN_VDQF (UNOP, fcvtzs)
18981
- BUILTIN_VDQF (UNOP, fcvtzu)
18982
- BUILTIN_VDQF (UNOP, fcvtas)
18983
- BUILTIN_VDQF (UNOP, fcvtau)
18984
- BUILTIN_VDQF (UNOP, fcvtps)
18985
- BUILTIN_VDQF (UNOP, fcvtpu)
18986
- BUILTIN_VDQF (UNOP, fcvtms)
18987
- BUILTIN_VDQF (UNOP, fcvtmu)
18988
+ /* Implemented by <maxmin><mode>3.
18989
+ smax variants map to fmaxnm,
18990
+ smax_nan variants map to fmax. */
18991
+ BUILTIN_VDQIF (BINOP, smax, 3)
18992
+ BUILTIN_VDQIF (BINOP, smin, 3)
18993
+ BUILTIN_VDQ_BHSI (BINOP, umax, 3)
18994
+ BUILTIN_VDQ_BHSI (BINOP, umin, 3)
18995
+ BUILTIN_VDQF (BINOP, smax_nan, 3)
18996
+ BUILTIN_VDQF (BINOP, smin_nan, 3)
18998
+ /* Implemented by <frint_pattern><mode>2. */
18999
+ BUILTIN_VDQF (UNOP, btrunc, 2)
19000
+ BUILTIN_VDQF (UNOP, ceil, 2)
19001
+ BUILTIN_VDQF (UNOP, floor, 2)
19002
+ BUILTIN_VDQF (UNOP, nearbyint, 2)
19003
+ BUILTIN_VDQF (UNOP, rint, 2)
19004
+ BUILTIN_VDQF (UNOP, round, 2)
19005
+ BUILTIN_VDQF (UNOP, frintn, 2)
19007
+ /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
19008
+ VAR1 (UNOP, lbtruncv2sf, 2, v2si)
19009
+ VAR1 (UNOP, lbtruncv4sf, 2, v4si)
19010
+ VAR1 (UNOP, lbtruncv2df, 2, v2di)
19012
+ VAR1 (UNOP, lbtruncuv2sf, 2, v2si)
19013
+ VAR1 (UNOP, lbtruncuv4sf, 2, v4si)
19014
+ VAR1 (UNOP, lbtruncuv2df, 2, v2di)
19016
+ VAR1 (UNOP, lroundv2sf, 2, v2si)
19017
+ VAR1 (UNOP, lroundv4sf, 2, v4si)
19018
+ VAR1 (UNOP, lroundv2df, 2, v2di)
19019
+ /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
19020
+ VAR1 (UNOP, lroundsf, 2, si)
19021
+ VAR1 (UNOP, lrounddf, 2, di)
19023
+ VAR1 (UNOP, lrounduv2sf, 2, v2si)
19024
+ VAR1 (UNOP, lrounduv4sf, 2, v4si)
19025
+ VAR1 (UNOP, lrounduv2df, 2, v2di)
19026
+ VAR1 (UNOP, lroundusf, 2, si)
19027
+ VAR1 (UNOP, lroundudf, 2, di)
19029
+ VAR1 (UNOP, lceilv2sf, 2, v2si)
19030
+ VAR1 (UNOP, lceilv4sf, 2, v4si)
19031
+ VAR1 (UNOP, lceilv2df, 2, v2di)
19033
+ VAR1 (UNOP, lceiluv2sf, 2, v2si)
19034
+ VAR1 (UNOP, lceiluv4sf, 2, v4si)
19035
+ VAR1 (UNOP, lceiluv2df, 2, v2di)
19036
+ VAR1 (UNOP, lceilusf, 2, si)
19037
+ VAR1 (UNOP, lceiludf, 2, di)
19039
+ VAR1 (UNOP, lfloorv2sf, 2, v2si)
19040
+ VAR1 (UNOP, lfloorv4sf, 2, v4si)
19041
+ VAR1 (UNOP, lfloorv2df, 2, v2di)
19043
+ VAR1 (UNOP, lflooruv2sf, 2, v2si)
19044
+ VAR1 (UNOP, lflooruv4sf, 2, v4si)
19045
+ VAR1 (UNOP, lflooruv2df, 2, v2di)
19046
+ VAR1 (UNOP, lfloorusf, 2, si)
19047
+ VAR1 (UNOP, lfloorudf, 2, di)
19049
+ VAR1 (UNOP, lfrintnv2sf, 2, v2si)
19050
+ VAR1 (UNOP, lfrintnv4sf, 2, v4si)
19051
+ VAR1 (UNOP, lfrintnv2df, 2, v2di)
19052
+ VAR1 (UNOP, lfrintnsf, 2, si)
19053
+ VAR1 (UNOP, lfrintndf, 2, di)
19055
+ VAR1 (UNOP, lfrintnuv2sf, 2, v2si)
19056
+ VAR1 (UNOP, lfrintnuv4sf, 2, v4si)
19057
+ VAR1 (UNOP, lfrintnuv2df, 2, v2di)
19058
+ VAR1 (UNOP, lfrintnusf, 2, si)
19059
+ VAR1 (UNOP, lfrintnudf, 2, di)
19061
+ /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
19062
+ VAR1 (UNOP, floatv2si, 2, v2sf)
19063
+ VAR1 (UNOP, floatv4si, 2, v4sf)
19064
+ VAR1 (UNOP, floatv2di, 2, v2df)
19066
+ VAR1 (UNOP, floatunsv2si, 2, v2sf)
19067
+ VAR1 (UNOP, floatunsv4si, 2, v4sf)
19068
+ VAR1 (UNOP, floatunsv2di, 2, v2df)
19071
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
19072
- BUILTIN_VALL (BINOP, zip1)
19073
- BUILTIN_VALL (BINOP, zip2)
19074
- BUILTIN_VALL (BINOP, uzp1)
19075
- BUILTIN_VALL (BINOP, uzp2)
19076
- BUILTIN_VALL (BINOP, trn1)
19077
- BUILTIN_VALL (BINOP, trn2)
19078
+ BUILTIN_VALL (BINOP, zip1, 0)
19079
+ BUILTIN_VALL (BINOP, zip2, 0)
19080
+ BUILTIN_VALL (BINOP, uzp1, 0)
19081
+ BUILTIN_VALL (BINOP, uzp2, 0)
19082
+ BUILTIN_VALL (BINOP, trn1, 0)
19083
+ BUILTIN_VALL (BINOP, trn2, 0)
19085
+ /* Implemented by
19086
+ aarch64_frecp<FRECP:frecp_suffix><mode>. */
19087
+ BUILTIN_GPF (UNOP, frecpe, 0)
19088
+ BUILTIN_GPF (BINOP, frecps, 0)
19089
+ BUILTIN_GPF (UNOP, frecpx, 0)
19091
+ BUILTIN_VDQF (UNOP, frecpe, 0)
19092
+ BUILTIN_VDQF (BINOP, frecps, 0)
19094
+ BUILTIN_VALLDI (UNOP, abs, 2)
19096
+ VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
19097
+ VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
19099
+ VAR1 (UNOP, float_extend_lo_, 0, v2df)
19100
+ VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
19102
/* Implemented by aarch64_ld1<VALL:mode>. */
19103
- BUILTIN_VALL (LOAD1, ld1)
19104
+ BUILTIN_VALL (LOAD1, ld1, 0)
19106
/* Implemented by aarch64_st1<VALL:mode>. */
19107
- BUILTIN_VALL (STORE1, st1)
19108
+ BUILTIN_VALL (STORE1, st1, 0)
19110
--- a/src/gcc/config/aarch64/constraints.md
19111
+++ b/src/gcc/config/aarch64/constraints.md
19113
"Integer constant zero."
19114
(match_test "op == const0_rtx"))
19116
-(define_constraint "Usa"
19117
- "A constraint that matches an absolute symbolic address."
19118
- (and (match_code "const,symbol_ref")
19119
- (match_test "aarch64_symbolic_address_p (op)")))
19121
(define_constraint "Ush"
19122
"A constraint that matches an absolute symbolic address high part."
19123
(and (match_code "high")
19124
@@ -148,10 +143,25 @@
19126
A constraint that matches vector of immediates."
19127
(and (match_code "const_vector")
19128
- (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op),
19129
- NULL, NULL, NULL,
19130
- NULL, NULL) != 0")))
19131
+ (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op),
19134
+(define_constraint "Dh"
19136
+ A constraint that matches an immediate operand valid for\
19137
+ AdvSIMD scalar move in HImode."
19138
+ (and (match_code "const_int")
19139
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
19142
+(define_constraint "Dq"
19144
+ A constraint that matches an immediate operand valid for\
19145
+ AdvSIMD scalar move in QImode."
19146
+ (and (match_code "const_int")
19147
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
19150
(define_constraint "Dl"
19152
A constraint that matches vector of immediates for left shifts."
19153
--- a/src/gcc/config/aarch64/aarch64.c
19154
+++ b/src/gcc/config/aarch64/aarch64.c
19156
#include "gimple.h"
19157
#include "optabs.h"
19158
#include "dwarf2.h"
19159
+#include "cfgloop.h"
19160
+#include "tree-vectorizer.h"
19162
/* Classifies an address.
19165
enum aarch64_symbol_type symbol_type;
19168
+struct simd_immediate_info
19172
+ int element_width;
19177
/* The current code model. */
19178
enum aarch64_code_model aarch64_cmodel;
19180
@@ -103,8 +114,6 @@
19181
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
19182
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
19183
static void aarch64_override_options_after_change (void);
19184
-static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
19185
- int *, unsigned char *, int *, int *);
19186
static bool aarch64_vector_mode_supported_p (enum machine_mode);
19187
static unsigned bit_count (unsigned HOST_WIDE_INT);
19188
static bool aarch64_const_vec_all_same_int_p (rtx,
19189
@@ -178,14 +187,35 @@
19190
NAMED_PARAM (FP2FP, 4)
19193
+/* Generic costs for vector insn classes. */
19194
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
19197
+static const struct cpu_vector_cost generic_vector_cost =
19199
+ NAMED_PARAM (scalar_stmt_cost, 1),
19200
+ NAMED_PARAM (scalar_load_cost, 1),
19201
+ NAMED_PARAM (scalar_store_cost, 1),
19202
+ NAMED_PARAM (vec_stmt_cost, 1),
19203
+ NAMED_PARAM (vec_to_scalar_cost, 1),
19204
+ NAMED_PARAM (scalar_to_vec_cost, 1),
19205
+ NAMED_PARAM (vec_align_load_cost, 1),
19206
+ NAMED_PARAM (vec_unalign_load_cost, 1),
19207
+ NAMED_PARAM (vec_unalign_store_cost, 1),
19208
+ NAMED_PARAM (vec_store_cost, 1),
19209
+ NAMED_PARAM (cond_taken_branch_cost, 3),
19210
+ NAMED_PARAM (cond_not_taken_branch_cost, 1)
19213
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
19216
static const struct tune_params generic_tunings =
19218
&generic_rtx_cost_table,
19219
&generic_addrcost_table,
19220
&generic_regmove_cost,
19221
+ &generic_vector_cost,
19222
NAMED_PARAM (memmov_cost, 4)
19225
@@ -524,13 +554,15 @@
19229
+ case SYMBOL_TINY_ABSOLUTE:
19230
+ emit_insn (gen_rtx_SET (Pmode, dest, imm));
19233
case SYMBOL_SMALL_GOT:
19235
rtx tmp_reg = dest;
19236
if (can_create_pseudo_p ())
19238
- tmp_reg = gen_reg_rtx (Pmode);
19240
+ tmp_reg = gen_reg_rtx (Pmode);
19241
emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
19242
emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
19244
@@ -604,49 +636,85 @@
19248
- gcc_assert (GET_MODE (dst) == TImode);
19249
+ enum machine_mode src_mode = GET_MODE (src);
19250
+ enum machine_mode dst_mode = GET_MODE (dst);
19251
+ int src_regno = REGNO (src);
19252
+ int dst_regno = REGNO (dst);
19254
+ gcc_assert (dst_mode == TImode || dst_mode == TFmode);
19256
if (REG_P (dst) && REG_P (src))
19258
- int src_regno = REGNO (src);
19259
- int dst_regno = REGNO (dst);
19260
+ gcc_assert (src_mode == TImode || src_mode == TFmode);
19262
- gcc_assert (GET_MODE (src) == TImode);
19264
/* Handle r -> w, w -> r. */
19265
if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
19267
- emit_insn (gen_aarch64_movtilow_di (dst,
19268
- gen_lowpart (word_mode, src)));
19269
- emit_insn (gen_aarch64_movtihigh_di (dst,
19270
- gen_highpart (word_mode, src)));
19272
+ switch (src_mode) {
19275
+ (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
19277
+ (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
19281
+ (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
19283
+ (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
19286
+ gcc_unreachable ();
19289
else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
19291
- emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
19293
- emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
19296
+ switch (src_mode) {
19299
+ (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
19301
+ (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
19305
+ (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
19307
+ (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
19310
+ gcc_unreachable ();
19313
/* Fall through to r -> r cases. */
19316
- low_dst = gen_lowpart (word_mode, dst);
19317
- if (REG_P (low_dst)
19318
- && reg_overlap_mentioned_p (low_dst, src))
19320
- aarch64_emit_move (gen_highpart (word_mode, dst),
19321
- gen_highpart_mode (word_mode, TImode, src));
19322
- aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
19326
- aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
19327
- aarch64_emit_move (gen_highpart (word_mode, dst),
19328
- gen_highpart_mode (word_mode, TImode, src));
19330
+ switch (dst_mode) {
19332
+ low_dst = gen_lowpart (word_mode, dst);
19333
+ if (REG_P (low_dst)
19334
+ && reg_overlap_mentioned_p (low_dst, src))
19336
+ aarch64_emit_move (gen_highpart (word_mode, dst),
19337
+ gen_highpart_mode (word_mode, TImode, src));
19338
+ aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
19342
+ aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
19343
+ aarch64_emit_move (gen_highpart (word_mode, dst),
19344
+ gen_highpart_mode (word_mode, TImode, src));
19348
+ emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
19349
+ gen_rtx_REG (DFmode, src_regno));
19350
+ emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
19351
+ gen_rtx_REG (DFmode, src_regno + 1));
19354
+ gcc_unreachable ();
19359
@@ -656,11 +724,56 @@
19360
|| ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
19363
+/* Split a complex SIMD move. */
19366
+aarch64_split_simd_move (rtx dst, rtx src)
19368
+ enum machine_mode src_mode = GET_MODE (src);
19369
+ enum machine_mode dst_mode = GET_MODE (dst);
19371
+ gcc_assert (VECTOR_MODE_P (dst_mode));
19373
+ if (REG_P (dst) && REG_P (src))
19375
+ rtx (*gen) (rtx, rtx);
19377
+ gcc_assert (VECTOR_MODE_P (src_mode));
19379
+ switch (src_mode)
19382
+ gen = gen_aarch64_split_simd_movv16qi;
19385
+ gen = gen_aarch64_split_simd_movv8hi;
19388
+ gen = gen_aarch64_split_simd_movv4si;
19391
+ gen = gen_aarch64_split_simd_movv2di;
19394
+ gen = gen_aarch64_split_simd_movv4sf;
19397
+ gen = gen_aarch64_split_simd_movv2df;
19400
+ gcc_unreachable ();
19403
+ emit_insn (gen (dst, src));
19409
-aarch64_force_temporary (rtx x, rtx value)
19410
+aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
19412
if (can_create_pseudo_p ())
19413
- return force_reg (Pmode, value);
19414
+ return force_reg (mode, value);
19417
x = aarch64_emit_move (x, value);
19418
@@ -672,15 +785,16 @@
19420
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
19422
- if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
19423
+ if (!aarch64_plus_immediate (GEN_INT (offset), mode))
19426
/* Load the full offset into a register. This
19427
might be improvable in the future. */
19428
high = GEN_INT (offset);
19430
- high = aarch64_force_temporary (temp, high);
19431
- reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
19432
+ high = aarch64_force_temporary (mode, temp, high);
19433
+ reg = aarch64_force_temporary (mode, temp,
19434
+ gen_rtx_PLUS (mode, high, reg));
19436
return plus_constant (mode, reg, offset);
19438
@@ -719,7 +833,7 @@
19439
&& targetm.cannot_force_const_mem (mode, imm))
19441
gcc_assert(can_create_pseudo_p ());
19442
- base = aarch64_force_temporary (dest, base);
19443
+ base = aarch64_force_temporary (mode, dest, base);
19444
base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
19445
aarch64_emit_move (dest, base);
19447
@@ -736,7 +850,7 @@
19448
if (offset != const0_rtx)
19450
gcc_assert(can_create_pseudo_p ());
19451
- base = aarch64_force_temporary (dest, base);
19452
+ base = aarch64_force_temporary (mode, dest, base);
19453
base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
19454
aarch64_emit_move (dest, base);
19456
@@ -745,6 +859,7 @@
19458
case SYMBOL_SMALL_TPREL:
19459
case SYMBOL_SMALL_ABSOLUTE:
19460
+ case SYMBOL_TINY_ABSOLUTE:
19461
aarch64_load_symref_appropriately (dest, imm, sty);
19464
@@ -2553,12 +2668,14 @@
19465
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
19469
if (GET_CODE (x) == HIGH)
19472
split_const (x, &base, &offset);
19473
if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
19474
- return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
19475
+ return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
19476
+ != SYMBOL_FORCE_TO_MEM);
19478
return aarch64_tls_referenced_p (x);
19480
@@ -2996,10 +3113,13 @@
19482
/* Classify the base of symbolic expression X, given that X appears in
19483
context CONTEXT. */
19484
-static enum aarch64_symbol_type
19485
-aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
19487
+enum aarch64_symbol_type
19488
+aarch64_classify_symbolic_expression (rtx x,
19489
+ enum aarch64_symbol_context context)
19493
split_const (x, &x, &offset);
19494
return aarch64_classify_symbol (x, context);
19496
@@ -3087,7 +3207,8 @@
19497
if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
19499
&& (code == EQ || code == NE || code == LT || code == GE)
19500
- && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
19501
+ && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
19502
+ || GET_CODE (x) == NEG))
19505
/* A compare with a shifted operand. Because of canonicalization,
19506
@@ -3282,26 +3403,6 @@
19507
asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
19511
- /* Print the least significant register of a pair (TImode) of regs. */
19512
- if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
19514
- output_operand_lossage ("invalid operand for '%%%c'", code);
19517
- asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
19521
- /* Print the most significant register of a pair (TImode) of regs. */
19522
- if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
19524
- output_operand_lossage ("invalid operand for '%%%c'", code);
19527
- asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
19531
/* Print a condition (eq, ne, etc). */
19533
@@ -3349,7 +3450,7 @@
19534
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
19537
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
19538
+ asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
19542
@@ -3362,18 +3463,17 @@
19543
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
19546
- asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
19547
- REGNO (x) - V0_REGNUM + (code - 'S'));
19548
+ asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
19552
- /* Print integer constant in hex. */
19553
+ /* Print bottom 16 bits of integer constant in hex. */
19554
if (GET_CODE (x) != CONST_INT)
19556
output_operand_lossage ("invalid operand for '%%%c'", code);
19559
- asm_fprintf (f, "0x%wx", UINTVAL (x));
19560
+ asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
19564
@@ -3383,20 +3483,19 @@
19565
if (x == const0_rtx
19566
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
19568
- asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
19569
+ asm_fprintf (f, "%czr", code);
19573
if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
19575
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
19576
- REGNO (x) - R0_REGNUM);
19577
+ asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
19581
if (REG_P (x) && REGNO (x) == SP_REGNUM)
19583
- asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
19584
+ asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
19588
@@ -4601,6 +4700,101 @@
19589
return aarch64_tune_params->memmov_cost;
19592
+/* Vectorizer cost model target hooks. */
19594
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
19596
+aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
19598
+ int misalign ATTRIBUTE_UNUSED)
19600
+ unsigned elements;
19602
+ switch (type_of_cost)
19604
+ case scalar_stmt:
19605
+ return aarch64_tune_params->vec_costs->scalar_stmt_cost;
19607
+ case scalar_load:
19608
+ return aarch64_tune_params->vec_costs->scalar_load_cost;
19610
+ case scalar_store:
19611
+ return aarch64_tune_params->vec_costs->scalar_store_cost;
19613
+ case vector_stmt:
19614
+ return aarch64_tune_params->vec_costs->vec_stmt_cost;
19616
+ case vector_load:
19617
+ return aarch64_tune_params->vec_costs->vec_align_load_cost;
19619
+ case vector_store:
19620
+ return aarch64_tune_params->vec_costs->vec_store_cost;
19622
+ case vec_to_scalar:
19623
+ return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
19625
+ case scalar_to_vec:
19626
+ return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
19628
+ case unaligned_load:
19629
+ return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
19631
+ case unaligned_store:
19632
+ return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
19634
+ case cond_branch_taken:
19635
+ return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
19637
+ case cond_branch_not_taken:
19638
+ return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
19641
+ case vec_promote_demote:
19642
+ return aarch64_tune_params->vec_costs->vec_stmt_cost;
19644
+ case vec_construct:
19645
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
19646
+ return elements / 2 + 1;
19649
+ gcc_unreachable ();
19653
+/* Implement targetm.vectorize.add_stmt_cost. */
19655
+aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
19656
+ struct _stmt_vec_info *stmt_info, int misalign,
19657
+ enum vect_cost_model_location where)
19659
+ unsigned *cost = (unsigned *) data;
19660
+ unsigned retval = 0;
19662
+ if (flag_vect_cost_model)
19664
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
19666
+ aarch64_builtin_vectorization_cost (kind, vectype, misalign);
19668
+ /* Statements in an inner loop relative to the loop being
19669
+ vectorized are weighted more heavily. The value here is
19670
+ a function (linear for now) of the loop nest level. */
19671
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
19673
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
19674
+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
19675
+ unsigned nest_level = loop_depth (loop);
19677
+ count *= nest_level;
19680
+ retval = (unsigned) (count * stmt_cost);
19681
+ cost[where] += retval;
19687
static void initialize_aarch64_code_model (void);
19689
/* Parse the architecture extension string. */
19690
@@ -4956,6 +5150,7 @@
19692
/* Return the method that should be used to access SYMBOL_REF or
19693
LABEL_REF X in context CONTEXT. */
19695
enum aarch64_symbol_type
19696
aarch64_classify_symbol (rtx x,
19697
enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
19698
@@ -4969,6 +5164,8 @@
19700
case AARCH64_CMODEL_TINY_PIC:
19701
case AARCH64_CMODEL_TINY:
19702
+ return SYMBOL_TINY_ABSOLUTE;
19704
case AARCH64_CMODEL_SMALL_PIC:
19705
case AARCH64_CMODEL_SMALL:
19706
return SYMBOL_SMALL_ABSOLUTE;
19707
@@ -4978,71 +5175,47 @@
19711
- gcc_assert (GET_CODE (x) == SYMBOL_REF);
19713
- switch (aarch64_cmodel)
19714
+ if (GET_CODE (x) == SYMBOL_REF)
19716
- case AARCH64_CMODEL_LARGE:
19717
- return SYMBOL_FORCE_TO_MEM;
19719
- case AARCH64_CMODEL_TINY:
19720
- case AARCH64_CMODEL_SMALL:
19722
- /* This is needed to get DFmode, TImode constants to be loaded off
19723
- the constant pool. Is it necessary to dump TImode values into
19724
- the constant pool. We don't handle TImode constant loads properly
19725
- yet and hence need to use the constant pool. */
19726
- if (CONSTANT_POOL_ADDRESS_P (x))
19727
+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE
19728
+ || CONSTANT_POOL_ADDRESS_P (x))
19729
return SYMBOL_FORCE_TO_MEM;
19731
if (aarch64_tls_symbol_p (x))
19732
return aarch64_classify_tls_symbol (x);
19734
- if (SYMBOL_REF_WEAK (x))
19735
- return SYMBOL_FORCE_TO_MEM;
19736
+ switch (aarch64_cmodel)
19738
+ case AARCH64_CMODEL_TINY:
19739
+ if (SYMBOL_REF_WEAK (x))
19740
+ return SYMBOL_FORCE_TO_MEM;
19741
+ return SYMBOL_TINY_ABSOLUTE;
19743
- return SYMBOL_SMALL_ABSOLUTE;
19744
+ case AARCH64_CMODEL_SMALL:
19745
+ if (SYMBOL_REF_WEAK (x))
19746
+ return SYMBOL_FORCE_TO_MEM;
19747
+ return SYMBOL_SMALL_ABSOLUTE;
19749
- case AARCH64_CMODEL_TINY_PIC:
19750
- case AARCH64_CMODEL_SMALL_PIC:
19751
+ case AARCH64_CMODEL_TINY_PIC:
19752
+ if (!aarch64_symbol_binds_local_p (x))
19753
+ return SYMBOL_SMALL_GOT;
19754
+ return SYMBOL_TINY_ABSOLUTE;
19756
- if (CONSTANT_POOL_ADDRESS_P (x))
19757
- return SYMBOL_FORCE_TO_MEM;
19758
+ case AARCH64_CMODEL_SMALL_PIC:
19759
+ if (!aarch64_symbol_binds_local_p (x))
19760
+ return SYMBOL_SMALL_GOT;
19761
+ return SYMBOL_SMALL_ABSOLUTE;
19763
- if (aarch64_tls_symbol_p (x))
19764
- return aarch64_classify_tls_symbol (x);
19766
+ gcc_unreachable ();
19770
- if (!aarch64_symbol_binds_local_p (x))
19771
- return SYMBOL_SMALL_GOT;
19773
- return SYMBOL_SMALL_ABSOLUTE;
19776
- gcc_unreachable ();
19778
/* By default push everything into the constant pool. */
19779
return SYMBOL_FORCE_TO_MEM;
19782
-/* Return true if X is a symbolic constant that can be used in context
19783
- CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
19786
-aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
19787
- enum aarch64_symbol_type *symbol_type)
19790
- split_const (x, &x, &offset);
19791
- if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
19792
- *symbol_type = aarch64_classify_symbol (x, context);
19796
- /* No checking of offset at this point. */
19801
aarch64_constant_address_p (rtx x)
19803
return (CONSTANT_P (x) && memory_address_p (DImode, x));
19804
@@ -5092,8 +5265,7 @@
19805
/* This could probably go away because
19806
we now decompose CONST_INTs according to expand_mov_immediate. */
19807
if ((GET_CODE (x) == CONST_VECTOR
19808
- && aarch64_simd_valid_immediate (x, mode, false,
19809
- NULL, NULL, NULL, NULL, NULL) != -1)
19810
+ && aarch64_simd_valid_immediate (x, mode, false, NULL))
19811
|| CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
19812
return !targetm.cannot_force_const_mem (mode, x);
19814
@@ -5924,32 +6096,57 @@
19818
-/* Return quad mode as the preferred SIMD mode. */
19819
+/* Return appropriate SIMD container
19820
+ for MODE within a vector of WIDTH bits. */
19821
static enum machine_mode
19822
-aarch64_preferred_simd_mode (enum machine_mode mode)
19823
+aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
19825
+ gcc_assert (width == 64 || width == 128);
19838
- return V16QImode;
19846
+ if (width == 128)
19858
+ return V16QImode;
19882
+/* Return 128-bit container as the preferred SIMD mode for MODE. */
19883
+static enum machine_mode
19884
+aarch64_preferred_simd_mode (enum machine_mode mode)
19886
+ return aarch64_simd_container_mode (mode, 128);
19889
/* Return the bitmask of possible vector sizes for the vectorizer
19890
to iterate over. */
19891
static unsigned int
19892
@@ -6037,7 +6234,7 @@
19895
/* Return the equivalent letter for size. */
19896
-static unsigned char
19898
sizetochar (int size)
19901
@@ -6084,15 +6281,10 @@
19902
return aarch64_float_const_representable_p (x0);
19905
-/* TODO: This function returns values similar to those
19906
- returned by neon_valid_immediate in gcc/config/arm/arm.c
19907
- but the API here is different enough that these magic numbers
19908
- are not used. It should be sufficient to return true or false. */
19910
-aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
19911
- rtx *modconst, int *elementwidth,
19912
- unsigned char *elementchar,
19913
- int *mvn, int *shift)
19914
+/* Return true for valid and false for invalid. */
19916
+aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
19917
+ struct simd_immediate_info *info)
19919
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
19921
@@ -6103,7 +6295,6 @@
19923
immtype = (CLASS); \
19924
elsize = (ELSIZE); \
19925
- elchar = sizetochar (elsize); \
19926
eshift = (SHIFT); \
19929
@@ -6112,36 +6303,25 @@
19930
unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
19931
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
19932
unsigned char bytes[16];
19933
- unsigned char elchar = 0;
19934
int immtype = -1, matches;
19935
unsigned int invmask = inverse ? 0xff : 0;
19938
if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19940
- bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
19941
- int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
19942
+ if (! (aarch64_simd_imm_zero_p (op, mode)
19943
+ || aarch64_vect_float_const_representable_p (op)))
19946
- if (!(simd_imm_zero
19947
- || aarch64_vect_float_const_representable_p (op)))
19951
+ info->value = CONST_VECTOR_ELT (op, 0);
19952
+ info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
19953
+ info->mvn = false;
19958
- *modconst = CONST_VECTOR_ELT (op, 0);
19960
- if (elementwidth)
19961
- *elementwidth = elem_width;
19964
- *elementchar = sizetochar (elem_width);
19969
- if (simd_imm_zero)
19976
/* Splat vector constant out into a byte vector. */
19977
@@ -6215,16 +6395,16 @@
19978
CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
19980
CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
19981
- && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
19982
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
19984
CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
19985
- && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
19986
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
19988
CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
19989
- && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
19990
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
19992
CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
19993
- && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
19994
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
19996
CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
19998
@@ -6233,31 +6413,20 @@
20002
- /* TODO: Currently the assembler cannot handle types 12 to 15.
20003
- And there is no way to specify cmode through the compiler.
20004
- Disable them till there is support in the assembler. */
20005
- if (immtype == -1
20006
- || (immtype >= 12 && immtype <= 15)
20007
- || immtype == 18)
20009
+ if (immtype == -1)
20014
+ info->element_width = elsize;
20015
+ info->mvn = emvn != 0;
20016
+ info->shift = eshift;
20018
- if (elementwidth)
20019
- *elementwidth = elsize;
20020
+ unsigned HOST_WIDE_INT imm = 0;
20023
- *elementchar = elchar;
20024
+ if (immtype >= 12 && immtype <= 15)
20025
+ info->msl = true;
20035
- unsigned HOST_WIDE_INT imm = 0;
20037
/* Un-invert bytes of recognized vector, if necessary. */
20039
for (i = 0; i < idx; i++)
20040
@@ -6272,68 +6441,27 @@
20041
imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
20042
<< (i * BITS_PER_UNIT);
20044
- *modconst = GEN_INT (imm);
20047
+ info->value = GEN_INT (imm);
20051
- unsigned HOST_WIDE_INT imm = 0;
20053
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
20054
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
20056
- for (i = 0; i < elsize / BITS_PER_UNIT; i++)
20057
- imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
20059
/* Construct 'abcdefgh' because the assembler cannot handle
20060
- generic constants. */
20061
- gcc_assert (shift != NULL && mvn != NULL);
20063
+ generic constants. */
20066
- imm = (imm >> *shift) & 0xff;
20067
- *modconst = GEN_INT (imm);
20069
+ imm = (imm >> info->shift) & 0xff;
20070
+ info->value = GEN_INT (imm);
20079
-/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
20080
- (or, implicitly, MVNI) immediate. Write back width per element
20081
- to *ELEMENTWIDTH, and a modified constant (whatever should be output
20082
- for a MOVI instruction) in *MODCONST. */
20084
-aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
20085
- rtx *modconst, int *elementwidth,
20086
- unsigned char *elementchar,
20087
- int *mvn, int *shift)
20091
- unsigned char tmpwidthc;
20092
- int tmpmvn = 0, tmpshift = 0;
20093
- int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
20094
- &tmpwidth, &tmpwidthc,
20095
- &tmpmvn, &tmpshift);
20097
- if (retval == -1)
20101
- *modconst = tmpconst;
20103
- if (elementwidth)
20104
- *elementwidth = tmpwidth;
20107
- *elementchar = tmpwidthc;
20113
- *shift = tmpshift;
20119
aarch64_const_vec_all_same_int_p (rtx x,
20120
HOST_WIDE_INT minval,
20121
@@ -6395,6 +6523,25 @@
20126
+aarch64_mov_operand_p (rtx x,
20127
+ enum aarch64_symbol_context context,
20128
+ enum machine_mode mode)
20130
+ if (GET_CODE (x) == HIGH
20131
+ && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
20134
+ if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
20137
+ if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
20140
+ return aarch64_classify_symbolic_expression (x, context)
20141
+ == SYMBOL_TINY_ABSOLUTE;
20144
/* Return a const_int vector of VAL. */
20146
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
20147
@@ -6409,6 +6556,19 @@
20148
return gen_rtx_CONST_VECTOR (mode, v);
20151
+/* Check OP is a legal scalar immediate for the MOVI instruction. */
20154
+aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
20156
+ enum machine_mode vmode;
20158
+ gcc_assert (!VECTOR_MODE_P (mode));
20159
+ vmode = aarch64_preferred_simd_mode (mode);
20160
+ rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
20161
+ return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
20164
/* Construct and return a PARALLEL RTX vector. */
20166
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
20167
@@ -6634,8 +6794,7 @@
20168
gcc_unreachable ();
20170
if (const_vec != NULL_RTX
20171
- && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
20172
- NULL, NULL, NULL))
20173
+ && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
20174
/* Load using MOVI/MVNI. */
20176
else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
20177
@@ -7193,49 +7352,80 @@
20181
-aarch64_output_simd_mov_immediate (rtx *const_vector,
20182
+aarch64_output_simd_mov_immediate (rtx const_vector,
20183
enum machine_mode mode,
20187
- unsigned char widthc;
20188
- int lane_width_bits;
20190
static char templ[40];
20191
- int shift = 0, mvn = 0;
20192
const char *mnemonic;
20193
+ const char *shift_op;
20194
unsigned int lane_count = 0;
20195
+ char element_char;
20198
- aarch64_simd_immediate_valid_for_move (*const_vector, mode,
20199
- const_vector, &lane_width_bits,
20200
- &widthc, &mvn, &shift);
20201
+ struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
20203
+ /* This will return true to show const_vector is legal for use as either
20204
+ a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
20205
+ also update INFO to show how the immediate should be generated. */
20206
+ is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
20207
gcc_assert (is_valid);
20209
+ element_char = sizetochar (info.element_width);
20210
+ lane_count = width / info.element_width;
20212
mode = GET_MODE_INNER (mode);
20213
if (mode == SFmode || mode == DFmode)
20216
- aarch64_float_const_zero_rtx_p (*const_vector);
20217
- gcc_assert (shift == 0);
20218
- mnemonic = zero_p ? "movi" : "fmov";
20219
+ gcc_assert (info.shift == 0 && ! info.mvn);
20220
+ if (aarch64_float_const_zero_rtx_p (info.value))
20221
+ info.value = GEN_INT (0);
20224
+#define buf_size 20
20225
+ REAL_VALUE_TYPE r;
20226
+ REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
20227
+ char float_buf[buf_size] = {'\0'};
20228
+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
20231
+ if (lane_count == 1)
20232
+ snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
20234
+ snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
20235
+ lane_count, element_char, float_buf);
20240
- mnemonic = mvn ? "mvni" : "movi";
20242
- gcc_assert (lane_width_bits != 0);
20243
- lane_count = width / lane_width_bits;
20244
+ mnemonic = info.mvn ? "mvni" : "movi";
20245
+ shift_op = info.msl ? "msl" : "lsl";
20247
if (lane_count == 1)
20248
- snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
20250
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
20251
- mnemonic, lane_count, widthc, shift);
20252
+ snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
20253
+ mnemonic, UINTVAL (info.value));
20254
+ else if (info.shift)
20255
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
20256
+ ", %s %d", mnemonic, lane_count, element_char,
20257
+ UINTVAL (info.value), shift_op, info.shift);
20259
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
20260
- mnemonic, lane_count, widthc);
20261
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
20262
+ mnemonic, lane_count, element_char, UINTVAL (info.value));
20267
+aarch64_output_scalar_simd_mov_immediate (rtx immediate,
20268
+ enum machine_mode mode)
20270
+ enum machine_mode vmode;
20272
+ gcc_assert (!VECTOR_MODE_P (mode));
20273
+ vmode = aarch64_simd_container_mode (mode, 64);
20274
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
20275
+ return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
20278
/* Split operands into moves from op[1] + op[2] into op[0]. */
20281
@@ -7860,6 +8050,9 @@
20282
#undef TARGET_EXPAND_BUILTIN_VA_START
20283
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
20285
+#undef TARGET_FOLD_BUILTIN
20286
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
20288
#undef TARGET_FUNCTION_ARG
20289
#define TARGET_FUNCTION_ARG aarch64_function_arg
20291
@@ -7881,6 +8074,9 @@
20292
#undef TARGET_FRAME_POINTER_REQUIRED
20293
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
20295
+#undef TARGET_GIMPLE_FOLD_BUILTIN
20296
+#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
20298
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
20299
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
20301
@@ -7960,6 +8156,13 @@
20302
#undef TARGET_ARRAY_MODE_SUPPORTED_P
20303
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
20305
+#undef TARGET_VECTORIZE_ADD_STMT_COST
20306
+#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
20308
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
20309
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
20310
+ aarch64_builtin_vectorization_cost
20312
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
20313
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
20315
--- a/src/gcc/config/aarch64/iterators.md
20316
+++ b/src/gcc/config/aarch64/iterators.md
20318
;; Vector Float modes.
20319
(define_mode_iterator VDQF [V2SF V4SF V2DF])
20321
+;; Modes suitable to use as the return type of a vcond expression.
20322
+(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
20324
+;; All Float modes.
20325
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
20327
;; Vector Float modes with 2 elements.
20328
(define_mode_iterator V2F [V2SF V2DF])
20330
@@ -160,10 +166,15 @@
20332
UNSPEC_ASHIFT_SIGNED ; Used in aarch-simd.md.
20333
UNSPEC_ASHIFT_UNSIGNED ; Used in aarch64-simd.md.
20334
+ UNSPEC_FMAX ; Used in aarch64-simd.md.
20335
+ UNSPEC_FMAXNMV ; Used in aarch64-simd.md.
20336
UNSPEC_FMAXV ; Used in aarch64-simd.md.
20337
+ UNSPEC_FMIN ; Used in aarch64-simd.md.
20338
+ UNSPEC_FMINNMV ; Used in aarch64-simd.md.
20339
UNSPEC_FMINV ; Used in aarch64-simd.md.
20340
UNSPEC_FADDV ; Used in aarch64-simd.md.
20341
- UNSPEC_ADDV ; Used in aarch64-simd.md.
20342
+ UNSPEC_SADDV ; Used in aarch64-simd.md.
20343
+ UNSPEC_UADDV ; Used in aarch64-simd.md.
20344
UNSPEC_SMAXV ; Used in aarch64-simd.md.
20345
UNSPEC_SMINV ; Used in aarch64-simd.md.
20346
UNSPEC_UMAXV ; Used in aarch64-simd.md.
20347
@@ -213,13 +224,6 @@
20348
UNSPEC_URSHL ; Used in aarch64-simd.md.
20349
UNSPEC_SQRSHL ; Used in aarch64-simd.md.
20350
UNSPEC_UQRSHL ; Used in aarch64-simd.md.
20351
- UNSPEC_CMEQ ; Used in aarch64-simd.md.
20352
- UNSPEC_CMLE ; Used in aarch64-simd.md.
20353
- UNSPEC_CMLT ; Used in aarch64-simd.md.
20354
- UNSPEC_CMGE ; Used in aarch64-simd.md.
20355
- UNSPEC_CMGT ; Used in aarch64-simd.md.
20356
- UNSPEC_CMHS ; Used in aarch64-simd.md.
20357
- UNSPEC_CMHI ; Used in aarch64-simd.md.
20358
UNSPEC_SSLI ; Used in aarch64-simd.md.
20359
UNSPEC_USLI ; Used in aarch64-simd.md.
20360
UNSPEC_SSRI ; Used in aarch64-simd.md.
20361
@@ -227,10 +231,6 @@
20362
UNSPEC_SSHLL ; Used in aarch64-simd.md.
20363
UNSPEC_USHLL ; Used in aarch64-simd.md.
20364
UNSPEC_ADDP ; Used in aarch64-simd.md.
20365
- UNSPEC_CMTST ; Used in aarch64-simd.md.
20366
- UNSPEC_FMAX ; Used in aarch64-simd.md.
20367
- UNSPEC_FMIN ; Used in aarch64-simd.md.
20368
- UNSPEC_BSL ; Used in aarch64-simd.md.
20369
UNSPEC_TBL ; Used in vector permute patterns.
20370
UNSPEC_CONCAT ; Used in vector permute patterns.
20371
UNSPEC_ZIP1 ; Used in vector permute patterns.
20372
@@ -249,8 +249,12 @@
20373
;; 32-bit version and "%x0" in the 64-bit version.
20374
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
20376
+;; For constraints used in scalar immediate vector moves
20377
+(define_mode_attr hq [(HI "h") (QI "q")])
20379
;; For scalar usage of vector/FP registers
20380
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
20381
+ (SF "s") (DF "d")
20382
(V8QI "") (V16QI "")
20383
(V4HI "") (V8HI "")
20384
(V2SI "") (V4SI "")
20385
@@ -305,7 +309,8 @@
20386
(V4SF ".4s") (V2DF ".2d")
20393
;; Register suffix narrowed modes for VQN.
20394
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
20395
@@ -444,7 +449,8 @@
20396
(V2SI "V2SI") (V4SI "V4SI")
20397
(DI "DI") (V2DI "V2DI")
20398
(V2SF "V2SI") (V4SF "V4SI")
20400
+ (V2DF "V2DI") (DF "DI")
20403
;; Lower case mode of results of comparison operations.
20404
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
20405
@@ -452,7 +458,8 @@
20406
(V2SI "v2si") (V4SI "v4si")
20407
(DI "di") (V2DI "v2di")
20408
(V2SF "v2si") (V4SF "v4si")
20410
+ (V2DF "v2di") (DF "di")
20413
;; Vm for lane instructions is restricted to FP_LO_REGS.
20414
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
20415
@@ -528,9 +535,14 @@
20416
;; Iterator for integer conversions
20417
(define_code_iterator FIXUORS [fix unsigned_fix])
20419
+;; Iterator for float conversions
20420
+(define_code_iterator FLOATUORS [float unsigned_float])
20422
;; Code iterator for variants of vector max and min.
20423
(define_code_iterator MAXMIN [smax smin umax umin])
20425
+(define_code_iterator FMAXMIN [smax smin])
20427
;; Code iterator for variants of vector max and min.
20428
(define_code_iterator ADDSUB [plus minus])
20430
@@ -543,6 +555,15 @@
20431
;; Code iterator for signed variants of vector saturating binary ops.
20432
(define_code_iterator SBINQOPS [ss_plus ss_minus])
20434
+;; Comparison operators for <F>CM.
20435
+(define_code_iterator COMPARISONS [lt le eq ge gt])
20437
+;; Unsigned comparison operators.
20438
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
20440
+;; Unsigned comparison operators.
20441
+(define_code_iterator FAC_COMPARISONS [lt le ge gt])
20443
;; -------------------------------------------------------------------
20445
;; -------------------------------------------------------------------
20446
@@ -555,6 +576,10 @@
20447
(zero_extend "zero_extend")
20448
(sign_extract "extv")
20449
(zero_extract "extzv")
20451
+ (unsigned_fix "fixuns")
20453
+ (unsigned_float "floatuns")
20457
@@ -571,12 +596,37 @@
20470
+;; For comparison operators we use the FCM* and CM* instructions.
20471
+;; As there are no CMLE or CMLT instructions which act on 3 vector
20472
+;; operands, we must use CMGE or CMGT and swap the order of the
20473
+;; source operands.
20475
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
20476
+ (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
20477
+(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
20478
+ (ltu "2") (leu "2") (geu "1") (gtu "1")])
20479
+(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
20480
+ (ltu "1") (leu "1") (geu "2") (gtu "2")])
20482
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
20483
+ (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
20485
+(define_code_attr fix_trunc_optab [(fix "fix_trunc")
20486
+ (unsigned_fix "fixuns_trunc")])
20488
;; Optab prefix for sign/zero-extending operations
20489
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
20490
(div "") (udiv "u")
20491
(fix "") (unsigned_fix "u")
20492
+ (float "s") (unsigned_float "u")
20493
(ss_plus "s") (us_plus "u")
20494
(ss_minus "s") (us_minus "u")])
20496
@@ -601,7 +651,9 @@
20497
(define_code_attr su [(sign_extend "s") (zero_extend "u")
20498
(sign_extract "s") (zero_extract "u")
20499
(fix "s") (unsigned_fix "u")
20500
- (div "s") (udiv "u")])
20501
+ (div "s") (udiv "u")
20502
+ (smax "s") (umax "u")
20503
+ (smin "s") (umin "u")])
20505
;; Emit cbz/cbnz depending on comparison type.
20506
(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])
20507
@@ -610,10 +662,10 @@
20508
(define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")])
20510
;; Max/min attributes.
20511
-(define_code_attr maxmin [(smax "smax")
20515
+(define_code_attr maxmin [(smax "max")
20520
;; MLA/MLS attributes.
20521
(define_code_attr as [(ss_plus "a") (ss_minus "s")])
20522
@@ -635,8 +687,11 @@
20523
(define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV
20524
UNSPEC_SMAXV UNSPEC_SMINV])
20526
-(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV])
20527
+(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
20528
+ UNSPEC_FMAXNMV UNSPEC_FMINNMV])
20530
+(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
20532
(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
20533
UNSPEC_SRHADD UNSPEC_URHADD
20534
UNSPEC_SHSUB UNSPEC_UHSUB
20535
@@ -649,7 +704,7 @@
20536
(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
20537
UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
20539
-(define_int_iterator FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
20540
+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
20542
(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
20544
@@ -680,35 +735,44 @@
20545
UNSPEC_SQSHRN UNSPEC_UQSHRN
20546
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
20548
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
20549
- UNSPEC_CMLE UNSPEC_CMLT])
20551
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
20553
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
20554
UNSPEC_TRN1 UNSPEC_TRN2
20555
UNSPEC_UZP1 UNSPEC_UZP2])
20557
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
20558
- UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
20559
+ UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
20562
(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
20564
+ UNSPEC_FRINTA UNSPEC_FRINTN])
20566
+(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
20568
;; -------------------------------------------------------------------
20569
;; Int Iterators Attributes.
20570
;; -------------------------------------------------------------------
20571
-(define_int_attr maxminv [(UNSPEC_UMAXV "umax")
20572
- (UNSPEC_UMINV "umin")
20573
- (UNSPEC_SMAXV "smax")
20574
- (UNSPEC_SMINV "smin")])
20575
+(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
20576
+ (UNSPEC_UMINV "umin")
20577
+ (UNSPEC_SMAXV "smax")
20578
+ (UNSPEC_SMINV "smin")
20579
+ (UNSPEC_FMAX "smax_nan")
20580
+ (UNSPEC_FMAXNMV "smax")
20581
+ (UNSPEC_FMAXV "smax_nan")
20582
+ (UNSPEC_FMIN "smin_nan")
20583
+ (UNSPEC_FMINNMV "smin")
20584
+ (UNSPEC_FMINV "smin_nan")])
20586
-(define_int_attr fmaxminv [(UNSPEC_FMAXV "max")
20587
- (UNSPEC_FMINV "min")])
20588
+(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
20589
+ (UNSPEC_UMINV "umin")
20590
+ (UNSPEC_SMAXV "smax")
20591
+ (UNSPEC_SMINV "smin")
20592
+ (UNSPEC_FMAX "fmax")
20593
+ (UNSPEC_FMAXNMV "fmaxnm")
20594
+ (UNSPEC_FMAXV "fmax")
20595
+ (UNSPEC_FMIN "fmin")
20596
+ (UNSPEC_FMINNMV "fminnm")
20597
+ (UNSPEC_FMINV "fmin")])
20599
-(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax")
20600
- (UNSPEC_FMIN "fmin")])
20602
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
20603
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
20604
(UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u")
20605
@@ -719,6 +783,7 @@
20606
(UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
20607
(UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
20608
(UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
20609
+ (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
20610
(UNSPEC_SSLI "s") (UNSPEC_USLI "u")
20611
(UNSPEC_SSRI "s") (UNSPEC_USRI "u")
20612
(UNSPEC_USRA "u") (UNSPEC_SSRA "s")
20613
@@ -768,12 +833,6 @@
20614
(UNSPEC_RADDHN2 "add")
20615
(UNSPEC_RSUBHN2 "sub")])
20617
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
20618
- (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
20619
- (UNSPEC_CMEQ "eq")
20620
- (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
20621
- (UNSPEC_CMTST "tst")])
20623
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
20624
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
20626
@@ -783,15 +842,18 @@
20627
(UNSPEC_FRINTM "floor")
20628
(UNSPEC_FRINTI "nearbyint")
20629
(UNSPEC_FRINTX "rint")
20630
- (UNSPEC_FRINTA "round")])
20631
+ (UNSPEC_FRINTA "round")
20632
+ (UNSPEC_FRINTN "frintn")])
20634
;; frint suffix for floating-point rounding instructions.
20635
(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p")
20636
(UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i")
20637
- (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")])
20638
+ (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")
20639
+ (UNSPEC_FRINTN "n")])
20641
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
20642
- (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
20643
+ (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")
20644
+ (UNSPEC_FRINTN "frintn")])
20646
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
20647
(UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
20648
@@ -800,3 +862,5 @@
20649
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
20650
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
20651
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
20653
+(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
20654
--- a/src/gcc/config/aarch64/aarch64.h
20655
+++ b/src/gcc/config/aarch64/aarch64.h
20656
@@ -521,12 +521,6 @@
20660
-/* Which ABI to use. */
20668
ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */
20669
@@ -534,11 +528,7 @@
20673
-extern enum arm_abi_type arm_abi;
20674
extern enum arm_pcs arm_pcs_variant;
20675
-#ifndef ARM_DEFAULT_ABI
20676
-#define ARM_DEFAULT_ABI ARM_ABI_AAPCS64
20679
#ifndef ARM_DEFAULT_PCS
20680
#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64
20681
@@ -709,6 +699,8 @@
20683
#define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y)
20685
+#define REVERSIBLE_CC_MODE(MODE) 1
20687
#define REVERSE_CONDITION(CODE, MODE) \
20688
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
20689
? reverse_condition_maybe_unordered (CODE) \
20690
--- a/src/gcc/config/arm/arm1020e.md
20691
+++ b/src/gcc/config/arm/arm1020e.md
20693
;; until after the memory stage.
20694
(define_insn_reservation "1020mult1" 2
20695
(and (eq_attr "tune" "arm1020e,arm1022e")
20696
- (eq_attr "insn" "smulxy,smulwy"))
20697
+ (eq_attr "type" "smulxy,smulwy"))
20698
"1020a_e,1020a_m,1020a_w")
20700
;; The "smlaxy" and "smlawx" instructions require two iterations through
20701
@@ -104,7 +104,7 @@
20702
;; the execute stage.
20703
(define_insn_reservation "1020mult2" 2
20704
(and (eq_attr "tune" "arm1020e,arm1022e")
20705
- (eq_attr "insn" "smlaxy,smlalxy,smlawx"))
20706
+ (eq_attr "type" "smlaxy,smlalxy,smlawx"))
20707
"1020a_e*2,1020a_m,1020a_w")
20709
;; The "smlalxy", "mul", and "mla" instructions require two iterations
20710
@@ -112,7 +112,7 @@
20711
;; the memory stage.
20712
(define_insn_reservation "1020mult3" 3
20713
(and (eq_attr "tune" "arm1020e,arm1022e")
20714
- (eq_attr "insn" "smlalxy,mul,mla"))
20715
+ (eq_attr "type" "smlalxy,mul,mla"))
20716
"1020a_e*2,1020a_m,1020a_w")
20718
;; The "muls" and "mlas" instructions loop in the execute stage for
20719
@@ -120,7 +120,7 @@
20720
;; available after three iterations.
20721
(define_insn_reservation "1020mult4" 3
20722
(and (eq_attr "tune" "arm1020e,arm1022e")
20723
- (eq_attr "insn" "muls,mlas"))
20724
+ (eq_attr "type" "muls,mlas"))
20725
"1020a_e*4,1020a_m,1020a_w")
20727
;; Long multiply instructions that produce two registers of
20728
@@ -135,7 +135,7 @@
20729
;; available after the memory cycle.
20730
(define_insn_reservation "1020mult5" 4
20731
(and (eq_attr "tune" "arm1020e,arm1022e")
20732
- (eq_attr "insn" "umull,umlal,smull,smlal"))
20733
+ (eq_attr "type" "umull,umlal,smull,smlal"))
20734
"1020a_e*3,1020a_m,1020a_w")
20736
;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
20737
@@ -143,7 +143,7 @@
20738
;; The value result is available after four iterations.
20739
(define_insn_reservation "1020mult6" 4
20740
(and (eq_attr "tune" "arm1020e,arm1022e")
20741
- (eq_attr "insn" "umulls,umlals,smulls,smlals"))
20742
+ (eq_attr "type" "umulls,umlals,smulls,smlals"))
20743
"1020a_e*5,1020a_m,1020a_w")
20745
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20746
--- a/src/gcc/config/arm/cortex-a15.md
20747
+++ b/src/gcc/config/arm/cortex-a15.md
20748
@@ -87,28 +87,26 @@
20749
;; 32-bit multiplies
20750
(define_insn_reservation "cortex_a15_mult32" 3
20751
(and (eq_attr "tune" "cortexa15")
20752
- (and (eq_attr "type" "mult")
20753
- (and (eq_attr "neon_type" "none")
20754
- (eq_attr "mul64" "no"))))
20755
+ (and (eq_attr "mul32" "yes")
20756
+ (eq_attr "neon_type" "none")))
20757
"ca15_issue1,ca15_mx")
20759
;; 64-bit multiplies
20760
(define_insn_reservation "cortex_a15_mult64" 4
20761
(and (eq_attr "tune" "cortexa15")
20762
- (and (eq_attr "type" "mult")
20763
- (and (eq_attr "neon_type" "none")
20764
- (eq_attr "mul64" "yes"))))
20765
+ (and (eq_attr "mul64" "yes")
20766
+ (eq_attr "neon_type" "none")))
20767
"ca15_issue1,ca15_mx*2")
20770
(define_insn_reservation "cortex_a15_udiv" 9
20771
(and (eq_attr "tune" "cortexa15")
20772
- (eq_attr "insn" "udiv"))
20773
+ (eq_attr "type" "udiv"))
20774
"ca15_issue1,ca15_mx")
20776
(define_insn_reservation "cortex_a15_sdiv" 10
20777
(and (eq_attr "tune" "cortexa15")
20778
- (eq_attr "insn" "sdiv"))
20779
+ (eq_attr "type" "sdiv"))
20780
"ca15_issue1,ca15_mx")
20782
;; Block all issue pipes for a cycle
20783
--- a/src/gcc/config/arm/arm-tables.opt
20784
+++ b/src/gcc/config/arm/arm-tables.opt
20785
@@ -250,6 +250,9 @@
20786
Enum(processor_type) String(cortex-a15) Value(cortexa15)
20789
+Enum(processor_type) String(cortex-a53) Value(cortexa53)
20792
Enum(processor_type) String(cortex-r4) Value(cortexr4)
20795
@@ -259,6 +262,9 @@
20796
Enum(processor_type) String(cortex-r5) Value(cortexr5)
20799
+Enum(processor_type) String(cortex-r7) Value(cortexr7)
20802
Enum(processor_type) String(cortex-m4) Value(cortexm4)
20805
--- a/src/gcc/config/arm/arm1026ejs.md
20806
+++ b/src/gcc/config/arm/arm1026ejs.md
20808
;; until after the memory stage.
20809
(define_insn_reservation "mult1" 2
20810
(and (eq_attr "tune" "arm1026ejs")
20811
- (eq_attr "insn" "smulxy,smulwy"))
20812
+ (eq_attr "type" "smulxy,smulwy"))
20815
;; The "smlaxy" and "smlawx" instructions require two iterations through
20816
@@ -104,7 +104,7 @@
20817
;; the execute stage.
20818
(define_insn_reservation "mult2" 2
20819
(and (eq_attr "tune" "arm1026ejs")
20820
- (eq_attr "insn" "smlaxy,smlalxy,smlawx"))
20821
+ (eq_attr "type" "smlaxy,smlalxy,smlawx"))
20824
;; The "smlalxy", "mul", and "mla" instructions require two iterations
20825
@@ -112,7 +112,7 @@
20826
;; the memory stage.
20827
(define_insn_reservation "mult3" 3
20828
(and (eq_attr "tune" "arm1026ejs")
20829
- (eq_attr "insn" "smlalxy,mul,mla"))
20830
+ (eq_attr "type" "smlalxy,mul,mla"))
20833
;; The "muls" and "mlas" instructions loop in the execute stage for
20834
@@ -120,7 +120,7 @@
20835
;; available after three iterations.
20836
(define_insn_reservation "mult4" 3
20837
(and (eq_attr "tune" "arm1026ejs")
20838
- (eq_attr "insn" "muls,mlas"))
20839
+ (eq_attr "type" "muls,mlas"))
20842
;; Long multiply instructions that produce two registers of
20843
@@ -135,7 +135,7 @@
20844
;; available after the memory cycle.
20845
(define_insn_reservation "mult5" 4
20846
(and (eq_attr "tune" "arm1026ejs")
20847
- (eq_attr "insn" "umull,umlal,smull,smlal"))
20848
+ (eq_attr "type" "umull,umlal,smull,smlal"))
20851
;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
20852
@@ -143,7 +143,7 @@
20853
;; The value result is available after four iterations.
20854
(define_insn_reservation "mult6" 4
20855
(and (eq_attr "tune" "arm1026ejs")
20856
- (eq_attr "insn" "umulls,umlals,smulls,smlals"))
20857
+ (eq_attr "type" "umulls,umlals,smulls,smlals"))
20860
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20861
--- a/src/gcc/config/arm/arm1136jfs.md
20862
+++ b/src/gcc/config/arm/arm1136jfs.md
20863
@@ -129,13 +129,13 @@
20864
;; Multiply and multiply-accumulate results are available after four stages.
20865
(define_insn_reservation "11_mult1" 4
20866
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20867
- (eq_attr "insn" "mul,mla"))
20868
+ (eq_attr "type" "mul,mla"))
20869
"e_1*2,e_2,e_3,e_wb")
20871
;; The *S variants set the condition flags, which requires three more cycles.
20872
(define_insn_reservation "11_mult2" 4
20873
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20874
- (eq_attr "insn" "muls,mlas"))
20875
+ (eq_attr "type" "muls,mlas"))
20876
"e_1*2,e_2,e_3,e_wb")
20878
(define_bypass 3 "11_mult1,11_mult2"
20879
@@ -160,13 +160,13 @@
20880
;; the two multiply-accumulate instructions.
20881
(define_insn_reservation "11_mult3" 5
20882
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20883
- (eq_attr "insn" "smull,umull,smlal,umlal"))
20884
+ (eq_attr "type" "smull,umull,smlal,umlal"))
20885
"e_1*3,e_2,e_3,e_wb*2")
20887
;; The *S variants set the condition flags, which requires three more cycles.
20888
(define_insn_reservation "11_mult4" 5
20889
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20890
- (eq_attr "insn" "smulls,umulls,smlals,umlals"))
20891
+ (eq_attr "type" "smulls,umulls,smlals,umlals"))
20892
"e_1*3,e_2,e_3,e_wb*2")
20894
(define_bypass 4 "11_mult3,11_mult4"
20895
@@ -190,7 +190,8 @@
20897
(define_insn_reservation "11_mult5" 3
20898
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20899
- (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx"))
20900
+ (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\
20901
+ smusd,smusdx,smlsd,smlsdx"))
20902
"e_1,e_2,e_3,e_wb")
20904
(define_bypass 2 "11_mult5"
20905
@@ -211,14 +212,14 @@
20906
;; The same idea, then the 32-bit result is added to a 64-bit quantity.
20907
(define_insn_reservation "11_mult6" 4
20908
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20909
- (eq_attr "insn" "smlalxy"))
20910
+ (eq_attr "type" "smlalxy"))
20911
"e_1*2,e_2,e_3,e_wb*2")
20913
;; Signed 32x32 multiply, then the most significant 32 bits are extracted
20914
;; and are available after the memory stage.
20915
(define_insn_reservation "11_mult7" 4
20916
(and (eq_attr "tune" "arm1136js,arm1136jfs")
20917
- (eq_attr "insn" "smmul,smmulr"))
20918
+ (eq_attr "type" "smmul,smmulr"))
20919
"e_1*2,e_2,e_3,e_wb")
20921
(define_bypass 3 "11_mult6,11_mult7"
20922
--- a/src/gcc/config/arm/marvell-pj4.md
20923
+++ b/src/gcc/config/arm/marvell-pj4.md
20924
@@ -95,10 +95,14 @@
20925
"pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp")
20927
(define_insn_reservation "pj4_ir_mul" 3
20928
- (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "mult")) "pj4_is,pj4_mul,nothing*2,pj4_cp")
20929
+ (and (eq_attr "tune" "marvell_pj4")
20930
+ (ior (eq_attr "mul32" "yes")
20931
+ (eq_attr "mul64" "yes")))
20932
+ "pj4_is,pj4_mul,nothing*2,pj4_cp")
20934
(define_insn_reservation "pj4_ir_div" 20
20935
- (and (eq_attr "tune" "marvell_pj4") (eq_attr "insn" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp")
20936
+ (and (eq_attr "tune" "marvell_pj4")
20937
+ (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp")
20939
;; Branches and calls.
20941
--- a/src/gcc/config/arm/thumb2.md
20942
+++ b/src/gcc/config/arm/thumb2.md
20943
@@ -64,81 +64,167 @@
20944
(set_attr "type" "alu_shift")]
20947
-(define_insn "*thumb2_smaxsi3"
20948
+(define_insn_and_split "*thumb2_smaxsi3"
20949
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
20950
(smax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
20951
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
20952
(clobber (reg:CC CC_REGNUM))]
20955
- cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2
20956
- cmp\\t%1, %2\;it\\tge\;movge\\t%0, %1
20957
- cmp\\t%1, %2\;ite\\tge\;movge\\t%0, %1\;movlt\\t%0, %2"
20959
+ ; cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2
20960
+ ; cmp\\t%1, %2\;it\\tge\;movge\\t%0, %1
20961
+ ; cmp\\t%1, %2\;ite\\tge\;movge\\t%0, %1\;movlt\\t%0, %2
20963
+ [(set (reg:CC CC_REGNUM)
20964
+ (compare:CC (match_dup 1) (match_dup 2)))
20965
+ (set (match_dup 0)
20966
+ (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0))
20970
[(set_attr "conds" "clob")
20971
(set_attr "length" "10,10,14")]
20974
-(define_insn "*thumb2_sminsi3"
20975
+(define_insn_and_split "*thumb2_sminsi3"
20976
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
20977
(smin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
20978
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
20979
(clobber (reg:CC CC_REGNUM))]
20982
- cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2
20983
- cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %1
20984
- cmp\\t%1, %2\;ite\\tlt\;movlt\\t%0, %1\;movge\\t%0, %2"
20986
+ ; cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2
20987
+ ; cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %1
20988
+ ; cmp\\t%1, %2\;ite\\tlt\;movlt\\t%0, %1\;movge\\t%0, %2"
20990
+ [(set (reg:CC CC_REGNUM)
20991
+ (compare:CC (match_dup 1) (match_dup 2)))
20992
+ (set (match_dup 0)
20993
+ (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0))
20997
[(set_attr "conds" "clob")
20998
(set_attr "length" "10,10,14")]
21001
-(define_insn "*thumb32_umaxsi3"
21002
+(define_insn_and_split "*thumb32_umaxsi3"
21003
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
21004
(umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
21005
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
21006
(clobber (reg:CC CC_REGNUM))]
21009
- cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2
21010
- cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %1
21011
- cmp\\t%1, %2\;ite\\tcs\;movcs\\t%0, %1\;movcc\\t%0, %2"
21013
+ ; cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2
21014
+ ; cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %1
21015
+ ; cmp\\t%1, %2\;ite\\tcs\;movcs\\t%0, %1\;movcc\\t%0, %2"
21017
+ [(set (reg:CC CC_REGNUM)
21018
+ (compare:CC (match_dup 1) (match_dup 2)))
21019
+ (set (match_dup 0)
21020
+ (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
21024
[(set_attr "conds" "clob")
21025
(set_attr "length" "10,10,14")]
21028
-(define_insn "*thumb2_uminsi3"
21029
+(define_insn_and_split "*thumb2_uminsi3"
21030
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
21031
(umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
21032
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
21033
(clobber (reg:CC CC_REGNUM))]
21036
- cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2
21037
- cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %1
21038
- cmp\\t%1, %2\;ite\\tcc\;movcc\\t%0, %1\;movcs\\t%0, %2"
21040
+ ; cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2
21041
+ ; cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %1
21042
+ ; cmp\\t%1, %2\;ite\\tcc\;movcc\\t%0, %1\;movcs\\t%0, %2"
21044
+ [(set (reg:CC CC_REGNUM)
21045
+ (compare:CC (match_dup 1) (match_dup 2)))
21046
+ (set (match_dup 0)
21047
+ (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))
21051
[(set_attr "conds" "clob")
21052
(set_attr "length" "10,10,14")]
21055
;; Thumb-2 does not have rsc, so use a clever trick with shifter operands.
21056
-(define_insn "*thumb2_negdi2"
21057
+(define_insn_and_split "*thumb2_negdi2"
21058
[(set (match_operand:DI 0 "s_register_operand" "=&r,r")
21059
(neg:DI (match_operand:DI 1 "s_register_operand" "?r,0")))
21060
(clobber (reg:CC CC_REGNUM))]
21062
- "negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1"
21063
+ "#" ; negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1
21064
+ "&& reload_completed"
21065
+ [(parallel [(set (reg:CC CC_REGNUM)
21066
+ (compare:CC (const_int 0) (match_dup 1)))
21067
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
21068
+ (set (match_dup 2) (minus:SI (minus:SI (match_dup 3)
21069
+ (ashift:SI (match_dup 3)
21071
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
21073
+ operands[2] = gen_highpart (SImode, operands[0]);
21074
+ operands[0] = gen_lowpart (SImode, operands[0]);
21075
+ operands[3] = gen_highpart (SImode, operands[1]);
21076
+ operands[1] = gen_lowpart (SImode, operands[1]);
21078
[(set_attr "conds" "clob")
21079
(set_attr "length" "8")]
21082
-(define_insn "*thumb2_abssi2"
21083
+(define_insn_and_split "*thumb2_abssi2"
21084
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
21085
(abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
21086
(clobber (reg:CC CC_REGNUM))]
21089
- cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0
21090
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
21092
+ ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0
21093
+ ; eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31
21094
+ "&& reload_completed"
21097
+ /* if (which_alternative == 0) */
21098
+ if (REGNO(operands[0]) == REGNO(operands[1]))
21100
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
21102
+ emit_insn (gen_rtx_SET (VOIDmode,
21104
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
21105
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21106
+ (gen_rtx_LT (SImode,
21109
+ (gen_rtx_SET (VOIDmode,
21111
+ (gen_rtx_MINUS (SImode,
21113
+ operands[1]))))));
21117
+ emit_insn (gen_rtx_SET (VOIDmode,
21119
+ gen_rtx_XOR (SImode,
21120
+ gen_rtx_ASHIFTRT (SImode,
21124
+ emit_insn (gen_rtx_SET (VOIDmode,
21126
+ gen_rtx_MINUS (SImode,
21128
+ gen_rtx_ASHIFTRT (SImode,
21130
+ GEN_INT (31)))));
21134
[(set_attr "conds" "clob,*")
21135
(set_attr "shift" "1")
21136
(set_attr "predicable" "no, yes")
21137
@@ -146,14 +232,54 @@
21138
(set_attr "length" "10,8")]
21141
-(define_insn "*thumb2_neg_abssi2"
21142
+(define_insn_and_split "*thumb2_neg_abssi2"
21143
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
21144
(neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
21145
(clobber (reg:CC CC_REGNUM))]
21148
- cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0
21149
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
21151
+ ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0
21152
+ ; eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
21153
+ "&& reload_completed"
21156
+ /* if (which_alternative == 0) */
21157
+ if (REGNO(operands[0]) == REGNO(operands[1]))
21159
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
21161
+ emit_insn (gen_rtx_SET (VOIDmode,
21163
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
21164
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21165
+ (gen_rtx_GT (SImode,
21168
+ (gen_rtx_SET (VOIDmode,
21170
+ (gen_rtx_MINUS (SImode,
21172
+ operands[1]))))));
21176
+ emit_insn (gen_rtx_SET (VOIDmode,
21178
+ gen_rtx_XOR (SImode,
21179
+ gen_rtx_ASHIFTRT (SImode,
21183
+ emit_insn (gen_rtx_SET (VOIDmode,
21185
+ gen_rtx_MINUS (SImode,
21186
+ gen_rtx_ASHIFTRT (SImode,
21193
[(set_attr "conds" "clob,*")
21194
(set_attr "shift" "1")
21195
(set_attr "predicable" "no, yes")
21196
@@ -167,8 +293,8 @@
21197
;; regs. The high register alternatives are not taken into account when
21198
;; choosing register preferences in order to reflect their expense.
21199
(define_insn "*thumb2_movsi_insn"
21200
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m")
21201
- (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))]
21202
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m")
21203
+ (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))]
21204
"TARGET_THUMB2 && ! TARGET_IWMMXT
21205
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
21206
&& ( register_operand (operands[0], SImode)
21207
@@ -176,16 +302,19 @@
21218
- [(set_attr "type" "*,*,simple_alu_imm,*,load1,load1,store1,store1")
21219
+ [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,simple_alu_imm,*,load1,load1,store1,store1")
21220
+ (set_attr "length" "2,4,2,4,4,4,4,4,4")
21221
(set_attr "predicable" "yes")
21222
- (set_attr "pool_range" "*,*,*,*,1018,4094,*,*")
21223
- (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")]
21224
+ (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
21225
+ (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*")
21226
+ (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")]
21229
(define_insn "tls_load_dot_plus_four"
21230
@@ -236,54 +365,118 @@
21231
(set_attr "type" "alu_shift")]
21234
-(define_insn "*thumb2_mov_scc"
21235
+(define_insn_and_split "*thumb2_mov_scc"
21236
[(set (match_operand:SI 0 "s_register_operand" "=r")
21237
(match_operator:SI 1 "arm_comparison_operator"
21238
[(match_operand 2 "cc_register" "") (const_int 0)]))]
21240
- "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
21241
+ "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
21243
+ [(set (match_dup 0)
21244
+ (if_then_else:SI (match_dup 1)
21248
[(set_attr "conds" "use")
21249
(set_attr "length" "10")]
21252
-(define_insn "*thumb2_mov_negscc"
21253
+(define_insn_and_split "*thumb2_mov_negscc"
21254
[(set (match_operand:SI 0 "s_register_operand" "=r")
21255
(neg:SI (match_operator:SI 1 "arm_comparison_operator"
21256
[(match_operand 2 "cc_register" "") (const_int 0)])))]
21258
- "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
21259
+ "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
21261
+ [(set (match_dup 0)
21262
+ (if_then_else:SI (match_dup 1)
21266
+ operands[3] = GEN_INT (~0);
21268
[(set_attr "conds" "use")
21269
(set_attr "length" "10")]
21272
-(define_insn "*thumb2_mov_notscc"
21273
+(define_insn_and_split "*thumb2_mov_notscc"
21274
[(set (match_operand:SI 0 "s_register_operand" "=r")
21275
(not:SI (match_operator:SI 1 "arm_comparison_operator"
21276
[(match_operand 2 "cc_register" "") (const_int 0)])))]
21278
- "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
21279
+ "#" ; "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
21281
+ [(set (match_dup 0)
21282
+ (if_then_else:SI (match_dup 1)
21286
+ operands[3] = GEN_INT (~1);
21287
+ operands[4] = GEN_INT (~0);
21289
[(set_attr "conds" "use")
21290
(set_attr "length" "10")]
21293
-(define_insn "*thumb2_movsicc_insn"
21294
- [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
21295
+(define_insn_and_split "*thumb2_movsicc_insn"
21296
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,l")
21298
(match_operator 3 "arm_comparison_operator"
21299
[(match_operand 4 "cc_register" "") (const_int 0)])
21300
- (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K")
21301
- (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))]
21302
+ (match_operand:SI 1 "arm_not_operand" "0 ,Py,0 ,0,rI,K,rI,rI,K ,K,r,lPy")
21303
+ (match_operand:SI 2 "arm_not_operand" "Py,0 ,rI,K,0 ,0,rI,K ,rI,K,r,lPy")))]
21306
it\\t%D3\;mov%D3\\t%0, %2
21307
+ it\\t%d3\;mov%d3\\t%0, %1
21308
+ it\\t%D3\;mov%D3\\t%0, %2
21309
it\\t%D3\;mvn%D3\\t%0, #%B2
21310
it\\t%d3\;mov%d3\\t%0, %1
21311
it\\t%d3\;mvn%d3\\t%0, #%B1
21312
- ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
21313
- ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
21314
- ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
21315
- ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
21316
- [(set_attr "length" "6,6,6,6,10,10,10,10")
21323
+ ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
21324
+ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
21325
+ ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
21326
+ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
21327
+ ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
21328
+ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2"
21329
+ "&& reload_completed"
21332
+ enum rtx_code rev_code;
21333
+ enum machine_mode mode;
21336
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21338
+ gen_rtx_SET (VOIDmode,
21341
+ rev_code = GET_CODE (operands[3]);
21342
+ mode = GET_MODE (operands[4]);
21343
+ if (mode == CCFPmode || mode == CCFPEmode)
21344
+ rev_code = reverse_condition_maybe_unordered (rev_code);
21346
+ rev_code = reverse_condition (rev_code);
21348
+ rev_cond = gen_rtx_fmt_ee (rev_code,
21350
+ gen_rtx_REG (mode, CC_REGNUM),
21352
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21354
+ gen_rtx_SET (VOIDmode,
21359
+ [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6,6")
21360
+ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes,yes")
21361
(set_attr "conds" "use")]
21364
@@ -333,26 +526,59 @@
21365
;; addresses will have the thumb bit set correctly.
21368
-(define_insn "*thumb2_and_scc"
21369
- [(set (match_operand:SI 0 "s_register_operand" "=r")
21370
+(define_insn_and_split "*thumb2_and_scc"
21371
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
21372
(and:SI (match_operator:SI 1 "arm_comparison_operator"
21373
- [(match_operand 3 "cc_register" "") (const_int 0)])
21374
- (match_operand:SI 2 "s_register_operand" "r")))]
21375
+ [(match_operand 2 "cc_register" "") (const_int 0)])
21376
+ (match_operand:SI 3 "s_register_operand" "r")))]
21378
- "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
21379
+ "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0"
21380
+ "&& reload_completed"
21381
+ [(set (match_dup 0)
21382
+ (and:SI (match_dup 3) (const_int 1)))
21383
+ (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))]
21385
+ enum machine_mode mode = GET_MODE (operands[2]);
21386
+ enum rtx_code rc = GET_CODE (operands[1]);
21388
+ if (mode == CCFPmode || mode == CCFPEmode)
21389
+ rc = reverse_condition_maybe_unordered (rc);
21391
+ rc = reverse_condition (rc);
21392
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
21394
[(set_attr "conds" "use")
21395
- (set_attr "length" "10")]
21396
+ (set (attr "length") (if_then_else (match_test "arm_restrict_it")
21398
+ (const_int 10)))]
21401
-(define_insn "*thumb2_ior_scc"
21402
+(define_insn_and_split "*thumb2_ior_scc"
21403
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
21404
- (ior:SI (match_operator:SI 2 "arm_comparison_operator"
21405
- [(match_operand 3 "cc_register" "") (const_int 0)])
21406
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
21407
+ (ior:SI (match_operator:SI 1 "arm_comparison_operator"
21408
+ [(match_operand 2 "cc_register" "") (const_int 0)])
21409
+ (match_operand:SI 3 "s_register_operand" "0,?r")))]
21412
- it\\t%d2\;orr%d2\\t%0, %1, #1
21413
- ite\\t%D2\;mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
21414
+ it\\t%d1\;orr%d1\\t%0, %3, #1
21416
+ ; alt 1: ite\\t%D1\;mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1
21417
+ "&& reload_completed
21418
+ && REGNO (operands [0]) != REGNO (operands[3])"
21419
+ [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3)))
21420
+ (cond_exec (match_dup 4) (set (match_dup 0)
21421
+ (ior:SI (match_dup 3) (const_int 1))))]
21423
+ enum machine_mode mode = GET_MODE (operands[2]);
21424
+ enum rtx_code rc = GET_CODE (operands[1]);
21426
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
21427
+ if (mode == CCFPmode || mode == CCFPEmode)
21428
+ rc = reverse_condition_maybe_unordered (rc);
21430
+ rc = reverse_condition (rc);
21431
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
21433
[(set_attr "conds" "use")
21434
(set_attr "length" "6,10")]
21436
@@ -459,25 +685,70 @@
21437
(set_attr "length" "10,14")]
21440
-(define_insn "*thumb2_negscc"
21441
- [(set (match_operand:SI 0 "s_register_operand" "=r")
21442
+(define_insn_and_split "*thumb2_negscc"
21443
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
21444
(neg:SI (match_operator 3 "arm_comparison_operator"
21445
[(match_operand:SI 1 "s_register_operand" "r")
21446
(match_operand:SI 2 "arm_rhs_operand" "rI")])))
21447
(clobber (reg:CC CC_REGNUM))]
21450
- if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
21451
- return \"asr\\t%0, %1, #31\";
21453
+ "&& reload_completed"
21456
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
21458
- if (GET_CODE (operands[3]) == NE)
21459
- return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\";
21460
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
21462
+ /* Emit asr\\t%0, %1, #31 */
21463
+ emit_insn (gen_rtx_SET (VOIDmode,
21465
+ gen_rtx_ASHIFTRT (SImode,
21470
+ else if (GET_CODE (operands[3]) == NE && !arm_restrict_it)
21472
+ /* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */
21473
+ if (CONST_INT_P (operands[2]))
21474
+ emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2],
21475
+ GEN_INT (- INTVAL (operands[2]))));
21477
+ emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2]));
21479
- output_asm_insn (\"cmp\\t%1, %2\", operands);
21480
- output_asm_insn (\"ite\\t%D3\", operands);
21481
- output_asm_insn (\"mov%D3\\t%0, #0\", operands);
21482
- return \"mvn%d3\\t%0, #0\";
21484
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21485
+ gen_rtx_NE (SImode,
21488
+ gen_rtx_SET (SImode,
21495
+ /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/
21496
+ enum rtx_code rc = reverse_condition (GET_CODE (operands[3]));
21497
+ enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]);
21498
+ rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM);
21500
+ emit_insn (gen_rtx_SET (VOIDmode,
21502
+ gen_rtx_COMPARE (CCmode, operands[1], operands[2])));
21504
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0)));
21506
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21507
+ gen_rtx_fmt_ee (rc,
21511
+ gen_rtx_SET (VOIDmode, operands[0], const0_rtx)));
21516
[(set_attr "conds" "clob")
21517
(set_attr "length" "14")]
21519
@@ -823,7 +1094,7 @@
21520
"mul%!\\t%0, %2, %0"
21521
[(set_attr "predicable" "yes")
21522
(set_attr "length" "2")
21523
- (set_attr "insn" "muls")])
21524
+ (set_attr "type" "muls")])
21526
(define_insn "*thumb2_mulsi_short_compare0"
21527
[(set (reg:CC_NOOV CC_REGNUM)
21528
@@ -836,7 +1107,7 @@
21529
"TARGET_THUMB2 && optimize_size"
21530
"muls\\t%0, %2, %0"
21531
[(set_attr "length" "2")
21532
- (set_attr "insn" "muls")])
21533
+ (set_attr "type" "muls")])
21535
(define_insn "*thumb2_mulsi_short_compare0_scratch"
21536
[(set (reg:CC_NOOV CC_REGNUM)
21537
@@ -848,7 +1119,7 @@
21538
"TARGET_THUMB2 && optimize_size"
21539
"muls\\t%0, %2, %0"
21540
[(set_attr "length" "2")
21541
- (set_attr "insn" "muls")])
21542
+ (set_attr "type" "muls")])
21544
(define_insn "*thumb2_cbz"
21545
[(set (pc) (if_then_else
21546
--- a/src/gcc/config/arm/arm.c
21547
+++ b/src/gcc/config/arm/arm.c
21548
@@ -173,6 +173,7 @@
21549
static tree arm_builtin_decl (unsigned, bool);
21550
static void emit_constant_insn (rtx cond, rtx pattern);
21551
static rtx emit_set_insn (rtx, rtx);
21552
+static rtx emit_multi_reg_push (unsigned long);
21553
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
21555
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
21556
@@ -280,6 +281,7 @@
21558
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
21559
bool op0_preserve_value);
21560
+static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
21562
/* Table of machine attributes. */
21563
static const struct attribute_spec arm_attribute_table[] =
21564
@@ -620,6 +622,13 @@
21565
#undef TARGET_CLASS_LIKELY_SPILLED_P
21566
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
21568
+#undef TARGET_VECTORIZE_BUILTINS
21569
+#define TARGET_VECTORIZE_BUILTINS
21571
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
21572
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
21573
+ arm_builtin_vectorized_function
21575
#undef TARGET_VECTOR_ALIGNMENT
21576
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
21578
@@ -649,6 +658,13 @@
21579
#define TARGET_CANONICALIZE_COMPARISON \
21580
arm_canonicalize_comparison
21582
+#undef TARGET_ASAN_SHADOW_OFFSET
21583
+#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
21585
+#undef MAX_INSN_PER_IT_BLOCK
21586
+#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
21589
struct gcc_target targetm = TARGET_INITIALIZER;
21591
/* Obstack for minipool constant handling. */
21592
@@ -839,6 +855,10 @@
21593
int arm_arch_arm_hwdiv;
21594
int arm_arch_thumb_hwdiv;
21596
+/* Nonzero if we should use Neon to handle 64-bits operations rather
21597
+ than core registers. */
21598
+int prefer_neon_for_64bits = 0;
21600
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
21601
we must report the mode of the memory reference from
21602
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
21603
@@ -936,6 +956,7 @@
21604
false, /* Prefer LDRD/STRD. */
21605
{true, true}, /* Prefer non short circuit. */
21606
&arm_default_vec_cost, /* Vectorizer costs. */
21607
+ false /* Prefer Neon for 64-bits bitops. */
21610
const struct tune_params arm_fastmul_tune =
21611
@@ -950,6 +971,7 @@
21612
false, /* Prefer LDRD/STRD. */
21613
{true, true}, /* Prefer non short circuit. */
21614
&arm_default_vec_cost, /* Vectorizer costs. */
21615
+ false /* Prefer Neon for 64-bits bitops. */
21618
/* StrongARM has early execution of branches, so a sequence that is worth
21619
@@ -967,6 +989,7 @@
21620
false, /* Prefer LDRD/STRD. */
21621
{true, true}, /* Prefer non short circuit. */
21622
&arm_default_vec_cost, /* Vectorizer costs. */
21623
+ false /* Prefer Neon for 64-bits bitops. */
21626
const struct tune_params arm_xscale_tune =
21627
@@ -981,6 +1004,7 @@
21628
false, /* Prefer LDRD/STRD. */
21629
{true, true}, /* Prefer non short circuit. */
21630
&arm_default_vec_cost, /* Vectorizer costs. */
21631
+ false /* Prefer Neon for 64-bits bitops. */
21634
const struct tune_params arm_9e_tune =
21635
@@ -995,6 +1019,7 @@
21636
false, /* Prefer LDRD/STRD. */
21637
{true, true}, /* Prefer non short circuit. */
21638
&arm_default_vec_cost, /* Vectorizer costs. */
21639
+ false /* Prefer Neon for 64-bits bitops. */
21642
const struct tune_params arm_v6t2_tune =
21643
@@ -1009,6 +1034,7 @@
21644
false, /* Prefer LDRD/STRD. */
21645
{true, true}, /* Prefer non short circuit. */
21646
&arm_default_vec_cost, /* Vectorizer costs. */
21647
+ false /* Prefer Neon for 64-bits bitops. */
21650
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
21651
@@ -1024,6 +1050,7 @@
21652
false, /* Prefer LDRD/STRD. */
21653
{true, true}, /* Prefer non short circuit. */
21654
&arm_default_vec_cost, /* Vectorizer costs. */
21655
+ false /* Prefer Neon for 64-bits bitops. */
21658
const struct tune_params arm_cortex_a15_tune =
21659
@@ -1031,13 +1058,14 @@
21662
1, /* Constant limit. */
21663
- 5, /* Max cond insns. */
21664
+ 2, /* Max cond insns. */
21665
ARM_PREFETCH_NOT_BENEFICIAL,
21666
false, /* Prefer constant pool. */
21667
arm_default_branch_cost,
21668
true, /* Prefer LDRD/STRD. */
21669
{true, true}, /* Prefer non short circuit. */
21670
&arm_default_vec_cost, /* Vectorizer costs. */
21671
+ false /* Prefer Neon for 64-bits bitops. */
21674
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
21675
@@ -1055,6 +1083,7 @@
21676
false, /* Prefer LDRD/STRD. */
21677
{false, false}, /* Prefer non short circuit. */
21678
&arm_default_vec_cost, /* Vectorizer costs. */
21679
+ false /* Prefer Neon for 64-bits bitops. */
21682
const struct tune_params arm_cortex_a9_tune =
21683
@@ -1069,6 +1098,7 @@
21684
false, /* Prefer LDRD/STRD. */
21685
{true, true}, /* Prefer non short circuit. */
21686
&arm_default_vec_cost, /* Vectorizer costs. */
21687
+ false /* Prefer Neon for 64-bits bitops. */
21690
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
21691
@@ -1085,6 +1115,7 @@
21692
false, /* Prefer LDRD/STRD. */
21693
{false, false}, /* Prefer non short circuit. */
21694
&arm_default_vec_cost, /* Vectorizer costs. */
21695
+ false /* Prefer Neon for 64-bits bitops. */
21698
const struct tune_params arm_fa726te_tune =
21699
@@ -1099,6 +1130,7 @@
21700
false, /* Prefer LDRD/STRD. */
21701
{true, true}, /* Prefer non short circuit. */
21702
&arm_default_vec_cost, /* Vectorizer costs. */
21703
+ false /* Prefer Neon for 64-bits bitops. */
21707
@@ -1842,7 +1874,12 @@
21708
arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
21709
arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
21710
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
21711
+ if (arm_restrict_it == 2)
21712
+ arm_restrict_it = arm_arch8 && TARGET_THUMB2;
21714
+ if (!TARGET_THUMB2)
21715
+ arm_restrict_it = 0;
21717
/* If we are not using the default (ARM mode) section anchor offset
21718
ranges, then set the correct ranges now. */
21720
@@ -2129,11 +2166,25 @@
21721
global_options.x_param_values,
21722
global_options_set.x_param_values);
21724
+ /* Use Neon to perform 64-bits operations rather than core
21726
+ prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
21727
+ if (use_neon_for_64bits == 1)
21728
+ prefer_neon_for_64bits = true;
21730
/* Use the alternative scheduling-pressure algorithm by default. */
21731
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
21732
global_options.x_param_values,
21733
global_options_set.x_param_values);
21735
+ /* Disable shrink-wrap when optimizing function for size, since it tends to
21736
+ generate additional returns. */
21737
+ if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
21738
+ flag_shrink_wrap = false;
21739
+ /* TBD: Dwarf info for apcs frame is not handled yet. */
21740
+ if (TARGET_APCS_FRAME)
21741
+ flag_shrink_wrap = false;
21743
/* Register global variables with the garbage collector. */
21744
arm_add_gc_roots ();
21746
@@ -2382,6 +2433,10 @@
21747
if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
21750
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
21751
+ && !optimize_function_for_size_p (cfun))
21754
offsets = arm_get_frame_offsets ();
21755
stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21757
@@ -2479,6 +2534,18 @@
21761
+/* Return TRUE if we should try to use a simple_return insn, i.e. perform
21762
+ shrink-wrapping if possible. This is the case if we need to emit a
21763
+ prologue, which we can test by looking at the offsets. */
21765
+use_simple_return_p (void)
21767
+ arm_stack_offsets *offsets;
21769
+ offsets = arm_get_frame_offsets ();
21770
+ return offsets->outgoing_args != 0;
21773
/* Return TRUE if int I is a valid immediate ARM constant. */
21776
@@ -2617,6 +2684,9 @@
21781
+ return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
21782
+ && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
21784
return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
21786
@@ -5335,9 +5405,8 @@
21787
if (cfun->machine->sibcall_blocked)
21790
- /* Never tailcall something for which we have no decl, or if we
21791
- are generating code for Thumb-1. */
21792
- if (decl == NULL || TARGET_THUMB1)
21793
+ /* Never tailcall something if we are generating code for Thumb-1. */
21794
+ if (TARGET_THUMB1)
21797
/* The PIC register is live on entry to VxWorks PLT entries, so we
21798
@@ -5347,13 +5416,14 @@
21800
/* Cannot tail-call to long calls, since these are out of range of
21801
a branch instruction. */
21802
- if (arm_is_long_call_p (decl))
21803
+ if (decl && arm_is_long_call_p (decl))
21806
/* If we are interworking and the function is not declared static
21807
then we can't tail-call it unless we know that it exists in this
21808
compilation unit (since it might be a Thumb routine). */
21809
- if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
21810
+ if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
21811
+ && !TREE_ASM_WRITTEN (decl))
21814
func_type = arm_current_func_type ();
21815
@@ -5385,6 +5455,7 @@
21817
if (TARGET_AAPCS_BASED
21818
&& arm_abi == ARM_ABI_AAPCS
21820
&& DECL_WEAK (decl))
21823
@@ -9068,6 +9139,12 @@
21828
+arm_max_conditional_execute (void)
21830
+ return max_insns_skipped;
21834
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
21836
@@ -11823,6 +11900,142 @@
21840
+/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
21843
+next_consecutive_mem (rtx mem)
21845
+ enum machine_mode mode = GET_MODE (mem);
21846
+ HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
21847
+ rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
21849
+ return adjust_automodify_address (mem, mode, addr, offset);
21852
+/* Copy using LDRD/STRD instructions whenever possible.
21853
+ Returns true upon success. */
21855
+gen_movmem_ldrd_strd (rtx *operands)
21857
+ unsigned HOST_WIDE_INT len;
21858
+ HOST_WIDE_INT align;
21859
+ rtx src, dst, base;
21861
+ bool src_aligned, dst_aligned;
21862
+ bool src_volatile, dst_volatile;
21864
+ gcc_assert (CONST_INT_P (operands[2]));
21865
+ gcc_assert (CONST_INT_P (operands[3]));
21867
+ len = UINTVAL (operands[2]);
21871
+ /* Maximum alignment we can assume for both src and dst buffers. */
21872
+ align = INTVAL (operands[3]);
21874
+ if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
21877
+ /* Place src and dst addresses in registers
21878
+ and update the corresponding mem rtx. */
21879
+ dst = operands[0];
21880
+ dst_volatile = MEM_VOLATILE_P (dst);
21881
+ dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
21882
+ base = copy_to_mode_reg (SImode, XEXP (dst, 0));
21883
+ dst = adjust_automodify_address (dst, VOIDmode, base, 0);
21885
+ src = operands[1];
21886
+ src_volatile = MEM_VOLATILE_P (src);
21887
+ src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
21888
+ base = copy_to_mode_reg (SImode, XEXP (src, 0));
21889
+ src = adjust_automodify_address (src, VOIDmode, base, 0);
21891
+ if (!unaligned_access && !(src_aligned && dst_aligned))
21894
+ if (src_volatile || dst_volatile)
21897
+ /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
21898
+ if (!(dst_aligned || src_aligned))
21899
+ return arm_gen_movmemqi (operands);
21901
+ src = adjust_address (src, DImode, 0);
21902
+ dst = adjust_address (dst, DImode, 0);
21906
+ reg0 = gen_reg_rtx (DImode);
21908
+ emit_move_insn (reg0, src);
21910
+ emit_insn (gen_unaligned_loaddi (reg0, src));
21913
+ emit_move_insn (dst, reg0);
21915
+ emit_insn (gen_unaligned_storedi (dst, reg0));
21917
+ src = next_consecutive_mem (src);
21918
+ dst = next_consecutive_mem (dst);
21921
+ gcc_assert (len < 8);
21924
+ /* More than a word but less than a double-word to copy. Copy a word. */
21925
+ reg0 = gen_reg_rtx (SImode);
21926
+ src = adjust_address (src, SImode, 0);
21927
+ dst = adjust_address (dst, SImode, 0);
21929
+ emit_move_insn (reg0, src);
21931
+ emit_insn (gen_unaligned_loadsi (reg0, src));
21934
+ emit_move_insn (dst, reg0);
21936
+ emit_insn (gen_unaligned_storesi (dst, reg0));
21938
+ src = next_consecutive_mem (src);
21939
+ dst = next_consecutive_mem (dst);
21946
+ /* Copy the remaining bytes. */
21949
+ dst = adjust_address (dst, HImode, 0);
21950
+ src = adjust_address (src, HImode, 0);
21951
+ reg0 = gen_reg_rtx (SImode);
21953
+ emit_insn (gen_zero_extendhisi2 (reg0, src));
21955
+ emit_insn (gen_unaligned_loadhiu (reg0, src));
21958
+ emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
21960
+ emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
21962
+ src = next_consecutive_mem (src);
21963
+ dst = next_consecutive_mem (dst);
21968
+ dst = adjust_address (dst, QImode, 0);
21969
+ src = adjust_address (src, QImode, 0);
21970
+ reg0 = gen_reg_rtx (QImode);
21971
+ emit_move_insn (reg0, src);
21972
+ emit_move_insn (dst, reg0);
21976
/* Select a dominance comparison mode if possible for a test of the general
21977
form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
21978
COND_OR == DOM_CC_X_AND_Y => (X && Y)
21979
@@ -12623,6 +12836,277 @@
21983
+/* Helper for gen_operands_ldrd_strd. Returns true iff the memory
21984
+ operand ADDR is an immediate offset from the base register and is
21985
+ not volatile, in which case it sets BASE and OFFSET
21988
+mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
21990
+ /* TODO: Handle more general memory operand patterns, such as
21991
+ PRE_DEC and PRE_INC. */
21993
+ /* Convert a subreg of mem into mem itself. */
21994
+ if (GET_CODE (addr) == SUBREG)
21995
+ addr = alter_subreg (&addr, true);
21997
+ gcc_assert (MEM_P (addr));
21999
+ /* Don't modify volatile memory accesses. */
22000
+ if (MEM_VOLATILE_P (addr))
22003
+ *offset = const0_rtx;
22005
+ addr = XEXP (addr, 0);
22006
+ if (REG_P (addr))
22011
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
22013
+ *base = XEXP (addr, 0);
22014
+ *offset = XEXP (addr, 1);
22015
+ return (REG_P (*base) && CONST_INT_P (*offset));
22021
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
22023
+/* Called from a peephole2 to replace two word-size accesses with a
22024
+ single LDRD/STRD instruction. Returns true iff we can generate a
22025
+ new instruction sequence. That is, both accesses use the same base
22026
+ register and the gap between constant offsets is 4. This function
22027
+ may reorder its operands to match ldrd/strd RTL templates.
22028
+ OPERANDS are the operands found by the peephole matcher;
22029
+ OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
22030
+ corresponding memory operands. LOAD indicaates whether the access
22031
+ is load or store. CONST_STORE indicates a store of constant
22032
+ integer values held in OPERANDS[4,5] and assumes that the pattern
22033
+ is of length 4 insn, for the purpose of checking dead registers.
22034
+ COMMUTE indicates that register operands may be reordered. */
22036
+gen_operands_ldrd_strd (rtx *operands, bool load,
22037
+ bool const_store, bool commute)
22040
+ HOST_WIDE_INT offsets[2], offset;
22041
+ rtx base = NULL_RTX;
22042
+ rtx cur_base, cur_offset, tmp;
22044
+ HARD_REG_SET regset;
22046
+ gcc_assert (!const_store || !load);
22047
+ /* Check that the memory references are immediate offsets from the
22048
+ same base register. Extract the base register, the destination
22049
+ registers, and the corresponding memory offsets. */
22050
+ for (i = 0; i < nops; i++)
22052
+ if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
22057
+ else if (REGNO (base) != REGNO (cur_base))
22060
+ offsets[i] = INTVAL (cur_offset);
22061
+ if (GET_CODE (operands[i]) == SUBREG)
22063
+ tmp = SUBREG_REG (operands[i]);
22064
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
22065
+ operands[i] = tmp;
22069
+ /* Make sure there is no dependency between the individual loads. */
22070
+ if (load && REGNO (operands[0]) == REGNO (base))
22071
+ return false; /* RAW */
22073
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
22074
+ return false; /* WAW */
22076
+ /* If the same input register is used in both stores
22077
+ when storing different constants, try to find a free register.
22078
+ For example, the code
22083
+ can be transformed into
22085
+ strd r1, r0, [r2]
22086
+ in Thumb mode assuming that r1 is free. */
22088
+ && REGNO (operands[0]) == REGNO (operands[1])
22089
+ && INTVAL (operands[4]) != INTVAL (operands[5]))
22091
+ if (TARGET_THUMB2)
22093
+ CLEAR_HARD_REG_SET (regset);
22094
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
22095
+ if (tmp == NULL_RTX)
22098
+ /* Use the new register in the first load to ensure that
22099
+ if the original input register is not dead after peephole,
22100
+ then it will have the correct constant value. */
22101
+ operands[0] = tmp;
22103
+ else if (TARGET_ARM)
22106
+ int regno = REGNO (operands[0]);
22107
+ if (!peep2_reg_dead_p (4, operands[0]))
22109
+ /* When the input register is even and is not dead after the
22110
+ pattern, it has to hold the second constant but we cannot
22111
+ form a legal STRD in ARM mode with this register as the second
22113
+ if (regno % 2 == 0)
22116
+ /* Is regno-1 free? */
22117
+ SET_HARD_REG_SET (regset);
22118
+ CLEAR_HARD_REG_BIT(regset, regno - 1);
22119
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
22120
+ if (tmp == NULL_RTX)
22123
+ operands[0] = tmp;
22127
+ /* Find a DImode register. */
22128
+ CLEAR_HARD_REG_SET (regset);
22129
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
22130
+ if (tmp != NULL_RTX)
22132
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
22133
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
22137
+ /* Can we use the input register to form a DI register? */
22138
+ SET_HARD_REG_SET (regset);
22139
+ CLEAR_HARD_REG_BIT(regset,
22140
+ regno % 2 == 0 ? regno + 1 : regno - 1);
22141
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
22142
+ if (tmp == NULL_RTX)
22144
+ operands[regno % 2 == 1 ? 0 : 1] = tmp;
22148
+ gcc_assert (operands[0] != NULL_RTX);
22149
+ gcc_assert (operands[1] != NULL_RTX);
22150
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
22151
+ gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
22155
+ /* Make sure the instructions are ordered with lower memory access first. */
22156
+ if (offsets[0] > offsets[1])
22158
+ gap = offsets[0] - offsets[1];
22159
+ offset = offsets[1];
22161
+ /* Swap the instructions such that lower memory is accessed first. */
22162
+ SWAP_RTX (operands[0], operands[1]);
22163
+ SWAP_RTX (operands[2], operands[3]);
22165
+ SWAP_RTX (operands[4], operands[5]);
22169
+ gap = offsets[1] - offsets[0];
22170
+ offset = offsets[0];
22173
+ /* Make sure accesses are to consecutive memory locations. */
22177
+ /* Make sure we generate legal instructions. */
22178
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
22182
+ /* In Thumb state, where registers are almost unconstrained, there
22183
+ is little hope to fix it. */
22184
+ if (TARGET_THUMB2)
22187
+ if (load && commute)
22189
+ /* Try reordering registers. */
22190
+ SWAP_RTX (operands[0], operands[1]);
22191
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
22198
+ /* If input registers are dead after this pattern, they can be
22199
+ reordered or replaced by other registers that are free in the
22200
+ current pattern. */
22201
+ if (!peep2_reg_dead_p (4, operands[0])
22202
+ || !peep2_reg_dead_p (4, operands[1]))
22205
+ /* Try to reorder the input registers. */
22206
+ /* For example, the code
22211
+ can be transformed into
22216
+ if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
22219
+ SWAP_RTX (operands[0], operands[1]);
22223
+ /* Try to find a free DI register. */
22224
+ CLEAR_HARD_REG_SET (regset);
22225
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[0]));
22226
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[1]));
22229
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
22230
+ if (tmp == NULL_RTX)
22233
+ /* DREG must be an even-numbered register in DImode.
22234
+ Split it into SI registers. */
22235
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
22236
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
22237
+ gcc_assert (operands[0] != NULL_RTX);
22238
+ gcc_assert (operands[1] != NULL_RTX);
22239
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
22240
+ gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
22242
+ return (operands_ok_ldrd_strd (operands[0], operands[1],
22255
/* Print a symbolic form of X to the debug file, F. */
22257
@@ -14814,7 +15298,8 @@
22259
/* Constraints should ensure this. */
22260
gcc_assert (code0 == MEM && code1 == REG);
22261
- gcc_assert (REGNO (operands[1]) != IP_REGNUM);
22262
+ gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
22263
+ || (TARGET_ARM && TARGET_LDRD));
22265
switch (GET_CODE (XEXP (operands[0], 0)))
22267
@@ -16407,6 +16892,148 @@
22271
+/* STRD in ARM mode requires consecutive registers. This function emits STRD
22272
+ whenever possible, otherwise it emits single-word stores. The first store
22273
+ also allocates stack space for all saved registers, using writeback with
22274
+ post-addressing mode. All other stores use offset addressing. If no STRD
22275
+ can be emitted, this function emits a sequence of single-word stores,
22276
+ and not an STM as before, because single-word stores provide more freedom
22277
+ scheduling and can be turned into an STM by peephole optimizations. */
22279
+arm_emit_strd_push (unsigned long saved_regs_mask)
22281
+ int num_regs = 0;
22282
+ int i, j, dwarf_index = 0;
22284
+ rtx dwarf = NULL_RTX;
22285
+ rtx insn = NULL_RTX;
22288
+ /* TODO: A more efficient code can be emitted by changing the
22289
+ layout, e.g., first push all pairs that can use STRD to keep the
22290
+ stack aligned, and then push all other registers. */
22291
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
22292
+ if (saved_regs_mask & (1 << i))
22295
+ gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22296
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22297
+ gcc_assert (num_regs > 0);
22299
+ /* Create sequence for DWARF info. */
22300
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22302
+ /* For dwarf info, we generate explicit stack update. */
22303
+ tmp = gen_rtx_SET (VOIDmode,
22304
+ stack_pointer_rtx,
22305
+ plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22306
+ RTX_FRAME_RELATED_P (tmp) = 1;
22307
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22309
+ /* Save registers. */
22310
+ offset = - 4 * num_regs;
22312
+ while (j <= LAST_ARM_REGNUM)
22313
+ if (saved_regs_mask & (1 << j))
22316
+ && (saved_regs_mask & (1 << (j + 1))))
22318
+ /* Current register and previous register form register pair for
22319
+ which STRD can be generated. */
22322
+ /* Allocate stack space for all saved registers. */
22323
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22324
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22325
+ mem = gen_frame_mem (DImode, tmp);
22328
+ else if (offset > 0)
22329
+ mem = gen_frame_mem (DImode,
22330
+ plus_constant (Pmode,
22331
+ stack_pointer_rtx,
22334
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
22336
+ tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
22337
+ RTX_FRAME_RELATED_P (tmp) = 1;
22338
+ tmp = emit_insn (tmp);
22340
+ /* Record the first store insn. */
22341
+ if (dwarf_index == 1)
22344
+ /* Generate dwarf info. */
22345
+ mem = gen_frame_mem (SImode,
22346
+ plus_constant (Pmode,
22347
+ stack_pointer_rtx,
22349
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
22350
+ RTX_FRAME_RELATED_P (tmp) = 1;
22351
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22353
+ mem = gen_frame_mem (SImode,
22354
+ plus_constant (Pmode,
22355
+ stack_pointer_rtx,
22357
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
22358
+ RTX_FRAME_RELATED_P (tmp) = 1;
22359
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22366
+ /* Emit a single word store. */
22369
+ /* Allocate stack space for all saved registers. */
22370
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22371
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22372
+ mem = gen_frame_mem (SImode, tmp);
22375
+ else if (offset > 0)
22376
+ mem = gen_frame_mem (SImode,
22377
+ plus_constant (Pmode,
22378
+ stack_pointer_rtx,
22381
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
22383
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
22384
+ RTX_FRAME_RELATED_P (tmp) = 1;
22385
+ tmp = emit_insn (tmp);
22387
+ /* Record the first store insn. */
22388
+ if (dwarf_index == 1)
22391
+ /* Generate dwarf info. */
22392
+ mem = gen_frame_mem (SImode,
22393
+ plus_constant(Pmode,
22394
+ stack_pointer_rtx,
22396
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
22397
+ RTX_FRAME_RELATED_P (tmp) = 1;
22398
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22407
+ /* Attach dwarf info to the first insn we generate. */
22408
+ gcc_assert (insn != NULL_RTX);
22409
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22410
+ RTX_FRAME_RELATED_P (insn) = 1;
22413
/* Generate and emit an insn that we will recognize as a push_multi.
22414
Unfortunately, since this insn does not reflect very well the actual
22415
semantics of the operation, we need to annotate the insn for the benefit
22416
@@ -16549,6 +17176,19 @@
22420
+/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22421
+ SIZE is the offset to be adjusted.
22422
+ DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22424
+arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22428
+ RTX_FRAME_RELATED_P (insn) = 1;
22429
+ dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
22430
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22433
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
22434
SAVED_REGS_MASK shows which registers need to be restored.
22436
@@ -16606,6 +17246,17 @@
22437
if (saved_regs_mask & (1 << i))
22439
reg = gen_rtx_REG (SImode, i);
22440
+ if ((num_regs == 1) && emit_update && !return_in_pc)
22442
+ /* Emit single load with writeback. */
22443
+ tmp = gen_frame_mem (SImode,
22444
+ gen_rtx_POST_INC (Pmode,
22445
+ stack_pointer_rtx));
22446
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
22447
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22451
tmp = gen_rtx_SET (VOIDmode,
22454
@@ -16628,6 +17279,9 @@
22455
par = emit_insn (par);
22457
REG_NOTES (par) = dwarf;
22458
+ if (!return_in_pc)
22459
+ arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22460
+ stack_pointer_rtx, stack_pointer_rtx);
22463
/* Generate and emit an insn pattern that we will recognize as a pop_multi
22464
@@ -16698,6 +17352,9 @@
22466
par = emit_insn (par);
22467
REG_NOTES (par) = dwarf;
22469
+ arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22470
+ base_reg, base_reg);
22473
/* Generate and emit a pattern that will be recognized as LDRD pattern. If even
22474
@@ -16773,6 +17430,7 @@
22475
pattern can be emitted now. */
22476
par = emit_insn (par);
22477
REG_NOTES (par) = dwarf;
22478
+ RTX_FRAME_RELATED_P (par) = 1;
22482
@@ -16789,7 +17447,12 @@
22484
plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22485
RTX_FRAME_RELATED_P (tmp) = 1;
22487
+ tmp = emit_insn (tmp);
22488
+ if (!return_in_pc)
22490
+ arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22491
+ stack_pointer_rtx, stack_pointer_rtx);
22496
@@ -16823,9 +17486,11 @@
22499
par = emit_insn (tmp);
22500
+ REG_NOTES (par) = dwarf;
22501
+ arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22502
+ stack_pointer_rtx, stack_pointer_rtx);
22505
- REG_NOTES (par) = dwarf;
22507
else if ((num_regs % 2) == 1 && return_in_pc)
22509
@@ -16837,6 +17502,129 @@
22513
+/* LDRD in ARM mode needs consecutive registers as operands. This function
22514
+ emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22515
+ offset addressing and then generates one separate stack udpate. This provides
22516
+ more scheduling freedom, compared to writeback on every load. However,
22517
+ if the function returns using load into PC directly
22518
+ (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22519
+ before the last load. TODO: Add a peephole optimization to recognize
22520
+ the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22521
+ peephole optimization to merge the load at stack-offset zero
22522
+ with the stack update instruction using load with writeback
22523
+ in post-index addressing mode. */
22525
+arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22529
+ rtx par = NULL_RTX;
22530
+ rtx dwarf = NULL_RTX;
22533
+ /* Restore saved registers. */
22534
+ gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22536
+ while (j <= LAST_ARM_REGNUM)
22537
+ if (saved_regs_mask & (1 << j))
22540
+ && (saved_regs_mask & (1 << (j + 1)))
22541
+ && (j + 1) != PC_REGNUM)
22543
+ /* Current register and next register form register pair for which
22544
+ LDRD can be generated. PC is always the last register popped, and
22545
+ we handle it separately. */
22547
+ mem = gen_frame_mem (DImode,
22548
+ plus_constant (Pmode,
22549
+ stack_pointer_rtx,
22552
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
22554
+ tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
22555
+ RTX_FRAME_RELATED_P (tmp) = 1;
22556
+ tmp = emit_insn (tmp);
22558
+ /* Generate dwarf info. */
22560
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
22561
+ gen_rtx_REG (SImode, j),
22563
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
22564
+ gen_rtx_REG (SImode, j + 1),
22567
+ REG_NOTES (tmp) = dwarf;
22572
+ else if (j != PC_REGNUM)
22574
+ /* Emit a single word load. */
22576
+ mem = gen_frame_mem (SImode,
22577
+ plus_constant (Pmode,
22578
+ stack_pointer_rtx,
22581
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
22583
+ tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
22584
+ RTX_FRAME_RELATED_P (tmp) = 1;
22585
+ tmp = emit_insn (tmp);
22587
+ /* Generate dwarf info. */
22588
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22589
+ gen_rtx_REG (SImode, j),
22595
+ else /* j == PC_REGNUM */
22601
+ /* Update the stack. */
22604
+ tmp = gen_rtx_SET (Pmode,
22605
+ stack_pointer_rtx,
22606
+ plus_constant (Pmode,
22607
+ stack_pointer_rtx,
22609
+ RTX_FRAME_RELATED_P (tmp) = 1;
22614
+ if (saved_regs_mask & (1 << PC_REGNUM))
22616
+ /* Only PC is to be popped. */
22617
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22618
+ XVECEXP (par, 0, 0) = ret_rtx;
22619
+ tmp = gen_rtx_SET (SImode,
22620
+ gen_rtx_REG (SImode, PC_REGNUM),
22621
+ gen_frame_mem (SImode,
22622
+ gen_rtx_POST_INC (SImode,
22623
+ stack_pointer_rtx)));
22624
+ RTX_FRAME_RELATED_P (tmp) = 1;
22625
+ XVECEXP (par, 0, 1) = tmp;
22626
+ par = emit_jump_insn (par);
22628
+ /* Generate dwarf info. */
22629
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
22630
+ gen_rtx_REG (SImode, PC_REGNUM),
22632
+ REG_NOTES (par) = dwarf;
22636
/* Calculate the size of the return value that is passed in registers. */
22638
arm_size_return_regs (void)
22639
@@ -16861,11 +17649,27 @@
22640
|| df_regs_ever_live_p (LR_REGNUM));
22643
+/* We do not know if r3 will be available because
22644
+ we do have an indirect tailcall happening in this
22645
+ particular case. */
22647
+is_indirect_tailcall_p (rtx call)
22649
+ rtx pat = PATTERN (call);
22651
+ /* Indirect tail call. */
22652
+ pat = XVECEXP (pat, 0, 0);
22653
+ if (GET_CODE (pat) == SET)
22654
+ pat = SET_SRC (pat);
22656
+ pat = XEXP (XEXP (pat, 0), 0);
22657
+ return REG_P (pat);
22660
/* Return true if r3 is used by any of the tail call insns in the
22661
current function. */
22663
-any_sibcall_uses_r3 (void)
22664
+any_sibcall_could_use_r3 (void)
22668
@@ -16879,7 +17683,8 @@
22669
if (!CALL_P (call))
22670
call = prev_nonnote_nondebug_insn (call);
22671
gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22672
- if (find_regno_fusage (call, USE, 3))
22673
+ if (find_regno_fusage (call, USE, 3)
22674
+ || is_indirect_tailcall_p (call))
22678
@@ -17046,9 +17851,10 @@
22679
/* If it is safe to use r3, then do so. This sometimes
22680
generates better code on Thumb-2 by avoiding the need to
22681
use 32-bit push/pop instructions. */
22682
- if (! any_sibcall_uses_r3 ()
22683
+ if (! any_sibcall_could_use_r3 ()
22684
&& arm_size_return_regs () <= 12
22685
- && (offsets->saved_regs_mask & (1 << 3)) == 0)
22686
+ && (offsets->saved_regs_mask & (1 << 3)) == 0
22687
+ && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
22691
@@ -17480,6 +18286,12 @@
22693
thumb2_emit_strd_push (live_regs_mask);
22695
+ else if (TARGET_ARM
22696
+ && !TARGET_APCS_FRAME
22697
+ && !IS_INTERRUPT (func_type))
22699
+ arm_emit_strd_push (live_regs_mask);
22703
insn = emit_multi_reg_push (live_regs_mask);
22704
@@ -18757,7 +19569,14 @@
22705
enum arm_cond_code code;
22710
+ /* Maximum number of conditionally executed instructions in a block
22711
+ is minimum of the two max values: maximum allowed in an IT block
22712
+ and maximum that is beneficial according to the cost model and tune. */
22713
+ max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
22714
+ max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
22716
/* Remove the previous insn from the count of insns to be output. */
22717
if (arm_condexec_count)
22718
arm_condexec_count--;
22719
@@ -18799,9 +19618,9 @@
22720
/* ??? Recognize conditional jumps, and combine them with IT blocks. */
22721
if (GET_CODE (body) != COND_EXEC)
22723
- /* Allow up to 4 conditionally executed instructions in a block. */
22724
+ /* Maximum number of conditionally executed instructions in a block. */
22725
n = get_attr_ce_count (insn);
22726
- if (arm_condexec_masklen + n > 4)
22727
+ if (arm_condexec_masklen + n > max)
22730
predicate = COND_EXEC_TEST (body);
22731
@@ -19359,6 +20178,7 @@
22739
@@ -19376,14 +20196,15 @@
22740
#define TYPE_MODE_BIT(X) (1 << (X))
22742
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22743
- | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
22744
- | TYPE_MODE_BIT (T_DI))
22745
+ | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22746
+ | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22747
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22748
| TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22749
| TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22751
#define v8qi_UP T_V8QI
22752
#define v4hi_UP T_V4HI
22753
+#define v4hf_UP T_V4HF
22754
#define v2si_UP T_V2SI
22755
#define v2sf_UP T_V2SF
22757
@@ -19419,6 +20240,8 @@
22761
+ NEON_FLOAT_WIDEN,
22762
+ NEON_FLOAT_NARROW,
22766
@@ -19479,7 +20302,8 @@
22767
VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22768
{#N, NEON_##T, UP (J), CF (N, J), 0}
22770
-/* The mode entries in the following table correspond to the "key" type of the
22771
+/* The NEON builtin data can be found in arm_neon_builtins.def.
22772
+ The mode entries in the following table correspond to the "key" type of the
22773
instruction variant, i.e. equivalent to that which would be specified after
22774
the assembler mnemonic, which usually refers to the last vector operand.
22775
(Signed/unsigned/polynomial types are not differentiated between though, and
22776
@@ -19489,196 +20313,7 @@
22778
static neon_builtin_datum neon_builtin_data[] =
22780
- VAR10 (BINOP, vadd,
22781
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22782
- VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
22783
- VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
22784
- VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22785
- VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22786
- VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
22787
- VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22788
- VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22789
- VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
22790
- VAR2 (TERNOP, vfma, v2sf, v4sf),
22791
- VAR2 (TERNOP, vfms, v2sf, v4sf),
22792
- VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22793
- VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
22794
- VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
22795
- VAR2 (TERNOP, vqdmlal, v4hi, v2si),
22796
- VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
22797
- VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
22798
- VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
22799
- VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
22800
- VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
22801
- VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
22802
- VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
22803
- VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
22804
- VAR2 (BINOP, vqdmull, v4hi, v2si),
22805
- VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22806
- VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22807
- VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22808
- VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
22809
- VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
22810
- VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
22811
- VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22812
- VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22813
- VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22814
- VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
22815
- VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22816
- VAR10 (BINOP, vsub,
22817
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22818
- VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
22819
- VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
22820
- VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22821
- VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22822
- VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
22823
- VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22824
- VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22825
- VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22826
- VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22827
- VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22828
- VAR2 (BINOP, vcage, v2sf, v4sf),
22829
- VAR2 (BINOP, vcagt, v2sf, v4sf),
22830
- VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22831
- VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22832
- VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
22833
- VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22834
- VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
22835
- VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22836
- VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22837
- VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
22838
- VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22839
- VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22840
- VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
22841
- VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
22842
- VAR2 (BINOP, vrecps, v2sf, v4sf),
22843
- VAR2 (BINOP, vrsqrts, v2sf, v4sf),
22844
- VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22845
- VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
22846
- VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22847
- VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22848
- VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22849
- VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22850
- VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22851
- VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22852
- VAR2 (UNOP, vcnt, v8qi, v16qi),
22853
- VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
22854
- VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
22855
- VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
22856
- /* FIXME: vget_lane supports more variants than this! */
22857
- VAR10 (GETLANE, vget_lane,
22858
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22859
- VAR10 (SETLANE, vset_lane,
22860
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22861
- VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
22862
- VAR10 (DUP, vdup_n,
22863
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22864
- VAR10 (DUPLANE, vdup_lane,
22865
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22866
- VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
22867
- VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
22868
- VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
22869
- VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
22870
- VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
22871
- VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
22872
- VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
22873
- VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22874
- VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22875
- VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
22876
- VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
22877
- VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22878
- VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
22879
- VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
22880
- VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22881
- VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22882
- VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
22883
- VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
22884
- VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22885
- VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
22886
- VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
22887
- VAR10 (BINOP, vext,
22888
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22889
- VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22890
- VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
22891
- VAR2 (UNOP, vrev16, v8qi, v16qi),
22892
- VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
22893
- VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
22894
- VAR10 (SELECT, vbsl,
22895
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22896
- VAR2 (RINT, vrintn, v2sf, v4sf),
22897
- VAR2 (RINT, vrinta, v2sf, v4sf),
22898
- VAR2 (RINT, vrintp, v2sf, v4sf),
22899
- VAR2 (RINT, vrintm, v2sf, v4sf),
22900
- VAR2 (RINT, vrintz, v2sf, v4sf),
22901
- VAR2 (RINT, vrintx, v2sf, v4sf),
22902
- VAR1 (VTBL, vtbl1, v8qi),
22903
- VAR1 (VTBL, vtbl2, v8qi),
22904
- VAR1 (VTBL, vtbl3, v8qi),
22905
- VAR1 (VTBL, vtbl4, v8qi),
22906
- VAR1 (VTBX, vtbx1, v8qi),
22907
- VAR1 (VTBX, vtbx2, v8qi),
22908
- VAR1 (VTBX, vtbx3, v8qi),
22909
- VAR1 (VTBX, vtbx4, v8qi),
22910
- VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22911
- VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22912
- VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
22913
- VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
22914
- VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
22915
- VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
22916
- VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
22917
- VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
22918
- VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
22919
- VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
22920
- VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
22921
- VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
22922
- VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
22923
- VAR10 (LOAD1, vld1,
22924
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22925
- VAR10 (LOAD1LANE, vld1_lane,
22926
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22927
- VAR10 (LOAD1, vld1_dup,
22928
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22929
- VAR10 (STORE1, vst1,
22930
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22931
- VAR10 (STORE1LANE, vst1_lane,
22932
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22933
- VAR9 (LOADSTRUCT,
22934
- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
22935
- VAR7 (LOADSTRUCTLANE, vld2_lane,
22936
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22937
- VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
22938
- VAR9 (STORESTRUCT, vst2,
22939
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
22940
- VAR7 (STORESTRUCTLANE, vst2_lane,
22941
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22942
- VAR9 (LOADSTRUCT,
22943
- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
22944
- VAR7 (LOADSTRUCTLANE, vld3_lane,
22945
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22946
- VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
22947
- VAR9 (STORESTRUCT, vst3,
22948
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
22949
- VAR7 (STORESTRUCTLANE, vst3_lane,
22950
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22951
- VAR9 (LOADSTRUCT, vld4,
22952
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
22953
- VAR7 (LOADSTRUCTLANE, vld4_lane,
22954
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22955
- VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
22956
- VAR9 (STORESTRUCT, vst4,
22957
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
22958
- VAR7 (STORESTRUCTLANE, vst4_lane,
22959
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
22960
- VAR10 (LOGICBINOP, vand,
22961
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22962
- VAR10 (LOGICBINOP, vorr,
22963
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22964
- VAR10 (BINOP, veor,
22965
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22966
- VAR10 (LOGICBINOP, vbic,
22967
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
22968
- VAR10 (LOGICBINOP, vorn,
22969
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
22970
+#include "arm_neon_builtins.def"
22974
@@ -19693,9 +20328,36 @@
22978
-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
22979
- symbolic names defined here (which would require too much duplication).
22981
+#define CF(N,X) ARM_BUILTIN_NEON_##N##X
22982
+#define VAR1(T, N, A) \
22984
+#define VAR2(T, N, A, B) \
22985
+ VAR1 (T, N, A), \
22987
+#define VAR3(T, N, A, B, C) \
22988
+ VAR2 (T, N, A, B), \
22990
+#define VAR4(T, N, A, B, C, D) \
22991
+ VAR3 (T, N, A, B, C), \
22993
+#define VAR5(T, N, A, B, C, D, E) \
22994
+ VAR4 (T, N, A, B, C, D), \
22996
+#define VAR6(T, N, A, B, C, D, E, F) \
22997
+ VAR5 (T, N, A, B, C, D, E), \
22999
+#define VAR7(T, N, A, B, C, D, E, F, G) \
23000
+ VAR6 (T, N, A, B, C, D, E, F), \
23002
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
23003
+ VAR7 (T, N, A, B, C, D, E, F, G), \
23005
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23006
+ VAR8 (T, N, A, B, C, D, E, F, G, H), \
23008
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23009
+ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23013
ARM_BUILTIN_GETWCGR0,
23014
@@ -19944,11 +20606,25 @@
23016
ARM_BUILTIN_WMERGE,
23018
- ARM_BUILTIN_NEON_BASE,
23019
+#include "arm_neon_builtins.def"
23021
- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
23025
+#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23039
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23042
@@ -19959,6 +20635,7 @@
23044
tree neon_intQI_type_node;
23045
tree neon_intHI_type_node;
23046
+ tree neon_floatHF_type_node;
23047
tree neon_polyQI_type_node;
23048
tree neon_polyHI_type_node;
23049
tree neon_intSI_type_node;
23050
@@ -19985,6 +20662,7 @@
23052
tree V8QI_type_node;
23053
tree V4HI_type_node;
23054
+ tree V4HF_type_node;
23055
tree V2SI_type_node;
23056
tree V2SF_type_node;
23057
tree V16QI_type_node;
23058
@@ -20039,6 +20717,9 @@
23059
neon_float_type_node = make_node (REAL_TYPE);
23060
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23061
layout_type (neon_float_type_node);
23062
+ neon_floatHF_type_node = make_node (REAL_TYPE);
23063
+ TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23064
+ layout_type (neon_floatHF_type_node);
23066
/* Define typedefs which exactly correspond to the modes we are basing vector
23067
types on. If you change these names you'll need to change
23068
@@ -20047,6 +20728,8 @@
23069
"__builtin_neon_qi");
23070
(*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23071
"__builtin_neon_hi");
23072
+ (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23073
+ "__builtin_neon_hf");
23074
(*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23075
"__builtin_neon_si");
23076
(*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23077
@@ -20088,6 +20771,8 @@
23078
build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23080
build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23082
+ build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23084
build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23086
@@ -20210,7 +20895,7 @@
23087
neon_builtin_datum *d = &neon_builtin_data[i];
23089
const char* const modenames[] = {
23090
- "v8qi", "v4hi", "v2si", "v2sf", "di",
23091
+ "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23092
"v16qi", "v8hi", "v4si", "v4sf", "v2di",
23095
@@ -20413,8 +21098,9 @@
23096
case NEON_REINTERP:
23098
/* We iterate over 5 doubleword types, then 5 quadword
23100
- int rhs = d->mode % 5;
23101
+ types. V4HF is not a type used in reinterpret, so we translate
23102
+ d->mode to the correct index in reinterp_ftype_dreg. */
23103
+ int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
23104
switch (insn_data[d->code].operand[0].mode)
23106
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23107
@@ -20431,7 +21117,38 @@
23111
+ case NEON_FLOAT_WIDEN:
23113
+ tree eltype = NULL_TREE;
23114
+ tree return_type = NULL_TREE;
23116
+ switch (insn_data[d->code].operand[1].mode)
23119
+ eltype = V4HF_type_node;
23120
+ return_type = V4SF_type_node;
23122
+ default: gcc_unreachable ();
23124
+ ftype = build_function_type_list (return_type, eltype, NULL);
23127
+ case NEON_FLOAT_NARROW:
23129
+ tree eltype = NULL_TREE;
23130
+ tree return_type = NULL_TREE;
23132
+ switch (insn_data[d->code].operand[1].mode)
23135
+ eltype = V4SF_type_node;
23136
+ return_type = V4HF_type_node;
23138
+ default: gcc_unreachable ();
23140
+ ftype = build_function_type_list (return_type, eltype, NULL);
23144
gcc_unreachable ();
23146
@@ -21428,6 +22145,8 @@
23150
+ case NEON_FLOAT_WIDEN:
23151
+ case NEON_FLOAT_NARROW:
23152
case NEON_REINTERP:
23153
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
23154
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
23155
@@ -21625,7 +22344,7 @@
23159
- int fcode = DECL_FUNCTION_CODE (fndecl);
23160
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
23162
enum machine_mode tmode;
23163
enum machine_mode mode0;
23164
@@ -23342,7 +24061,7 @@
23165
all we really need to check here is if single register is to be
23166
returned, or multiple register return. */
23168
-thumb2_expand_return (void)
23169
+thumb2_expand_return (bool simple_return)
23172
unsigned long saved_regs_mask;
23173
@@ -23355,7 +24074,7 @@
23174
if (saved_regs_mask & (1 << i))
23177
- if (saved_regs_mask)
23178
+ if (!simple_return && saved_regs_mask)
23182
@@ -23633,6 +24352,7 @@
23184
if (frame_pointer_needed)
23187
/* Restore stack pointer if necessary. */
23190
@@ -23643,9 +24363,12 @@
23191
/* Force out any pending memory operations that reference stacked data
23192
before stack de-allocation occurs. */
23193
emit_insn (gen_blockage ());
23194
- emit_insn (gen_addsi3 (stack_pointer_rtx,
23195
- hard_frame_pointer_rtx,
23196
- GEN_INT (amount)));
23197
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23198
+ hard_frame_pointer_rtx,
23199
+ GEN_INT (amount)));
23200
+ arm_add_cfa_adjust_cfa_note (insn, amount,
23201
+ stack_pointer_rtx,
23202
+ hard_frame_pointer_rtx);
23204
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23206
@@ -23655,16 +24378,25 @@
23208
/* In Thumb-2 mode, the frame pointer points to the last saved
23210
- amount = offsets->locals_base - offsets->saved_regs;
23212
- emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23213
- hard_frame_pointer_rtx,
23214
- GEN_INT (amount)));
23215
+ amount = offsets->locals_base - offsets->saved_regs;
23218
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23219
+ hard_frame_pointer_rtx,
23220
+ GEN_INT (amount)));
23221
+ arm_add_cfa_adjust_cfa_note (insn, amount,
23222
+ hard_frame_pointer_rtx,
23223
+ hard_frame_pointer_rtx);
23226
/* Force out any pending memory operations that reference stacked data
23227
before stack de-allocation occurs. */
23228
emit_insn (gen_blockage ());
23229
- emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23230
+ insn = emit_insn (gen_movsi (stack_pointer_rtx,
23231
+ hard_frame_pointer_rtx));
23232
+ arm_add_cfa_adjust_cfa_note (insn, 0,
23233
+ stack_pointer_rtx,
23234
+ hard_frame_pointer_rtx);
23235
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23237
emit_insn (gen_force_register_use (stack_pointer_rtx));
23238
@@ -23677,12 +24409,15 @@
23239
amount = offsets->outgoing_args - offsets->saved_regs;
23243
/* Force out any pending memory operations that reference stacked data
23244
before stack de-allocation occurs. */
23245
emit_insn (gen_blockage ());
23246
- emit_insn (gen_addsi3 (stack_pointer_rtx,
23247
- stack_pointer_rtx,
23248
- GEN_INT (amount)));
23249
+ tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
23250
+ stack_pointer_rtx,
23251
+ GEN_INT (amount)));
23252
+ arm_add_cfa_adjust_cfa_note (tmp, amount,
23253
+ stack_pointer_rtx, stack_pointer_rtx);
23254
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
23256
emit_insn (gen_force_register_use (stack_pointer_rtx));
23257
@@ -23735,6 +24470,8 @@
23258
REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23259
gen_rtx_REG (V2SImode, i),
23261
+ arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
23262
+ stack_pointer_rtx, stack_pointer_rtx);
23265
if (saved_regs_mask)
23266
@@ -23782,6 +24519,9 @@
23267
REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23268
gen_rtx_REG (SImode, i),
23270
+ arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
23271
+ stack_pointer_rtx,
23272
+ stack_pointer_rtx);
23276
@@ -23792,6 +24532,8 @@
23279
thumb2_emit_ldrd_pop (saved_regs_mask);
23280
+ else if (TARGET_ARM && !IS_INTERRUPT (func_type))
23281
+ arm_emit_ldrd_pop (saved_regs_mask);
23283
arm_emit_multi_reg_pop (saved_regs_mask);
23285
@@ -23804,10 +24546,34 @@
23288
if (crtl->args.pretend_args_size)
23289
- emit_insn (gen_addsi3 (stack_pointer_rtx,
23290
- stack_pointer_rtx,
23291
- GEN_INT (crtl->args.pretend_args_size)));
23294
+ rtx dwarf = NULL_RTX;
23295
+ rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
23296
+ stack_pointer_rtx,
23297
+ GEN_INT (crtl->args.pretend_args_size)));
23299
+ RTX_FRAME_RELATED_P (tmp) = 1;
23301
+ if (cfun->machine->uses_anonymous_args)
23303
+ /* Restore pretend args. Refer arm_expand_prologue on how to save
23304
+ pretend_args in stack. */
23305
+ int num_regs = crtl->args.pretend_args_size / 4;
23306
+ saved_regs_mask = (0xf0 >> num_regs) & 0xf;
23307
+ for (j = 0, i = 0; j < num_regs; i++)
23308
+ if (saved_regs_mask & (1 << i))
23310
+ rtx reg = gen_rtx_REG (SImode, i);
23311
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
23314
+ REG_NOTES (tmp) = dwarf;
23316
+ arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
23317
+ stack_pointer_rtx, stack_pointer_rtx);
23320
if (!really_return)
23323
@@ -25060,7 +25826,7 @@
23325
/* Neon also supports V2SImode, etc. listed in the clause below. */
23326
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23327
- || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23328
+ || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23331
if ((TARGET_NEON || TARGET_IWMMXT)
23332
@@ -25223,9 +25989,8 @@
23334
nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23335
p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23336
- regno = (regno - FIRST_VFP_REGNUM) / 2;
23337
for (i = 0; i < nregs; i++)
23338
- XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23339
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
23343
@@ -25475,9 +26240,17 @@
23344
handled_one = true;
23347
+ /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
23348
+ to get correct dwarf information for shrink-wrap. We should not
23349
+ emit unwind information for it because these are used either for
23350
+ pretend arguments or notes to adjust sp and restore registers from
23352
+ case REG_CFA_ADJUST_CFA:
23353
+ case REG_CFA_RESTORE:
23356
case REG_CFA_DEF_CFA:
23357
case REG_CFA_EXPRESSION:
23358
- case REG_CFA_ADJUST_CFA:
23359
case REG_CFA_OFFSET:
23360
/* ??? Only handling here what we actually emit. */
23361
gcc_unreachable ();
23362
@@ -25875,6 +26648,7 @@
23370
@@ -25903,6 +26677,7 @@
23371
{ V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23372
{ V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23373
{ V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23374
+ { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
23375
{ V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23376
{ V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23377
{ V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23378
@@ -26001,6 +26776,60 @@
23379
return !TARGET_THUMB1;
23383
+arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
23385
+ enum machine_mode in_mode, out_mode;
23388
+ if (TREE_CODE (type_out) != VECTOR_TYPE
23389
+ || TREE_CODE (type_in) != VECTOR_TYPE
23390
+ || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
23391
+ return NULL_TREE;
23393
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
23394
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
23395
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
23396
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
23398
+/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
23399
+ decl of the vectorized builtin for the appropriate vector mode.
23400
+ NULL_TREE is returned if no such builtin is available. */
23401
+#undef ARM_CHECK_BUILTIN_MODE
23402
+#define ARM_CHECK_BUILTIN_MODE(C) \
23403
+ (out_mode == SFmode && out_n == C \
23404
+ && in_mode == SFmode && in_n == C)
23406
+#undef ARM_FIND_VRINT_VARIANT
23407
+#define ARM_FIND_VRINT_VARIANT(N) \
23408
+ (ARM_CHECK_BUILTIN_MODE (2) \
23409
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
23410
+ : (ARM_CHECK_BUILTIN_MODE (4) \
23411
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
23414
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
23416
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
23419
+ case BUILT_IN_FLOORF:
23420
+ return ARM_FIND_VRINT_VARIANT (vrintm);
23421
+ case BUILT_IN_CEILF:
23422
+ return ARM_FIND_VRINT_VARIANT (vrintp);
23423
+ case BUILT_IN_TRUNCF:
23424
+ return ARM_FIND_VRINT_VARIANT (vrintz);
23425
+ case BUILT_IN_ROUNDF:
23426
+ return ARM_FIND_VRINT_VARIANT (vrinta);
23428
+ return NULL_TREE;
23431
+ return NULL_TREE;
23433
+#undef ARM_CHECK_BUILTIN_MODE
23434
+#undef ARM_FIND_VRINT_VARIANT
23436
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
23437
static HOST_WIDE_INT
23438
arm_vector_alignment (const_tree type)
23439
@@ -26231,40 +27060,72 @@
23440
emit_insn (gen_memory_barrier ());
23443
-/* Emit the load-exclusive and store-exclusive instructions. */
23444
+/* Emit the load-exclusive and store-exclusive instructions.
23445
+ Use acquire and release versions if necessary. */
23448
-arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
23449
+arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
23451
rtx (*gen) (rtx, rtx);
23456
- case QImode: gen = gen_arm_load_exclusiveqi; break;
23457
- case HImode: gen = gen_arm_load_exclusivehi; break;
23458
- case SImode: gen = gen_arm_load_exclusivesi; break;
23459
- case DImode: gen = gen_arm_load_exclusivedi; break;
23461
- gcc_unreachable ();
23464
+ case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
23465
+ case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
23466
+ case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
23467
+ case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
23469
+ gcc_unreachable ();
23476
+ case QImode: gen = gen_arm_load_exclusiveqi; break;
23477
+ case HImode: gen = gen_arm_load_exclusivehi; break;
23478
+ case SImode: gen = gen_arm_load_exclusivesi; break;
23479
+ case DImode: gen = gen_arm_load_exclusivedi; break;
23481
+ gcc_unreachable ();
23485
emit_insn (gen (rval, mem));
23489
-arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
23490
+arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
23491
+ rtx mem, bool rel)
23493
rtx (*gen) (rtx, rtx, rtx);
23498
- case QImode: gen = gen_arm_store_exclusiveqi; break;
23499
- case HImode: gen = gen_arm_store_exclusivehi; break;
23500
- case SImode: gen = gen_arm_store_exclusivesi; break;
23501
- case DImode: gen = gen_arm_store_exclusivedi; break;
23503
- gcc_unreachable ();
23506
+ case QImode: gen = gen_arm_store_release_exclusiveqi; break;
23507
+ case HImode: gen = gen_arm_store_release_exclusivehi; break;
23508
+ case SImode: gen = gen_arm_store_release_exclusivesi; break;
23509
+ case DImode: gen = gen_arm_store_release_exclusivedi; break;
23511
+ gcc_unreachable ();
23518
+ case QImode: gen = gen_arm_store_exclusiveqi; break;
23519
+ case HImode: gen = gen_arm_store_exclusivehi; break;
23520
+ case SImode: gen = gen_arm_store_exclusivesi; break;
23521
+ case DImode: gen = gen_arm_store_exclusivedi; break;
23523
+ gcc_unreachable ();
23527
emit_insn (gen (bval, rval, mem));
23529
@@ -26299,6 +27160,15 @@
23530
mod_f = operands[7];
23531
mode = GET_MODE (mem);
23533
+ /* Normally the succ memory model must be stronger than fail, but in the
23534
+ unlikely event of fail being ACQUIRE and succ being RELEASE we need to
23535
+ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
23537
+ if (TARGET_HAVE_LDACQ
23538
+ && INTVAL (mod_f) == MEMMODEL_ACQUIRE
23539
+ && INTVAL (mod_s) == MEMMODEL_RELEASE)
23540
+ mod_s = GEN_INT (MEMMODEL_ACQ_REL);
23545
@@ -26373,8 +27243,20 @@
23546
scratch = operands[7];
23547
mode = GET_MODE (mem);
23549
- arm_pre_atomic_barrier (mod_s);
23550
+ bool use_acquire = TARGET_HAVE_LDACQ
23551
+ && !(mod_s == MEMMODEL_RELAXED
23552
+ || mod_s == MEMMODEL_CONSUME
23553
+ || mod_s == MEMMODEL_RELEASE);
23555
+ bool use_release = TARGET_HAVE_LDACQ
23556
+ && !(mod_s == MEMMODEL_RELAXED
23557
+ || mod_s == MEMMODEL_CONSUME
23558
+ || mod_s == MEMMODEL_ACQUIRE);
23560
+ /* Checks whether a barrier is needed and emits one accordingly. */
23561
+ if (!(use_acquire || use_release))
23562
+ arm_pre_atomic_barrier (mod_s);
23567
@@ -26383,7 +27265,7 @@
23569
label2 = gen_label_rtx ();
23571
- arm_emit_load_exclusive (mode, rval, mem);
23572
+ arm_emit_load_exclusive (mode, rval, mem, use_acquire);
23574
cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
23575
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23576
@@ -26391,7 +27273,7 @@
23577
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
23578
emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
23580
- arm_emit_store_exclusive (mode, scratch, mem, newval);
23581
+ arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
23583
/* Weak or strong, we want EQ to be true for success, so that we
23584
match the flags that we got from the compare above. */
23585
@@ -26410,7 +27292,9 @@
23586
if (mod_f != MEMMODEL_RELAXED)
23587
emit_label (label2);
23589
- arm_post_atomic_barrier (mod_s);
23590
+ /* Checks whether a barrier is needed and emits one accordingly. */
23591
+ if (!(use_acquire || use_release))
23592
+ arm_post_atomic_barrier (mod_s);
23594
if (mod_f == MEMMODEL_RELAXED)
23595
emit_label (label2);
23596
@@ -26425,8 +27309,20 @@
23597
enum machine_mode wmode = (mode == DImode ? DImode : SImode);
23600
- arm_pre_atomic_barrier (model);
23601
+ bool use_acquire = TARGET_HAVE_LDACQ
23602
+ && !(model == MEMMODEL_RELAXED
23603
+ || model == MEMMODEL_CONSUME
23604
+ || model == MEMMODEL_RELEASE);
23606
+ bool use_release = TARGET_HAVE_LDACQ
23607
+ && !(model == MEMMODEL_RELAXED
23608
+ || model == MEMMODEL_CONSUME
23609
+ || model == MEMMODEL_ACQUIRE);
23611
+ /* Checks whether a barrier is needed and emits one accordingly. */
23612
+ if (!(use_acquire || use_release))
23613
+ arm_pre_atomic_barrier (model);
23615
label = gen_label_rtx ();
23616
emit_label (label);
23618
@@ -26438,7 +27334,7 @@
23620
value = simplify_gen_subreg (wmode, value, mode, 0);
23622
- arm_emit_load_exclusive (mode, old_out, mem);
23623
+ arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
23627
@@ -26486,12 +27382,15 @@
23631
- arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
23632
+ arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
23635
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23636
emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
23638
- arm_post_atomic_barrier (model);
23639
+ /* Checks whether a barrier is needed and emits one accordingly. */
23640
+ if (!(use_acquire || use_release))
23641
+ arm_post_atomic_barrier (model);
23644
#define MAX_VECT_LEN 16
23645
@@ -27431,4 +28330,12 @@
23649
+/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23651
+static unsigned HOST_WIDE_INT
23652
+arm_asan_shadow_offset (void)
23654
+ return (unsigned HOST_WIDE_INT) 1 << 29;
23657
#include "gt-arm.h"
23658
--- a/src/gcc/config/arm/arm.h
23659
+++ b/src/gcc/config/arm/arm.h
23660
@@ -183,6 +183,11 @@
23662
#define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1))
23664
+/* The maximaum number of instructions that is beneficial to
23665
+ conditionally execute. */
23666
+#undef MAX_CONDITIONAL_EXECUTE
23667
+#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute ()
23669
extern int arm_target_label;
23670
extern int arm_ccfsm_state;
23671
extern GTY(()) rtx arm_target_insn;
23672
@@ -350,10 +355,16 @@
23673
#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \
23676
+/* Nonzero if this chip supports load-acquire and store-release. */
23677
+#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8)
23679
/* Nonzero if integer division instructions supported. */
23680
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
23681
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
23683
+/* Should NEON be used for 64-bits bitops. */
23684
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
23686
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
23687
then TARGET_AAPCS_BASED must be true -- but the converse does not
23688
hold. TARGET_BPABI implies the use of the BPABI runtime library,
23689
@@ -539,6 +550,10 @@
23690
/* Nonzero if chip supports integer division instruction in Thumb mode. */
23691
extern int arm_arch_thumb_hwdiv;
23693
+/* Nonzero if we should use Neon to handle 64-bits operations rather
23694
+ than core registers. */
23695
+extern int prefer_neon_for_64bits;
23697
#ifndef TARGET_DEFAULT
23698
#define TARGET_DEFAULT (MASK_APCS_FRAME)
23700
@@ -1040,7 +1055,7 @@
23701
/* Modes valid for Neon D registers. */
23702
#define VALID_NEON_DREG_MODE(MODE) \
23703
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
23704
- || (MODE) == V2SFmode || (MODE) == DImode)
23705
+ || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
23707
/* Modes valid for Neon Q registers. */
23708
#define VALID_NEON_QREG_MODE(MODE) \
23709
@@ -1130,6 +1145,7 @@
23713
+ CALLER_SAVE_REGS,
23717
@@ -1156,6 +1172,7 @@
23721
+ "CALLER_SAVE_REGS", \
23724
"VFP_D0_D7_REGS", \
23725
@@ -1181,6 +1198,7 @@
23726
{ 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
23727
{ 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
23728
{ 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
23729
+ { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
23730
{ 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
23731
{ 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
23732
{ 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \
23733
@@ -1639,7 +1657,7 @@
23735
#define EXIT_IGNORE_STACK 1
23737
-#define EPILOGUE_USES(REGNO) ((REGNO) == LR_REGNUM)
23738
+#define EPILOGUE_USES(REGNO) (epilogue_completed && (REGNO) == LR_REGNUM)
23740
/* Determine if the epilogue should be output as RTL.
23741
You should override this if you define FUNCTION_EXTRA_EPILOGUE. */
23742
--- a/src/gcc/config/arm/cortex-a8.md
23743
+++ b/src/gcc/config/arm/cortex-a8.md
23744
@@ -139,22 +139,22 @@
23746
(define_insn_reservation "cortex_a8_mul" 6
23747
(and (eq_attr "tune" "cortexa8")
23748
- (eq_attr "insn" "mul,smulxy,smmul"))
23749
+ (eq_attr "type" "mul,smulxy,smmul"))
23750
"cortex_a8_multiply_2")
23752
(define_insn_reservation "cortex_a8_mla" 6
23753
(and (eq_attr "tune" "cortexa8")
23754
- (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
23755
+ (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
23756
"cortex_a8_multiply_2")
23758
(define_insn_reservation "cortex_a8_mull" 7
23759
(and (eq_attr "tune" "cortexa8")
23760
- (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
23761
+ (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy"))
23762
"cortex_a8_multiply_3")
23764
(define_insn_reservation "cortex_a8_smulwy" 5
23765
(and (eq_attr "tune" "cortexa8")
23766
- (eq_attr "insn" "smulwy,smuad,smusd"))
23767
+ (eq_attr "type" "smulwy,smuad,smusd"))
23768
"cortex_a8_multiply")
23770
;; smlald and smlsld are multiply-accumulate instructions but do not
23771
@@ -162,7 +162,7 @@
23772
;; cannot go in cortex_a8_mla above. (See below for bypass details.)
23773
(define_insn_reservation "cortex_a8_smlald" 6
23774
(and (eq_attr "tune" "cortexa8")
23775
- (eq_attr "insn" "smlald,smlsld"))
23776
+ (eq_attr "type" "smlald,smlsld"))
23777
"cortex_a8_multiply_2")
23779
;; A multiply with a single-register result or an MLA, followed by an
23780
--- a/src/gcc/config/arm/arm-fixed.md
23781
+++ b/src/gcc/config/arm/arm-fixed.md
23782
@@ -19,12 +19,13 @@
23783
;; This file contains ARM instructions that support fixed-point operations.
23785
(define_insn "add<mode>3"
23786
- [(set (match_operand:FIXED 0 "s_register_operand" "=r")
23787
- (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
23788
- (match_operand:FIXED 2 "s_register_operand" "r")))]
23789
+ [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
23790
+ (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
23791
+ (match_operand:FIXED 2 "s_register_operand" "l,r")))]
23793
"add%?\\t%0, %1, %2"
23794
- [(set_attr "predicable" "yes")])
23795
+ [(set_attr "predicable" "yes")
23796
+ (set_attr "predicable_short_it" "yes,no")])
23798
(define_insn "add<mode>3"
23799
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
23801
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
23803
"sadd<qaddsub_suf>%?\\t%0, %1, %2"
23804
- [(set_attr "predicable" "yes")])
23805
+ [(set_attr "predicable" "yes")
23806
+ (set_attr "predicable_short_it" "no")])
23808
(define_insn "usadd<mode>3"
23809
[(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
23811
(match_operand:UQADDSUB 2 "s_register_operand" "r")))]
23813
"uqadd<qaddsub_suf>%?\\t%0, %1, %2"
23814
- [(set_attr "predicable" "yes")])
23815
+ [(set_attr "predicable" "yes")
23816
+ (set_attr "predicable_short_it" "no")])
23818
(define_insn "ssadd<mode>3"
23819
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
23820
@@ -48,15 +51,17 @@
23821
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
23823
"qadd<qaddsub_suf>%?\\t%0, %1, %2"
23824
- [(set_attr "predicable" "yes")])
23825
+ [(set_attr "predicable" "yes")
23826
+ (set_attr "predicable_short_it" "no")])
23828
(define_insn "sub<mode>3"
23829
- [(set (match_operand:FIXED 0 "s_register_operand" "=r")
23830
- (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
23831
- (match_operand:FIXED 2 "s_register_operand" "r")))]
23832
+ [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
23833
+ (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
23834
+ (match_operand:FIXED 2 "s_register_operand" "l,r")))]
23836
"sub%?\\t%0, %1, %2"
23837
- [(set_attr "predicable" "yes")])
23838
+ [(set_attr "predicable" "yes")
23839
+ (set_attr "predicable_short_it" "yes,no")])
23841
(define_insn "sub<mode>3"
23842
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
23844
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
23846
"ssub<qaddsub_suf>%?\\t%0, %1, %2"
23847
- [(set_attr "predicable" "yes")])
23848
+ [(set_attr "predicable" "yes")
23849
+ (set_attr "predicable_short_it" "no")])
23851
(define_insn "ussub<mode>3"
23852
[(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
23854
(match_operand:UQADDSUB 2 "s_register_operand" "r")))]
23856
"uqsub<qaddsub_suf>%?\\t%0, %1, %2"
23857
- [(set_attr "predicable" "yes")])
23858
+ [(set_attr "predicable" "yes")
23859
+ (set_attr "predicable_short_it" "no")])
23861
(define_insn "sssub<mode>3"
23862
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
23864
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
23866
"qsub<qaddsub_suf>%?\\t%0, %1, %2"
23867
- [(set_attr "predicable" "yes")])
23868
+ [(set_attr "predicable" "yes")
23869
+ (set_attr "predicable_short_it" "no")])
23871
;; Fractional multiplies.
23873
@@ -374,6 +382,7 @@
23874
"TARGET_32BIT && arm_arch6"
23875
"ssat%?\\t%0, #16, %2%S1"
23876
[(set_attr "predicable" "yes")
23877
+ (set_attr "predicable_short_it" "no")
23878
(set_attr "insn" "sat")
23879
(set_attr "shift" "1")
23880
(set_attr "type" "alu_shift")])
23881
@@ -384,4 +393,5 @@
23883
"usat%?\\t%0, #16, %1"
23884
[(set_attr "predicable" "yes")
23885
+ (set_attr "predicable_short_it" "no")
23886
(set_attr "insn" "sat")])
23887
--- a/src/gcc/config/arm/unspecs.md
23888
+++ b/src/gcc/config/arm/unspecs.md
23890
; FPSCR rounding mode and signal inexactness.
23891
UNSPEC_VRINTA ; Represent a float to integral float rounding
23892
; towards nearest, ties away from zero.
23893
+ UNSPEC_RRX ; Rotate Right with Extend shifts register right
23894
+ ; by one place, with Carry flag shifted into bit[31].
23897
(define_c_enum "unspec" [
23898
@@ -139,6 +141,10 @@
23899
VUNSPEC_ATOMIC_OP ; Represent an atomic operation.
23900
VUNSPEC_LL ; Represent a load-register-exclusive.
23901
VUNSPEC_SC ; Represent a store-register-exclusive.
23902
+ VUNSPEC_LAX ; Represent a load-register-acquire-exclusive.
23903
+ VUNSPEC_SLX ; Represent a store-register-release-exclusive.
23904
+ VUNSPEC_LDA ; Represent a store-register-acquire.
23905
+ VUNSPEC_STL ; Represent a store-register-release.
23908
;; Enumerators for NEON unspecs.
23909
--- a/src/gcc/config/arm/cortex-m4.md
23910
+++ b/src/gcc/config/arm/cortex-m4.md
23912
;; ALU and multiply is one cycle.
23913
(define_insn_reservation "cortex_m4_alu" 1
23914
(and (eq_attr "tune" "cortexm4")
23915
- (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult"))
23916
+ (ior (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,\
23917
+ alu_shift,alu_shift_reg")
23918
+ (ior (eq_attr "mul32" "yes")
23919
+ (eq_attr "mul64" "yes"))))
23922
;; Byte, half-word and word load is two cycles.
23923
--- a/src/gcc/config/arm/linux-eabi.h
23924
+++ b/src/gcc/config/arm/linux-eabi.h
23925
@@ -84,10 +84,14 @@
23926
LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
23927
LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
23929
+#undef ASAN_CC1_SPEC
23930
+#define ASAN_CC1_SPEC "%{fsanitize=*:-funwind-tables}"
23934
- LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \
23935
- GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC)
23936
+ LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC, \
23937
+ GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC " " \
23938
+ ANDROID_CC1_SPEC)
23940
#define CC1PLUS_SPEC \
23941
LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
23942
--- a/src/gcc/config/arm/arm-cores.def
23943
+++ b/src/gcc/config/arm/arm-cores.def
23944
@@ -129,9 +129,11 @@
23945
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
23946
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
23947
ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
23948
+ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex_a5)
23949
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
23950
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
23951
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
23952
+ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
23953
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
23954
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
23955
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
23956
--- a/src/gcc/config/arm/cortex-r4.md
23957
+++ b/src/gcc/config/arm/cortex-r4.md
23958
@@ -128,32 +128,32 @@
23960
(define_insn_reservation "cortex_r4_mul_4" 4
23961
(and (eq_attr "tune_cortexr4" "yes")
23962
- (eq_attr "insn" "mul,smmul"))
23963
+ (eq_attr "type" "mul,smmul"))
23966
(define_insn_reservation "cortex_r4_mul_3" 3
23967
(and (eq_attr "tune_cortexr4" "yes")
23968
- (eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
23969
+ (eq_attr "type" "smulxy,smulwy,smuad,smusd"))
23972
(define_insn_reservation "cortex_r4_mla_4" 4
23973
(and (eq_attr "tune_cortexr4" "yes")
23974
- (eq_attr "insn" "mla,smmla"))
23975
+ (eq_attr "type" "mla,smmla"))
23978
(define_insn_reservation "cortex_r4_mla_3" 3
23979
(and (eq_attr "tune_cortexr4" "yes")
23980
- (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
23981
+ (eq_attr "type" "smlaxy,smlawy,smlad,smlsd"))
23984
(define_insn_reservation "cortex_r4_smlald" 3
23985
(and (eq_attr "tune_cortexr4" "yes")
23986
- (eq_attr "insn" "smlald,smlsld"))
23987
+ (eq_attr "type" "smlald,smlsld"))
23990
(define_insn_reservation "cortex_r4_mull" 4
23991
(and (eq_attr "tune_cortexr4" "yes")
23992
- (eq_attr "insn" "smull,umull,umlal,umaal"))
23993
+ (eq_attr "type" "smull,umull,umlal,umaal"))
23996
;; A multiply or an MLA with a single-register result, followed by an
23997
@@ -196,12 +196,12 @@
23998
;; This gives a latency of nine for udiv and ten for sdiv.
23999
(define_insn_reservation "cortex_r4_udiv" 9
24000
(and (eq_attr "tune_cortexr4" "yes")
24001
- (eq_attr "insn" "udiv"))
24002
+ (eq_attr "type" "udiv"))
24005
(define_insn_reservation "cortex_r4_sdiv" 10
24006
(and (eq_attr "tune_cortexr4" "yes")
24007
- (eq_attr "insn" "sdiv"))
24008
+ (eq_attr "type" "sdiv"))
24009
"cortex_r4_div_10")
24011
;; Branches. We assume correct prediction.
24012
--- a/src/gcc/config/arm/arm-tune.md
24013
+++ b/src/gcc/config/arm/arm-tune.md
24015
;; -*- buffer-read-only: t -*-
24016
;; Generated automatically by gentune.sh from arm-cores.def
24017
(define_attr "tune"
24018
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
24019
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexr4,cortexr4f,cortexr5,cortexr7,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
24020
(const (symbol_ref "((enum attr_tune) arm_tune)")))
24021
--- a/src/gcc/config/arm/arm-protos.h
24022
+++ b/src/gcc/config/arm/arm-protos.h
24023
@@ -24,12 +24,13 @@
24025
extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
24026
extern int use_return_insn (int, rtx);
24027
+extern bool use_simple_return_p (void);
24028
extern enum reg_class arm_regno_class (int);
24029
extern void arm_load_pic_register (unsigned long);
24030
extern int arm_volatile_func (void);
24031
extern void arm_expand_prologue (void);
24032
extern void arm_expand_epilogue (bool);
24033
-extern void thumb2_expand_return (void);
24034
+extern void thumb2_expand_return (bool);
24035
extern const char *arm_strip_name_encoding (const char *);
24036
extern void arm_asm_output_labelref (FILE *, const char *);
24037
extern void thumb2_asm_output_opcode (FILE *);
24039
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
24040
rtx (*) (rtx, rtx, rtx));
24041
extern rtx neon_make_constant (rtx);
24042
+extern tree arm_builtin_vectorized_function (tree, tree, tree);
24043
extern void neon_expand_vector_init (rtx, rtx);
24044
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
24045
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
24046
@@ -117,7 +119,9 @@
24047
extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
24048
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
24049
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
24050
+extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
24051
extern int arm_gen_movmemqi (rtx *);
24052
+extern bool gen_movmem_ldrd_strd (rtx *);
24053
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
24054
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
24056
@@ -224,6 +228,8 @@
24058
extern void arm_order_regs_for_local_alloc (void);
24060
+extern int arm_max_conditional_execute ();
24062
/* Vectorizer cost model implementation. */
24063
struct cpu_vec_costs {
24064
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
24065
@@ -253,8 +259,7 @@
24066
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
24067
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
24068
int constant_limit;
24069
- /* Maximum number of instructions to conditionalise in
24070
- arm_final_prescan_insn. */
24071
+ /* Maximum number of instructions to conditionalise. */
24072
int max_insns_skipped;
24073
int num_prefetch_slots;
24075
@@ -269,6 +274,8 @@
24076
bool logical_op_non_short_circuit[2];
24077
/* Vectorizer costs. */
24078
const struct cpu_vec_costs* vec_costs;
24079
+ /* Prefer Neon for 64-bit bitops. */
24080
+ bool prefer_neon_for_64bits;
24083
extern const struct tune_params *current_tune;
24084
--- a/src/gcc/config/arm/vfp.md
24085
+++ b/src/gcc/config/arm/vfp.md
24087
;; along with GCC; see the file COPYING3. If not see
24088
;; <http://www.gnu.org/licenses/>. */
24090
-;; The VFP "type" attributes differ from those used in the FPA model.
24091
-;; fcpys Single precision cpy.
24092
-;; ffariths Single precision abs, neg.
24093
-;; ffarithd Double precision abs, neg, cpy.
24094
-;; fadds Single precision add/sub.
24095
-;; faddd Double precision add/sub.
24096
-;; fconsts Single precision load immediate.
24097
-;; fconstd Double precision load immediate.
24098
-;; fcmps Single precision comparison.
24099
-;; fcmpd Double precision comparison.
24100
-;; fmuls Single precision multiply.
24101
-;; fmuld Double precision multiply.
24102
-;; fmacs Single precision multiply-accumulate.
24103
-;; fmacd Double precision multiply-accumulate.
24104
-;; ffmas Single precision fused multiply-accumulate.
24105
-;; ffmad Double precision fused multiply-accumulate.
24106
-;; fdivs Single precision sqrt or division.
24107
-;; fdivd Double precision sqrt or division.
24108
-;; f_flag fmstat operation
24109
-;; f_load[sd] Floating point load from memory.
24110
-;; f_store[sd] Floating point store to memory.
24111
-;; f_2_r Transfer vfp to arm reg.
24112
-;; r_2_f Transfer arm to vfp reg.
24113
-;; f_cvt Convert floating<->integral
24116
;; ??? For now do not allow loading constants into vfp regs. This causes
24117
;; problems because small constants get converted into adds.
24118
@@ -87,53 +62,60 @@
24120
;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
24121
;; high/low register alternatives for loads and stores here.
24122
+;; The l/Py alternative should come after r/I to ensure that the short variant
24123
+;; is chosen with length 2 when the instruction is predicated for
24124
+;; arm_restrict_it.
24125
(define_insn "*thumb2_movsi_vfp"
24126
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
24127
- (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
24128
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
24129
+ (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
24130
"TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
24131
&& ( s_register_operand (operands[0], SImode)
24132
|| s_register_operand (operands[1], SImode))"
24134
switch (which_alternative)
24140
return \"mov%?\\t%0, %1\";
24143
return \"mvn%?\\t%0, #%B1\";
24146
return \"movw%?\\t%0, %1\";
24150
return \"ldr%?\\t%0, %1\";
24154
return \"str%?\\t%1, %0\";
24157
return \"fmsr%?\\t%0, %1\\t%@ int\";
24160
return \"fmrs%?\\t%0, %1\\t%@ int\";
24163
return \"fcpys%?\\t%0, %1\\t%@ int\";
24164
- case 11: case 12:
24165
+ case 12: case 13:
24166
return output_move_vfp (operands);
24168
gcc_unreachable ();
24171
[(set_attr "predicable" "yes")
24172
- (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
24173
- (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
24174
- (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
24175
- (set_attr "pool_range" "*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
24176
- (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
24177
+ (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
24178
+ (set_attr "type" "*,*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
24179
+ (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
24180
+ (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
24181
+ (set_attr "insn" "mov,mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
24182
+ (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
24183
+ (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
24189
(define_insn "*movdi_vfp"
24190
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,r,w,w, Uv")
24191
- (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
24192
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
24193
+ (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
24194
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8
24195
&& ( register_operand (operands[0], DImode)
24196
|| register_operand (operands[1], DImode))
24197
@@ -412,6 +394,7 @@
24200
[(set_attr "predicable" "yes")
24201
+ (set_attr "predicable_short_it" "no")
24203
"r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
24204
(set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
24205
@@ -420,7 +403,6 @@
24206
(set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
24212
(define_insn "*movdf_vfp"
24213
@@ -550,7 +532,7 @@
24214
[(match_operand 4 "cc_register" "") (const_int 0)])
24215
(match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
24216
(match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
24217
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
24218
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it"
24220
it\\t%D3\;fcpys%D3\\t%0, %2
24221
it\\t%d3\;fcpys%d3\\t%0, %1
24222
@@ -598,7 +580,7 @@
24223
[(match_operand 4 "cc_register" "") (const_int 0)])
24224
(match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
24225
(match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
24226
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24227
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
24229
it\\t%D3\;fcpyd%D3\\t%P0, %P2
24230
it\\t%d3\;fcpyd%d3\\t%P0, %P1
24231
@@ -624,6 +606,7 @@
24232
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24234
[(set_attr "predicable" "yes")
24235
+ (set_attr "predicable_short_it" "no")
24236
(set_attr "type" "ffariths")]
24239
@@ -633,6 +616,7 @@
24240
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24241
"fabsd%?\\t%P0, %P1"
24242
[(set_attr "predicable" "yes")
24243
+ (set_attr "predicable_short_it" "no")
24244
(set_attr "type" "ffarithd")]
24247
@@ -644,6 +628,7 @@
24249
eor%?\\t%0, %1, #-2147483648"
24250
[(set_attr "predicable" "yes")
24251
+ (set_attr "predicable_short_it" "no")
24252
(set_attr "type" "ffariths")]
24255
@@ -689,6 +674,7 @@
24258
[(set_attr "predicable" "yes")
24259
+ (set_attr "predicable_short_it" "no")
24260
(set_attr "length" "4,4,8")
24261
(set_attr "type" "ffarithd")]
24263
@@ -703,6 +689,7 @@
24264
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24265
"fadds%?\\t%0, %1, %2"
24266
[(set_attr "predicable" "yes")
24267
+ (set_attr "predicable_short_it" "no")
24268
(set_attr "type" "fadds")]
24271
@@ -713,6 +700,7 @@
24272
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24273
"faddd%?\\t%P0, %P1, %P2"
24274
[(set_attr "predicable" "yes")
24275
+ (set_attr "predicable_short_it" "no")
24276
(set_attr "type" "faddd")]
24279
@@ -724,6 +712,7 @@
24280
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24281
"fsubs%?\\t%0, %1, %2"
24282
[(set_attr "predicable" "yes")
24283
+ (set_attr "predicable_short_it" "no")
24284
(set_attr "type" "fadds")]
24287
@@ -734,6 +723,7 @@
24288
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24289
"fsubd%?\\t%P0, %P1, %P2"
24290
[(set_attr "predicable" "yes")
24291
+ (set_attr "predicable_short_it" "no")
24292
(set_attr "type" "faddd")]
24295
@@ -747,6 +737,7 @@
24296
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24297
"fdivs%?\\t%0, %1, %2"
24298
[(set_attr "predicable" "yes")
24299
+ (set_attr "predicable_short_it" "no")
24300
(set_attr "type" "fdivs")]
24303
@@ -757,6 +748,7 @@
24304
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24305
"fdivd%?\\t%P0, %P1, %P2"
24306
[(set_attr "predicable" "yes")
24307
+ (set_attr "predicable_short_it" "no")
24308
(set_attr "type" "fdivd")]
24311
@@ -770,6 +762,7 @@
24312
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24313
"fmuls%?\\t%0, %1, %2"
24314
[(set_attr "predicable" "yes")
24315
+ (set_attr "predicable_short_it" "no")
24316
(set_attr "type" "fmuls")]
24319
@@ -780,6 +773,7 @@
24320
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24321
"fmuld%?\\t%P0, %P1, %P2"
24322
[(set_attr "predicable" "yes")
24323
+ (set_attr "predicable_short_it" "no")
24324
(set_attr "type" "fmuld")]
24327
@@ -790,6 +784,7 @@
24328
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24329
"fnmuls%?\\t%0, %1, %2"
24330
[(set_attr "predicable" "yes")
24331
+ (set_attr "predicable_short_it" "no")
24332
(set_attr "type" "fmuls")]
24335
@@ -800,6 +795,7 @@
24336
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24337
"fnmuld%?\\t%P0, %P1, %P2"
24338
[(set_attr "predicable" "yes")
24339
+ (set_attr "predicable_short_it" "no")
24340
(set_attr "type" "fmuld")]
24343
@@ -815,6 +811,7 @@
24344
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24345
"fmacs%?\\t%0, %2, %3"
24346
[(set_attr "predicable" "yes")
24347
+ (set_attr "predicable_short_it" "no")
24348
(set_attr "type" "fmacs")]
24351
@@ -826,6 +823,7 @@
24352
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24353
"fmacd%?\\t%P0, %P2, %P3"
24354
[(set_attr "predicable" "yes")
24355
+ (set_attr "predicable_short_it" "no")
24356
(set_attr "type" "fmacd")]
24359
@@ -838,6 +836,7 @@
24360
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24361
"fmscs%?\\t%0, %2, %3"
24362
[(set_attr "predicable" "yes")
24363
+ (set_attr "predicable_short_it" "no")
24364
(set_attr "type" "fmacs")]
24367
@@ -849,6 +848,7 @@
24368
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24369
"fmscd%?\\t%P0, %P2, %P3"
24370
[(set_attr "predicable" "yes")
24371
+ (set_attr "predicable_short_it" "no")
24372
(set_attr "type" "fmacd")]
24375
@@ -861,6 +861,7 @@
24376
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24377
"fnmacs%?\\t%0, %2, %3"
24378
[(set_attr "predicable" "yes")
24379
+ (set_attr "predicable_short_it" "no")
24380
(set_attr "type" "fmacs")]
24383
@@ -872,6 +873,7 @@
24384
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24385
"fnmacd%?\\t%P0, %P2, %P3"
24386
[(set_attr "predicable" "yes")
24387
+ (set_attr "predicable_short_it" "no")
24388
(set_attr "type" "fmacd")]
24391
@@ -886,6 +888,7 @@
24392
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24393
"fnmscs%?\\t%0, %2, %3"
24394
[(set_attr "predicable" "yes")
24395
+ (set_attr "predicable_short_it" "no")
24396
(set_attr "type" "fmacs")]
24399
@@ -898,6 +901,7 @@
24400
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24401
"fnmscd%?\\t%P0, %P2, %P3"
24402
[(set_attr "predicable" "yes")
24403
+ (set_attr "predicable_short_it" "no")
24404
(set_attr "type" "fmacd")]
24407
@@ -911,6 +915,7 @@
24408
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
24409
"vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
24410
[(set_attr "predicable" "yes")
24411
+ (set_attr "predicable_short_it" "no")
24412
(set_attr "type" "ffma<vfp_type>")]
24415
@@ -923,6 +928,7 @@
24416
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
24417
"vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
24418
[(set_attr "predicable" "yes")
24419
+ (set_attr "predicable_short_it" "no")
24420
(set_attr "type" "ffma<vfp_type>")]
24423
@@ -934,6 +940,7 @@
24424
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
24425
"vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
24426
[(set_attr "predicable" "yes")
24427
+ (set_attr "predicable_short_it" "no")
24428
(set_attr "type" "ffma<vfp_type>")]
24431
@@ -946,6 +953,7 @@
24432
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
24433
"vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
24434
[(set_attr "predicable" "yes")
24435
+ (set_attr "predicable_short_it" "no")
24436
(set_attr "type" "ffma<vfp_type>")]
24439
@@ -958,6 +966,7 @@
24440
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24441
"fcvtds%?\\t%P0, %1"
24442
[(set_attr "predicable" "yes")
24443
+ (set_attr "predicable_short_it" "no")
24444
(set_attr "type" "f_cvt")]
24447
@@ -967,6 +976,7 @@
24448
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24449
"fcvtsd%?\\t%0, %P1"
24450
[(set_attr "predicable" "yes")
24451
+ (set_attr "predicable_short_it" "no")
24452
(set_attr "type" "f_cvt")]
24455
@@ -976,6 +986,7 @@
24456
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
24457
"vcvtb%?.f32.f16\\t%0, %1"
24458
[(set_attr "predicable" "yes")
24459
+ (set_attr "predicable_short_it" "no")
24460
(set_attr "type" "f_cvt")]
24463
@@ -985,6 +996,7 @@
24464
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
24465
"vcvtb%?.f16.f32\\t%0, %1"
24466
[(set_attr "predicable" "yes")
24467
+ (set_attr "predicable_short_it" "no")
24468
(set_attr "type" "f_cvt")]
24471
@@ -994,6 +1006,7 @@
24472
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24473
"ftosizs%?\\t%0, %1"
24474
[(set_attr "predicable" "yes")
24475
+ (set_attr "predicable_short_it" "no")
24476
(set_attr "type" "f_cvt")]
24479
@@ -1003,6 +1016,7 @@
24480
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24481
"ftosizd%?\\t%0, %P1"
24482
[(set_attr "predicable" "yes")
24483
+ (set_attr "predicable_short_it" "no")
24484
(set_attr "type" "f_cvt")]
24487
@@ -1013,6 +1027,7 @@
24488
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24489
"ftouizs%?\\t%0, %1"
24490
[(set_attr "predicable" "yes")
24491
+ (set_attr "predicable_short_it" "no")
24492
(set_attr "type" "f_cvt")]
24495
@@ -1022,6 +1037,7 @@
24496
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24497
"ftouizd%?\\t%0, %P1"
24498
[(set_attr "predicable" "yes")
24499
+ (set_attr "predicable_short_it" "no")
24500
(set_attr "type" "f_cvt")]
24503
@@ -1032,6 +1048,7 @@
24504
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24505
"fsitos%?\\t%0, %1"
24506
[(set_attr "predicable" "yes")
24507
+ (set_attr "predicable_short_it" "no")
24508
(set_attr "type" "f_cvt")]
24511
@@ -1041,6 +1058,7 @@
24512
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24513
"fsitod%?\\t%P0, %1"
24514
[(set_attr "predicable" "yes")
24515
+ (set_attr "predicable_short_it" "no")
24516
(set_attr "type" "f_cvt")]
24519
@@ -1051,6 +1069,7 @@
24520
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24521
"fuitos%?\\t%0, %1"
24522
[(set_attr "predicable" "yes")
24523
+ (set_attr "predicable_short_it" "no")
24524
(set_attr "type" "f_cvt")]
24527
@@ -1060,6 +1079,7 @@
24528
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24529
"fuitod%?\\t%P0, %1"
24530
[(set_attr "predicable" "yes")
24531
+ (set_attr "predicable_short_it" "no")
24532
(set_attr "type" "f_cvt")]
24535
@@ -1072,6 +1092,7 @@
24536
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
24537
"fsqrts%?\\t%0, %1"
24538
[(set_attr "predicable" "yes")
24539
+ (set_attr "predicable_short_it" "no")
24540
(set_attr "type" "fdivs")]
24543
@@ -1081,6 +1102,7 @@
24544
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
24545
"fsqrtd%?\\t%P0, %P1"
24546
[(set_attr "predicable" "yes")
24547
+ (set_attr "predicable_short_it" "no")
24548
(set_attr "type" "fdivd")]
24551
@@ -1168,6 +1190,7 @@
24554
[(set_attr "predicable" "yes")
24555
+ (set_attr "predicable_short_it" "no")
24556
(set_attr "type" "fcmps")]
24559
@@ -1180,6 +1203,7 @@
24562
[(set_attr "predicable" "yes")
24563
+ (set_attr "predicable_short_it" "no")
24564
(set_attr "type" "fcmps")]
24567
@@ -1192,6 +1216,7 @@
24570
[(set_attr "predicable" "yes")
24571
+ (set_attr "predicable_short_it" "no")
24572
(set_attr "type" "fcmpd")]
24575
@@ -1204,6 +1229,7 @@
24576
fcmped%?\\t%P0, %P1
24578
[(set_attr "predicable" "yes")
24579
+ (set_attr "predicable_short_it" "no")
24580
(set_attr "type" "fcmpd")]
24583
@@ -1263,6 +1289,7 @@
24584
"TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
24585
"vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1"
24586
[(set_attr "predicable" "<vrint_predicable>")
24587
+ (set_attr "predicable_short_it" "no")
24588
(set_attr "type" "f_rint<vfp_type>")]
24591
--- a/src/gcc/config/arm/neon.md
24592
+++ b/src/gcc/config/arm/neon.md
24593
@@ -487,7 +487,7 @@
24594
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
24595
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
24596
(set_attr "length" "*,8,8,*,8,8,8")
24597
- (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
24598
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
24601
(define_insn "*sub<mode>3_neon"
24602
@@ -524,7 +524,7 @@
24603
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
24604
(set_attr "conds" "*,clob,clob,clob,*")
24605
(set_attr "length" "*,8,8,8,*")
24606
- (set_attr "arch" "nota8,*,*,*,onlya8")]
24607
+ (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
24610
(define_insn "*mul<mode>3_neon"
24611
@@ -699,7 +699,7 @@
24613
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
24614
(set_attr "length" "*,*,8,8,*,*")
24615
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
24616
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
24619
;; The concrete forms of the Neon immediate-logic instructions are vbic and
24620
@@ -724,29 +724,6 @@
24621
[(set_attr "neon_type" "neon_int_1")]
24624
-(define_insn "anddi3_neon"
24625
- [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
24626
- (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
24627
- (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r,w,DL")))]
24630
- switch (which_alternative)
24632
- case 0: /* fall through */
24633
- case 4: return "vand\t%P0, %P1, %P2";
24634
- case 1: /* fall through */
24635
- case 5: return neon_output_logic_immediate ("vand", &operands[2],
24636
- DImode, 1, VALID_NEON_QREG_MODE (DImode));
24637
- case 2: return "#";
24638
- case 3: return "#";
24639
- default: gcc_unreachable ();
24642
- [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
24643
- (set_attr "length" "*,*,8,8,*,*")
24644
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
24647
(define_insn "orn<mode>3_neon"
24648
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
24649
(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
24650
@@ -840,7 +817,7 @@
24651
veor\t%P0, %P1, %P2"
24652
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
24653
(set_attr "length" "*,8,8,*")
24654
- (set_attr "arch" "nota8,*,*,onlya8")]
24655
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
24658
(define_insn "one_cmpl<mode>2"
24659
@@ -1162,7 +1139,7 @@
24663
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
24664
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
24665
(set_attr "opt" "*,*,speed,speed,*,*")]
24668
@@ -1263,7 +1240,7 @@
24672
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
24673
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
24674
(set_attr "opt" "*,*,speed,speed,*,*")]
24677
@@ -3281,6 +3258,24 @@
24678
(const_string "neon_fp_vadd_qqq_vabs_qq")))]
24681
+(define_insn "neon_vcvtv4sfv4hf"
24682
+ [(set (match_operand:V4SF 0 "s_register_operand" "=w")
24683
+ (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
24685
+ "TARGET_NEON && TARGET_FP16"
24686
+ "vcvt.f32.f16\t%q0, %P1"
24687
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
24690
+(define_insn "neon_vcvtv4hfv4sf"
24691
+ [(set (match_operand:V4HF 0 "s_register_operand" "=w")
24692
+ (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
24694
+ "TARGET_NEON && TARGET_FP16"
24695
+ "vcvt.f16.f32\t%P0, %q1"
24696
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
24699
(define_insn "neon_vcvt_n<mode>"
24700
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
24701
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
24702
@@ -5611,7 +5606,7 @@
24703
(match_operand:SI 3 "immediate_operand" "")]
24706
- emit_insn (gen_and<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
24707
+ emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
24711
--- a/src/gcc/config/arm/ldmstm.md
24712
+++ b/src/gcc/config/arm/ldmstm.md
24714
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24715
"ldm%(ia%)\t%5, {%1, %2, %3, %4}"
24716
[(set_attr "type" "load4")
24717
- (set_attr "predicable" "yes")])
24718
+ (set_attr "predicable" "yes")
24719
+ (set_attr "predicable_short_it" "no")])
24721
(define_insn "*thumb_ldm4_ia"
24722
[(match_parallel 0 "load_multiple_operation"
24724
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
24725
"ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
24726
[(set_attr "type" "load4")
24727
- (set_attr "predicable" "yes")])
24728
+ (set_attr "predicable" "yes")
24729
+ (set_attr "predicable_short_it" "no")])
24731
(define_insn "*thumb_ldm4_ia_update"
24732
[(match_parallel 0 "load_multiple_operation"
24733
@@ -108,7 +110,8 @@
24734
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24735
"stm%(ia%)\t%5, {%1, %2, %3, %4}"
24736
[(set_attr "type" "store4")
24737
- (set_attr "predicable" "yes")])
24738
+ (set_attr "predicable" "yes")
24739
+ (set_attr "predicable_short_it" "no")])
24741
(define_insn "*stm4_ia_update"
24742
[(match_parallel 0 "store_multiple_operation"
24743
@@ -125,7 +128,8 @@
24744
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
24745
"stm%(ia%)\t%5!, {%1, %2, %3, %4}"
24746
[(set_attr "type" "store4")
24747
- (set_attr "predicable" "yes")])
24748
+ (set_attr "predicable" "yes")
24749
+ (set_attr "predicable_short_it" "no")])
24751
(define_insn "*thumb_stm4_ia_update"
24752
[(match_parallel 0 "store_multiple_operation"
24753
@@ -302,7 +306,8 @@
24754
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24755
"ldm%(db%)\t%5, {%1, %2, %3, %4}"
24756
[(set_attr "type" "load4")
24757
- (set_attr "predicable" "yes")])
24758
+ (set_attr "predicable" "yes")
24759
+ (set_attr "predicable_short_it" "no")])
24761
(define_insn "*ldm4_db_update"
24762
[(match_parallel 0 "load_multiple_operation"
24763
@@ -323,7 +328,8 @@
24764
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
24765
"ldm%(db%)\t%5!, {%1, %2, %3, %4}"
24766
[(set_attr "type" "load4")
24767
- (set_attr "predicable" "yes")])
24768
+ (set_attr "predicable" "yes")
24769
+ (set_attr "predicable_short_it" "no")])
24771
(define_insn "*stm4_db"
24772
[(match_parallel 0 "store_multiple_operation"
24773
@@ -338,7 +344,8 @@
24774
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24775
"stm%(db%)\t%5, {%1, %2, %3, %4}"
24776
[(set_attr "type" "store4")
24777
- (set_attr "predicable" "yes")])
24778
+ (set_attr "predicable" "yes")
24779
+ (set_attr "predicable_short_it" "no")])
24781
(define_insn "*stm4_db_update"
24782
[(match_parallel 0 "store_multiple_operation"
24783
@@ -355,7 +362,8 @@
24784
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
24785
"stm%(db%)\t%5!, {%1, %2, %3, %4}"
24786
[(set_attr "type" "store4")
24787
- (set_attr "predicable" "yes")])
24788
+ (set_attr "predicable" "yes")
24789
+ (set_attr "predicable_short_it" "no")])
24792
[(set (match_operand:SI 0 "s_register_operand" "")
24793
@@ -477,7 +485,8 @@
24794
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24795
"ldm%(ia%)\t%4, {%1, %2, %3}"
24796
[(set_attr "type" "load3")
24797
- (set_attr "predicable" "yes")])
24798
+ (set_attr "predicable" "yes")
24799
+ (set_attr "predicable_short_it" "no")])
24801
(define_insn "*thumb_ldm3_ia"
24802
[(match_parallel 0 "load_multiple_operation"
24803
@@ -508,7 +517,8 @@
24804
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24805
"ldm%(ia%)\t%4!, {%1, %2, %3}"
24806
[(set_attr "type" "load3")
24807
- (set_attr "predicable" "yes")])
24808
+ (set_attr "predicable" "yes")
24809
+ (set_attr "predicable_short_it" "no")])
24811
(define_insn "*thumb_ldm3_ia_update"
24812
[(match_parallel 0 "load_multiple_operation"
24813
@@ -537,7 +547,8 @@
24814
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24815
"stm%(ia%)\t%4, {%1, %2, %3}"
24816
[(set_attr "type" "store3")
24817
- (set_attr "predicable" "yes")])
24818
+ (set_attr "predicable" "yes")
24819
+ (set_attr "predicable_short_it" "no")])
24821
(define_insn "*stm3_ia_update"
24822
[(match_parallel 0 "store_multiple_operation"
24823
@@ -552,7 +563,8 @@
24824
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24825
"stm%(ia%)\t%4!, {%1, %2, %3}"
24826
[(set_attr "type" "store3")
24827
- (set_attr "predicable" "yes")])
24828
+ (set_attr "predicable" "yes")
24829
+ (set_attr "predicable_short_it" "no")])
24831
(define_insn "*thumb_stm3_ia_update"
24832
[(match_parallel 0 "store_multiple_operation"
24833
@@ -704,7 +716,8 @@
24834
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24835
"ldm%(db%)\t%4, {%1, %2, %3}"
24836
[(set_attr "type" "load3")
24837
- (set_attr "predicable" "yes")])
24838
+ (set_attr "predicable" "yes")
24839
+ (set_attr "predicable_short_it" "no")])
24841
(define_insn "*ldm3_db_update"
24842
[(match_parallel 0 "load_multiple_operation"
24843
@@ -722,7 +735,8 @@
24844
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24845
"ldm%(db%)\t%4!, {%1, %2, %3}"
24846
[(set_attr "type" "load3")
24847
- (set_attr "predicable" "yes")])
24848
+ (set_attr "predicable" "yes")
24849
+ (set_attr "predicable_short_it" "no")])
24851
(define_insn "*stm3_db"
24852
[(match_parallel 0 "store_multiple_operation"
24853
@@ -735,7 +749,8 @@
24854
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24855
"stm%(db%)\t%4, {%1, %2, %3}"
24856
[(set_attr "type" "store3")
24857
- (set_attr "predicable" "yes")])
24858
+ (set_attr "predicable" "yes")
24859
+ (set_attr "predicable_short_it" "no")])
24861
(define_insn "*stm3_db_update"
24862
[(match_parallel 0 "store_multiple_operation"
24863
@@ -750,7 +765,8 @@
24864
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
24865
"stm%(db%)\t%4!, {%1, %2, %3}"
24866
[(set_attr "type" "store3")
24867
- (set_attr "predicable" "yes")])
24868
+ (set_attr "predicable" "yes")
24869
+ (set_attr "predicable_short_it" "no")])
24872
[(set (match_operand:SI 0 "s_register_operand" "")
24873
@@ -855,7 +871,8 @@
24874
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
24875
"ldm%(ia%)\t%3, {%1, %2}"
24876
[(set_attr "type" "load2")
24877
- (set_attr "predicable" "yes")])
24878
+ (set_attr "predicable" "yes")
24879
+ (set_attr "predicable_short_it" "no")])
24881
(define_insn "*thumb_ldm2_ia"
24882
[(match_parallel 0 "load_multiple_operation"
24883
@@ -880,7 +897,8 @@
24884
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24885
"ldm%(ia%)\t%3!, {%1, %2}"
24886
[(set_attr "type" "load2")
24887
- (set_attr "predicable" "yes")])
24888
+ (set_attr "predicable" "yes")
24889
+ (set_attr "predicable_short_it" "no")])
24891
(define_insn "*thumb_ldm2_ia_update"
24892
[(match_parallel 0 "load_multiple_operation"
24893
@@ -904,7 +922,8 @@
24894
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
24895
"stm%(ia%)\t%3, {%1, %2}"
24896
[(set_attr "type" "store2")
24897
- (set_attr "predicable" "yes")])
24898
+ (set_attr "predicable" "yes")
24899
+ (set_attr "predicable_short_it" "no")])
24901
(define_insn "*stm2_ia_update"
24902
[(match_parallel 0 "store_multiple_operation"
24903
@@ -917,7 +936,8 @@
24904
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24905
"stm%(ia%)\t%3!, {%1, %2}"
24906
[(set_attr "type" "store2")
24907
- (set_attr "predicable" "yes")])
24908
+ (set_attr "predicable" "yes")
24909
+ (set_attr "predicable_short_it" "no")])
24911
(define_insn "*thumb_stm2_ia_update"
24912
[(match_parallel 0 "store_multiple_operation"
24913
@@ -1044,7 +1064,8 @@
24914
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
24915
"ldm%(db%)\t%3, {%1, %2}"
24916
[(set_attr "type" "load2")
24917
- (set_attr "predicable" "yes")])
24918
+ (set_attr "predicable" "yes")
24919
+ (set_attr "predicable_short_it" "no")])
24921
(define_insn "*ldm2_db_update"
24922
[(match_parallel 0 "load_multiple_operation"
24923
@@ -1059,7 +1080,8 @@
24924
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24925
"ldm%(db%)\t%3!, {%1, %2}"
24926
[(set_attr "type" "load2")
24927
- (set_attr "predicable" "yes")])
24928
+ (set_attr "predicable" "yes")
24929
+ (set_attr "predicable_short_it" "no")])
24931
(define_insn "*stm2_db"
24932
[(match_parallel 0 "store_multiple_operation"
24933
@@ -1070,7 +1092,8 @@
24934
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
24935
"stm%(db%)\t%3, {%1, %2}"
24936
[(set_attr "type" "store2")
24937
- (set_attr "predicable" "yes")])
24938
+ (set_attr "predicable" "yes")
24939
+ (set_attr "predicable_short_it" "no")])
24941
(define_insn "*stm2_db_update"
24942
[(match_parallel 0 "store_multiple_operation"
24943
@@ -1083,7 +1106,8 @@
24944
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
24945
"stm%(db%)\t%3!, {%1, %2}"
24946
[(set_attr "type" "store2")
24947
- (set_attr "predicable" "yes")])
24948
+ (set_attr "predicable" "yes")
24949
+ (set_attr "predicable_short_it" "no")])
24952
[(set (match_operand:SI 0 "s_register_operand" "")
24953
--- a/src/gcc/config/arm/arm_neon_builtins.def
24954
+++ b/src/gcc/config/arm/arm_neon_builtins.def
24956
+/* NEON builtin definitions for ARM.
24957
+ Copyright (C) 2013
24958
+ Free Software Foundation, Inc.
24959
+ Contributed by ARM Ltd.
24961
+ This file is part of GCC.
24963
+ GCC is free software; you can redistribute it and/or modify it
24964
+ under the terms of the GNU General Public License as published
24965
+ by the Free Software Foundation; either version 3, or (at your
24966
+ option) any later version.
24968
+ GCC is distributed in the hope that it will be useful, but WITHOUT
24969
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24970
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
24971
+ License for more details.
24973
+ You should have received a copy of the GNU General Public License
24974
+ along with GCC; see the file COPYING3. If not see
24975
+ <http://www.gnu.org/licenses/>. */
24977
+VAR10 (BINOP, vadd,
24978
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
24979
+VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
24980
+VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
24981
+VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
24982
+VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
24983
+VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
24984
+VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
24985
+VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
24986
+VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
24987
+VAR2 (TERNOP, vfma, v2sf, v4sf),
24988
+VAR2 (TERNOP, vfms, v2sf, v4sf),
24989
+VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
24990
+VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
24991
+VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
24992
+VAR2 (TERNOP, vqdmlal, v4hi, v2si),
24993
+VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
24994
+VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
24995
+VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
24996
+VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
24997
+VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
24998
+VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
24999
+VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
25000
+VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
25001
+VAR2 (BINOP, vqdmull, v4hi, v2si),
25002
+VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25003
+VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25004
+VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25005
+VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
25006
+VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
25007
+VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
25008
+VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25009
+VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25010
+VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25011
+VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
25012
+VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25013
+VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25014
+VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
25015
+VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
25016
+VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25017
+VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25018
+VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
25019
+VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25020
+VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25021
+VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25022
+VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25023
+VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25024
+VAR2 (BINOP, vcage, v2sf, v4sf),
25025
+VAR2 (BINOP, vcagt, v2sf, v4sf),
25026
+VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25027
+VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25028
+VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
25029
+VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25030
+VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
25031
+VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25032
+VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25033
+VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
25034
+VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25035
+VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25036
+VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
25037
+VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
25038
+VAR2 (BINOP, vrecps, v2sf, v4sf),
25039
+VAR2 (BINOP, vrsqrts, v2sf, v4sf),
25040
+VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25041
+VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
25042
+VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25043
+VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25044
+VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25045
+VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25046
+VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25047
+VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25048
+VAR2 (UNOP, vcnt, v8qi, v16qi),
25049
+VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
25050
+VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
25051
+VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
25052
+ /* FIXME: vget_lane supports more variants than this! */
25053
+VAR10 (GETLANE, vget_lane,
25054
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25055
+VAR10 (SETLANE, vset_lane,
25056
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25057
+VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
25058
+VAR10 (DUP, vdup_n,
25059
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25060
+VAR10 (DUPLANE, vdup_lane,
25061
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25062
+VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
25063
+VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
25064
+VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
25065
+VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
25066
+VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
25067
+VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
25068
+VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
25069
+VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25070
+VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25071
+VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
25072
+VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
25073
+VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25074
+VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
25075
+VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
25076
+VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25077
+VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25078
+VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
25079
+VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
25080
+VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25081
+VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
25082
+VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
25083
+VAR10 (BINOP, vext,
25084
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25085
+VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25086
+VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
25087
+VAR2 (UNOP, vrev16, v8qi, v16qi),
25088
+VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
25089
+VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
25090
+VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
25091
+VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
25092
+VAR10 (SELECT, vbsl,
25093
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25094
+VAR2 (RINT, vrintn, v2sf, v4sf),
25095
+VAR2 (RINT, vrinta, v2sf, v4sf),
25096
+VAR2 (RINT, vrintp, v2sf, v4sf),
25097
+VAR2 (RINT, vrintm, v2sf, v4sf),
25098
+VAR2 (RINT, vrintz, v2sf, v4sf),
25099
+VAR2 (RINT, vrintx, v2sf, v4sf),
25100
+VAR1 (VTBL, vtbl1, v8qi),
25101
+VAR1 (VTBL, vtbl2, v8qi),
25102
+VAR1 (VTBL, vtbl3, v8qi),
25103
+VAR1 (VTBL, vtbl4, v8qi),
25104
+VAR1 (VTBX, vtbx1, v8qi),
25105
+VAR1 (VTBX, vtbx2, v8qi),
25106
+VAR1 (VTBX, vtbx3, v8qi),
25107
+VAR1 (VTBX, vtbx4, v8qi),
25108
+VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25109
+VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25110
+VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
25111
+VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
25112
+VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
25113
+VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
25114
+VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
25115
+VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
25116
+VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
25117
+VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
25118
+VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
25119
+VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
25120
+VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
25121
+VAR10 (LOAD1, vld1,
25122
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25123
+VAR10 (LOAD1LANE, vld1_lane,
25124
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25125
+VAR10 (LOAD1, vld1_dup,
25126
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25127
+VAR10 (STORE1, vst1,
25128
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25129
+VAR10 (STORE1LANE, vst1_lane,
25130
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25132
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
25133
+VAR7 (LOADSTRUCTLANE, vld2_lane,
25134
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25135
+VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
25136
+VAR9 (STORESTRUCT, vst2,
25137
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
25138
+VAR7 (STORESTRUCTLANE, vst2_lane,
25139
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25141
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
25142
+VAR7 (LOADSTRUCTLANE, vld3_lane,
25143
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25144
+VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
25145
+VAR9 (STORESTRUCT, vst3,
25146
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
25147
+VAR7 (STORESTRUCTLANE, vst3_lane,
25148
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25149
+VAR9 (LOADSTRUCT, vld4,
25150
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
25151
+VAR7 (LOADSTRUCTLANE, vld4_lane,
25152
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25153
+VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
25154
+VAR9 (STORESTRUCT, vst4,
25155
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
25156
+VAR7 (STORESTRUCTLANE, vst4_lane,
25157
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
25158
+VAR10 (LOGICBINOP, vand,
25159
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25160
+VAR10 (LOGICBINOP, vorr,
25161
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25162
+VAR10 (BINOP, veor,
25163
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25164
+VAR10 (LOGICBINOP, vbic,
25165
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
25166
+VAR10 (LOGICBINOP, vorn,
25167
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
25168
--- a/src/gcc/config/arm/neon.ml
25169
+++ b/src/gcc/config/arm/neon.ml
25171
<http://www.gnu.org/licenses/>. *)
25173
(* Shorthand types for vector elements. *)
25174
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
25175
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
25176
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
25177
| Cast of elts * elts | NoElts
25180
| T_uint16x4 | T_uint16x8
25181
| T_uint32x2 | T_uint32x4
25182
| T_uint64x1 | T_uint64x2
25184
| T_float32x2 | T_float32x4
25185
| T_poly8x8 | T_poly8x16
25186
| T_poly16x4 | T_poly16x8
25187
@@ -46,11 +47,13 @@
25188
| T_uint8 | T_uint16
25189
| T_uint32 | T_uint64
25190
| T_poly8 | T_poly16
25191
- | T_float32 | T_arrayof of int * vectype
25192
+ | T_float16 | T_float32
25193
+ | T_arrayof of int * vectype
25194
| T_ptrto of vectype | T_const of vectype
25196
| T_intHI | T_intSI
25197
- | T_intDI | T_floatSF
25198
+ | T_intDI | T_floatHF
25201
(* The meanings of the following are:
25202
TImode : "Tetra", two registers (four words).
25204
| Arity3 of vectype * vectype * vectype * vectype
25205
| Arity4 of vectype * vectype * vectype * vectype * vectype
25207
-type vecmode = V8QI | V4HI | V2SI | V2SF | DI
25208
+type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
25209
| V16QI | V8HI | V4SI | V4SF | V2DI
25210
| QI | HI | SI | SF
25212
@@ -284,18 +287,22 @@
25214
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
25215
| Requires_feature of string
25216
+ (* Mark that the intrinsic requires a particular architecture version. *)
25217
| Requires_arch of int
25218
+ (* Mark that the intrinsic requires a particular bit in __ARM_FP to
25220
+ | Requires_FP_bit of int
25222
exception MixedMode of elts * elts
25224
let rec elt_width = function
25225
S8 | U8 | P8 | I8 | B8 -> 8
25226
- | S16 | U16 | P16 | I16 | B16 -> 16
25227
+ | S16 | U16 | P16 | I16 | B16 | F16 -> 16
25228
| S32 | F32 | U32 | I32 | B32 -> 32
25229
| S64 | U64 | I64 | B64 -> 64
25231
let wa = elt_width a and wb = elt_width b in
25232
- if wa = wb then wa else failwith "element width?"
25233
+ if wa = wb then wa else raise (MixedMode (a, b))
25234
| Cast (a, b) -> raise (MixedMode (a, b))
25235
| NoElts -> failwith "No elts"
25237
@@ -303,7 +310,7 @@
25238
S8 | S16 | S32 | S64 -> Signed
25239
| U8 | U16 | U32 | U64 -> Unsigned
25242
+ | F16 | F32 -> Float
25243
| I8 | I16 | I32 | I64 -> Int
25244
| B8 | B16 | B32 | B64 -> Bits
25245
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
25246
@@ -315,6 +322,7 @@
25247
| Signed, 16 -> S16
25248
| Signed, 32 -> S32
25249
| Signed, 64 -> S64
25250
+ | Float, 16 -> F16
25252
| Unsigned, 8 -> U8
25253
| Unsigned, 16 -> U16
25254
@@ -384,7 +392,12 @@
25256
scan ((Array.length operands) - 1)
25258
-let rec mode_of_elt elt shape =
25259
+(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
25260
+ SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
25261
+ for the given argument position, else determine which argument to return a
25262
+ mode for automatically. *)
25264
+let rec mode_of_elt ?argpos elt shape =
25265
let flt = match elt_class elt with
25266
Float | ConvClass(_, Float) -> true | _ -> false in
25268
@@ -394,7 +407,10 @@
25269
in match shape with
25270
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
25271
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
25272
- [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
25274
+ [| V8QI; V4HF; V2SF; DI |].(idx)
25276
+ [| V8QI; V4HI; V2SI; DI |].(idx)
25277
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
25278
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
25279
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
25280
@@ -404,7 +420,11 @@
25282
[| V8QI; V4HI; V2SI; DI |].(idx)
25283
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
25284
- | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
25285
+ | Use_operands ops ->
25286
+ begin match argpos with
25287
+ None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
25288
+ | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
25290
| _ -> failwith "invalid shape"
25292
(* Modify an element type dependent on the shape of the instruction and the
25293
@@ -454,10 +474,11 @@
25294
| U16 -> T_uint16x4
25295
| U32 -> T_uint32x2
25296
| U64 -> T_uint64x1
25297
+ | F16 -> T_float16x4
25298
| F32 -> T_float32x2
25300
| P16 -> T_poly16x4
25301
- | _ -> failwith "Bad elt type"
25302
+ | _ -> failwith "Bad elt type for Dreg"
25305
begin match elt with
25306
@@ -472,7 +493,7 @@
25307
| F32 -> T_float32x4
25309
| P16 -> T_poly16x8
25310
- | _ -> failwith "Bad elt type"
25311
+ | _ -> failwith "Bad elt type for Qreg"
25314
begin match elt with
25315
@@ -487,7 +508,7 @@
25319
- | _ -> failwith "Bad elt type"
25320
+ | _ -> failwith "Bad elt type for Corereg"
25324
@@ -506,7 +527,7 @@
25325
let vectype_size = function
25326
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
25327
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
25328
- | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
25329
+ | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
25330
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
25331
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
25332
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
25333
@@ -1217,6 +1238,10 @@
25334
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
25335
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
25336
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
25337
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
25338
+ Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
25339
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
25340
+ Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
25341
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
25342
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
25343
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
25344
@@ -1782,7 +1807,7 @@
25345
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
25346
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
25347
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
25348
- | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
25349
+ | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
25350
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
25351
| NoElts -> failwith "No elts"
25353
@@ -1809,6 +1834,7 @@
25354
| T_uint32x4 -> affix "uint32x4"
25355
| T_uint64x1 -> affix "uint64x1"
25356
| T_uint64x2 -> affix "uint64x2"
25357
+ | T_float16x4 -> affix "float16x4"
25358
| T_float32x2 -> affix "float32x2"
25359
| T_float32x4 -> affix "float32x4"
25360
| T_poly8x8 -> affix "poly8x8"
25361
@@ -1825,6 +1851,7 @@
25362
| T_uint64 -> affix "uint64"
25363
| T_poly8 -> affix "poly8"
25364
| T_poly16 -> affix "poly16"
25365
+ | T_float16 -> affix "float16"
25366
| T_float32 -> affix "float32"
25367
| T_immediate _ -> "const int"
25369
@@ -1832,6 +1859,7 @@
25370
| T_intHI -> "__builtin_neon_hi"
25371
| T_intSI -> "__builtin_neon_si"
25372
| T_intDI -> "__builtin_neon_di"
25373
+ | T_floatHF -> "__builtin_neon_hf"
25374
| T_floatSF -> "__builtin_neon_sf"
25375
| T_arrayof (num, base) ->
25376
let basename = name (fun x -> x) base in
25377
@@ -1853,10 +1881,10 @@
25378
| B_XImode -> "__builtin_neon_xi"
25380
let string_of_mode = function
25381
- V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
25382
- | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
25383
- | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
25385
+ V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
25386
+ | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
25387
+ | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
25388
+ | HI -> "hi" | SI -> "si" | SF -> "sf"
25390
(* Use uppercase chars for letters which form part of the intrinsic name, but
25391
should be omitted from the builtin name (the info is passed in an extra
25392
--- a/src/gcc/config/arm/constraints.md
25393
+++ b/src/gcc/config/arm/constraints.md
25395
;; The following register constraints have been used:
25396
;; - in ARM/Thumb-2 state: t, w, x, y, z
25397
;; - in Thumb state: h, b
25398
-;; - in both states: l, c, k
25399
+;; - in both states: l, c, k, q, US
25400
;; In ARM state, 'l' is an alias for 'r'
25401
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
25404
(define_register_constraint "k" "STACK_REG"
25405
"@internal The stack register.")
25407
+(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS"
25408
+ "@internal In ARM state with LDRD support, core registers, otherwise general registers.")
25410
(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS"
25412
Thumb only. The union of the low registers and the stack register.")
25414
(define_register_constraint "c" "CC_REG"
25415
"@internal The condition code register.")
25417
+(define_register_constraint "Cs" "CALLER_SAVE_REGS"
25418
+ "@internal The caller save registers. Useful for sibcalls.")
25420
(define_constraint "I"
25421
"In ARM/Thumb-2 state a constant that can be used as an immediate value in a
25422
Data Processing instruction. In Thumb-1 state a constant in the range
25423
@@ -248,6 +254,12 @@
25424
(and (match_code "const_int")
25425
(match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
25427
+(define_constraint "De"
25429
+ In ARM/Thumb-2 state a const_int that can be used by insn anddi."
25430
+ (and (match_code "const_int")
25431
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
25433
(define_constraint "Di"
25435
In ARM/Thumb-2 state a const_int or const_double where both the high
25436
@@ -305,6 +317,9 @@
25437
(and (match_code "const_double")
25438
(match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)")))
25440
+(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS"
25441
+ "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.")
25443
(define_memory_constraint "Ua"
25445
An address valid for loading/storing register exclusive"
25446
@@ -385,9 +400,16 @@
25448
&& GET_CODE (XEXP (op, 0)) != POST_INC")))
25450
+(define_constraint "US"
25452
+ US is a symbol reference."
25453
+ (match_code "symbol_ref")
25456
;; We used to have constraint letters for S and R in ARM state, but
25457
;; all uses of these now appear to have been removed.
25459
;; Additionally, we used to have a Q constraint in Thumb state, but
25460
;; this wasn't really a valid memory constraint. Again, all uses of
25461
;; this now seem to have been removed.
25463
--- a/src/gcc/config/arm/cortex-a7.md
25464
+++ b/src/gcc/config/arm/cortex-a7.md
25465
@@ -127,8 +127,9 @@
25467
(define_insn_reservation "cortex_a7_mul" 2
25468
(and (eq_attr "tune" "cortexa7")
25469
- (and (eq_attr "type" "mult")
25470
- (eq_attr "neon_type" "none")))
25471
+ (and (eq_attr "neon_type" "none")
25472
+ (ior (eq_attr "mul32" "yes")
25473
+ (eq_attr "mul64" "yes"))))
25476
;; Forward the result of a multiply operation to the accumulator
25477
@@ -140,7 +141,7 @@
25478
;; The latency depends on the operands, so we use an estimate here.
25479
(define_insn_reservation "cortex_a7_idiv" 5
25480
(and (eq_attr "tune" "cortexa7")
25481
- (eq_attr "insn" "udiv,sdiv"))
25482
+ (eq_attr "type" "udiv,sdiv"))
25483
"cortex_a7_both*5")
25485
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25486
--- a/src/gcc/config/arm/arm-arches.def
25487
+++ b/src/gcc/config/arm/arm-arches.def
25489
ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R)
25490
ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M)
25491
ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM)
25492
-ARM_ARCH("armv8-a", cortexa15, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
25493
+ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
25494
ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)
25495
ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
25496
--- a/src/gcc/config/arm/t-arm
25497
+++ b/src/gcc/config/arm/t-arm
25499
$(srcdir)/config/arm/cortex-a8-neon.md \
25500
$(srcdir)/config/arm/cortex-a9.md \
25501
$(srcdir)/config/arm/cortex-a9-neon.md \
25502
+ $(srcdir)/config/arm/cortex-a53.md \
25503
$(srcdir)/config/arm/cortex-m4-fpu.md \
25504
$(srcdir)/config/arm/cortex-m4.md \
25505
$(srcdir)/config/arm/cortex-r4f.md \
25507
$(srcdir)/config/arm/iwmmxt.md \
25508
$(srcdir)/config/arm/iwmmxt2.md \
25509
$(srcdir)/config/arm/ldmstm.md \
25510
+ $(srcdir)/config/arm/ldrdstrd.md \
25511
$(srcdir)/config/arm/marvell-f-iwmmxt.md \
25512
$(srcdir)/config/arm/neon.md \
25513
$(srcdir)/config/arm/predicates.md \
25515
$(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
25516
$(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
25517
intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \
25518
- $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def
25519
+ $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \
25520
+ $(srcdir)/config/arm/arm_neon_builtins.def
25522
arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \
25523
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H)
25524
--- a/src/gcc/config/arm/arm.opt
25525
+++ b/src/gcc/config/arm/arm.opt
25526
@@ -239,6 +239,10 @@
25527
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
25528
Only generate absolute relocations on word sized values.
25531
+Target Report Var(arm_restrict_it) Init(2)
25532
+Generate IT blocks appropriate for ARMv8.
25534
mfix-cortex-m3-ldrd
25535
Target Report Var(fix_cm3_ldrd) Init(2)
25536
Avoid overlapping destination and address registers on LDRD instructions
25537
@@ -247,3 +251,7 @@
25539
Target Report Var(unaligned_access) Init(2)
25540
Enable unaligned word and halfword accesses to packed data.
25543
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
25544
+Use Neon to perform 64-bits operations rather than core registers.
25545
--- a/src/gcc/config/arm/arm926ejs.md
25546
+++ b/src/gcc/config/arm/arm926ejs.md
25547
@@ -81,32 +81,32 @@
25549
(define_insn_reservation "9_mult1" 3
25550
(and (eq_attr "tune" "arm926ejs")
25551
- (eq_attr "insn" "smlalxy,mul,mla"))
25552
+ (eq_attr "type" "smlalxy,mul,mla"))
25555
(define_insn_reservation "9_mult2" 4
25556
(and (eq_attr "tune" "arm926ejs")
25557
- (eq_attr "insn" "muls,mlas"))
25558
+ (eq_attr "type" "muls,mlas"))
25561
(define_insn_reservation "9_mult3" 4
25562
(and (eq_attr "tune" "arm926ejs")
25563
- (eq_attr "insn" "umull,umlal,smull,smlal"))
25564
+ (eq_attr "type" "umull,umlal,smull,smlal"))
25567
(define_insn_reservation "9_mult4" 5
25568
(and (eq_attr "tune" "arm926ejs")
25569
- (eq_attr "insn" "umulls,umlals,smulls,smlals"))
25570
+ (eq_attr "type" "umulls,umlals,smulls,smlals"))
25573
(define_insn_reservation "9_mult5" 2
25574
(and (eq_attr "tune" "arm926ejs")
25575
- (eq_attr "insn" "smulxy,smlaxy,smlawx"))
25576
+ (eq_attr "type" "smulxy,smlaxy,smlawx"))
25579
(define_insn_reservation "9_mult6" 3
25580
(and (eq_attr "tune" "arm926ejs")
25581
- (eq_attr "insn" "smlalxy"))
25582
+ (eq_attr "type" "smlalxy"))
25585
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25586
--- a/src/gcc/config/arm/ldrdstrd.md
25587
+++ b/src/gcc/config/arm/ldrdstrd.md
25589
+;; ARM ldrd/strd peephole optimizations.
25591
+;; Copyright (C) 2013 Free Software Foundation, Inc.
25593
+;; Written by Greta Yorsh <greta.yorsh@arm.com>
25595
+;; This file is part of GCC.
25597
+;; GCC is free software; you can redistribute it and/or modify it
25598
+;; under the terms of the GNU General Public License as published by
25599
+;; the Free Software Foundation; either version 3, or (at your option)
25600
+;; any later version.
25602
+;; GCC is distributed in the hope that it will be useful, but
25603
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
25604
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25605
+;; General Public License for more details.
25607
+;; You should have received a copy of the GNU General Public License
25608
+;; along with GCC; see the file COPYING3. If not see
25609
+;; <http://www.gnu.org/licenses/>.
25611
+;; The following peephole optimizations identify consecutive memory
25612
+;; accesses, and try to rearrange the operands to enable generation of
25615
+(define_peephole2 ; ldrd
25616
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
25617
+ (match_operand:SI 2 "memory_operand" ""))
25618
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
25619
+ (match_operand:SI 3 "memory_operand" ""))]
25621
+ && current_tune->prefer_ldrd_strd
25622
+ && !optimize_function_for_size_p (cfun)"
25625
+ if (!gen_operands_ldrd_strd (operands, true, false, false))
25627
+ else if (TARGET_ARM)
25629
+ /* In ARM state, the destination registers of LDRD/STRD must be
25630
+ consecutive. We emit DImode access. */
25631
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
25632
+ operands[2] = adjust_address (operands[2], DImode, 0);
25633
+ /* Emit [(set (match_dup 0) (match_dup 2))] */
25634
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
25637
+ else if (TARGET_THUMB2)
25639
+ /* Emit the pattern:
25640
+ [(parallel [(set (match_dup 0) (match_dup 2))
25641
+ (set (match_dup 1) (match_dup 3))])] */
25642
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]);
25643
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]);
25644
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
25649
+(define_peephole2 ; strd
25650
+ [(set (match_operand:SI 2 "memory_operand" "")
25651
+ (match_operand:SI 0 "arm_general_register_operand" ""))
25652
+ (set (match_operand:SI 3 "memory_operand" "")
25653
+ (match_operand:SI 1 "arm_general_register_operand" ""))]
25655
+ && current_tune->prefer_ldrd_strd
25656
+ && !optimize_function_for_size_p (cfun)"
25659
+ if (!gen_operands_ldrd_strd (operands, false, false, false))
25661
+ else if (TARGET_ARM)
25663
+ /* In ARM state, the destination registers of LDRD/STRD must be
25664
+ consecutive. We emit DImode access. */
25665
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
25666
+ operands[2] = adjust_address (operands[2], DImode, 0);
25667
+ /* Emit [(set (match_dup 2) (match_dup 0))] */
25668
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0]));
25671
+ else if (TARGET_THUMB2)
25673
+ /* Emit the pattern:
25674
+ [(parallel [(set (match_dup 2) (match_dup 0))
25675
+ (set (match_dup 3) (match_dup 1))])] */
25676
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
25677
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
25678
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
25683
+;; The following peepholes reorder registers to enable LDRD/STRD.
25684
+(define_peephole2 ; strd of constants
25685
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
25686
+ (match_operand:SI 4 "const_int_operand" ""))
25687
+ (set (match_operand:SI 2 "memory_operand" "")
25689
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
25690
+ (match_operand:SI 5 "const_int_operand" ""))
25691
+ (set (match_operand:SI 3 "memory_operand" "")
25694
+ && current_tune->prefer_ldrd_strd
25695
+ && !optimize_function_for_size_p (cfun)"
25698
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
25700
+ else if (TARGET_ARM)
25702
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
25703
+ operands[2] = adjust_address (operands[2], DImode, 0);
25704
+ /* Emit the pattern:
25705
+ [(set (match_dup 0) (match_dup 4))
25706
+ (set (match_dup 1) (match_dup 5))
25707
+ (set (match_dup 2) tmp)] */
25708
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
25709
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
25710
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
25713
+ else if (TARGET_THUMB2)
25715
+ /* Emit the pattern:
25716
+ [(set (match_dup 0) (match_dup 4))
25717
+ (set (match_dup 1) (match_dup 5))
25718
+ (parallel [(set (match_dup 2) (match_dup 0))
25719
+ (set (match_dup 3) (match_dup 1))])] */
25720
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
25721
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
25722
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
25723
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
25724
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
25729
+(define_peephole2 ; strd of constants
25730
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
25731
+ (match_operand:SI 4 "const_int_operand" ""))
25732
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
25733
+ (match_operand:SI 5 "const_int_operand" ""))
25734
+ (set (match_operand:SI 2 "memory_operand" "")
25736
+ (set (match_operand:SI 3 "memory_operand" "")
25739
+ && current_tune->prefer_ldrd_strd
25740
+ && !optimize_function_for_size_p (cfun)"
25743
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
25745
+ else if (TARGET_ARM)
25747
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
25748
+ operands[2] = adjust_address (operands[2], DImode, 0);
25749
+ /* Emit the pattern
25750
+ [(set (match_dup 0) (match_dup 4))
25751
+ (set (match_dup 1) (match_dup 5))
25752
+ (set (match_dup 2) tmp)] */
25753
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
25754
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
25755
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
25758
+ else if (TARGET_THUMB2)
25760
+ /* Emit the pattern:
25761
+ [(set (match_dup 0) (match_dup 4))
25762
+ (set (match_dup 1) (match_dup 5))
25763
+ (parallel [(set (match_dup 2) (match_dup 0))
25764
+ (set (match_dup 3) (match_dup 1))])] */
25765
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
25766
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
25767
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
25768
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
25769
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
25774
+;; The following two peephole optimizations are only relevant for ARM
25775
+;; mode where LDRD/STRD require consecutive registers.
25777
+(define_peephole2 ; swap the destination registers of two loads
25778
+ ; before a commutative operation.
25779
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
25780
+ (match_operand:SI 2 "memory_operand" ""))
25781
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
25782
+ (match_operand:SI 3 "memory_operand" ""))
25783
+ (set (match_operand:SI 4 "arm_general_register_operand" "")
25784
+ (match_operator:SI 5 "commutative_binary_operator"
25785
+ [(match_operand 6 "arm_general_register_operand" "")
25786
+ (match_operand 7 "arm_general_register_operand" "") ]))]
25787
+ "TARGET_LDRD && TARGET_ARM
25788
+ && current_tune->prefer_ldrd_strd
25789
+ && !optimize_function_for_size_p (cfun)
25790
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
25791
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
25792
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
25793
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
25794
+ [(set (match_dup 0) (match_dup 2))
25795
+ (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
25797
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
25803
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
25804
+ operands[2] = adjust_address (operands[2], DImode, 0);
25809
+(define_peephole2 ; swap the destination registers of two loads
25810
+ ; before a commutative operation that sets the flags.
25811
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
25812
+ (match_operand:SI 2 "memory_operand" ""))
25813
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
25814
+ (match_operand:SI 3 "memory_operand" ""))
25816
+ [(set (match_operand:SI 4 "arm_general_register_operand" "")
25817
+ (match_operator:SI 5 "commutative_binary_operator"
25818
+ [(match_operand 6 "arm_general_register_operand" "")
25819
+ (match_operand 7 "arm_general_register_operand" "") ]))
25820
+ (clobber (reg:CC CC_REGNUM))])]
25821
+ "TARGET_LDRD && TARGET_ARM
25822
+ && current_tune->prefer_ldrd_strd
25823
+ && !optimize_function_for_size_p (cfun)
25824
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
25825
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
25826
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
25827
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
25828
+ [(set (match_dup 0) (match_dup 2))
25830
+ [(set (match_dup 4)
25831
+ (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
25832
+ (clobber (reg:CC CC_REGNUM))])]
25834
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
25840
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
25841
+ operands[2] = adjust_address (operands[2], DImode, 0);
25846
+;; TODO: Handle LDRD/STRD with writeback:
25847
+;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
25848
+;; (b) Patterns may be followed by an update of the base address.
25849
--- a/src/gcc/config/arm/predicates.md
25850
+++ b/src/gcc/config/arm/predicates.md
25852
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
25855
+(define_predicate "imm_for_neon_inv_logic_operand"
25856
+ (match_code "const_vector")
25858
+ return (TARGET_NEON
25859
+ && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
25862
+(define_predicate "neon_inv_logic_op2"
25863
+ (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
25864
+ (match_operand 0 "s_register_operand")))
25866
;; Any hard register.
25867
(define_predicate "arm_hard_register_operand"
25869
@@ -145,6 +156,12 @@
25870
(ior (match_operand 0 "arm_rhs_operand")
25871
(match_operand 0 "arm_neg_immediate_operand")))
25873
+(define_predicate "arm_anddi_operand_neon"
25874
+ (ior (match_operand 0 "s_register_operand")
25875
+ (and (match_code "const_int")
25876
+ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
25877
+ (match_operand 0 "neon_inv_logic_op2")))
25879
(define_predicate "arm_adddi_operand"
25880
(ior (match_operand 0 "s_register_operand")
25881
(and (match_code "const_int")
25882
@@ -270,6 +287,24 @@
25883
(define_special_predicate "lt_ge_comparison_operator"
25884
(match_code "lt,ge"))
25886
+;; The vsel instruction only accepts the ARM condition codes listed below.
25887
+(define_special_predicate "arm_vsel_comparison_operator"
25888
+ (and (match_operand 0 "expandable_comparison_operator")
25889
+ (match_test "maybe_get_arm_condition_code (op) == ARM_GE
25890
+ || maybe_get_arm_condition_code (op) == ARM_GT
25891
+ || maybe_get_arm_condition_code (op) == ARM_EQ
25892
+ || maybe_get_arm_condition_code (op) == ARM_VS
25893
+ || maybe_get_arm_condition_code (op) == ARM_LT
25894
+ || maybe_get_arm_condition_code (op) == ARM_LE
25895
+ || maybe_get_arm_condition_code (op) == ARM_NE
25896
+ || maybe_get_arm_condition_code (op) == ARM_VC")))
25898
+(define_special_predicate "arm_cond_move_operator"
25899
+ (if_then_else (match_test "arm_restrict_it")
25900
+ (and (match_test "TARGET_FPU_ARMV8")
25901
+ (match_operand 0 "arm_vsel_comparison_operator"))
25902
+ (match_operand 0 "expandable_comparison_operator")))
25904
(define_special_predicate "noov_comparison_operator"
25905
(match_code "lt,ge,eq,ne"))
25907
@@ -513,21 +548,10 @@
25908
&& neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
25911
-(define_predicate "imm_for_neon_inv_logic_operand"
25912
- (match_code "const_vector")
25914
- return (TARGET_NEON
25915
- && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
25918
(define_predicate "neon_logic_op2"
25919
(ior (match_operand 0 "imm_for_neon_logic_operand")
25920
(match_operand 0 "s_register_operand")))
25922
-(define_predicate "neon_inv_logic_op2"
25923
- (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
25924
- (match_operand 0 "s_register_operand")))
25926
;; Predicates for named expanders that overlap multiple ISAs.
25928
(define_predicate "cmpdi_operand"
25929
@@ -617,3 +641,7 @@
25930
(define_predicate "mem_noofs_operand"
25931
(and (match_code "mem")
25932
(match_code "reg" "0")))
25934
+(define_predicate "call_insn_operand"
25935
+ (ior (match_code "symbol_ref")
25936
+ (match_operand 0 "s_register_operand")))
25937
--- a/src/gcc/config/arm/arm_neon.h
25938
+++ b/src/gcc/config/arm/arm_neon.h
25940
typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
25941
typedef __builtin_neon_di int64x1_t;
25942
typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
25943
+typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8)));
25944
typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
25945
typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
25946
typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
25947
@@ -6016,6 +6017,22 @@
25948
return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
25951
+#if ((__ARM_FP & 0x2) != 0)
25952
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
25953
+vcvt_f16_f32 (float32x4_t __a)
25955
+ return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
25959
+#if ((__ARM_FP & 0x2) != 0)
25960
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25961
+vcvt_f32_f16 (float16x4_t __a)
25963
+ return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
25967
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25968
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
25970
--- a/src/gcc/config/arm/arm-ldmstm.ml
25971
+++ b/src/gcc/config/arm/arm-ldmstm.ml
25972
@@ -146,12 +146,15 @@
25973
| IA, true, true -> true
25976
+exception InvalidAddrMode of string;;
25978
let target addrmode thumb =
25979
match addrmode, thumb with
25980
IA, true -> "TARGET_THUMB1"
25981
| IA, false -> "TARGET_32BIT"
25982
| DB, false -> "TARGET_32BIT"
25983
| _, false -> "TARGET_ARM"
25984
+ | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.")
25986
let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
25987
let astr = string_of_addrmode addrmode in
25988
@@ -181,8 +184,10 @@
25990
Printf.printf "}\"\n";
25991
Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs;
25992
- begin if not thumb then
25993
+ if not thumb then begin
25994
Printf.printf "\n (set_attr \"predicable\" \"yes\")";
25995
+ if addrmode == IA || addrmode == DB then
25996
+ Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")";
25998
Printf.printf "])\n\n"
26000
--- a/src/gcc/config/arm/iwmmxt.md
26001
+++ b/src/gcc/config/arm/iwmmxt.md
26003
"TARGET_REALLY_IWMMXT"
26004
"tbcstb%?\\t%0, %1"
26005
[(set_attr "predicable" "yes")
26006
- (set_attr "wtype" "tbcst")]
26007
+ (set_attr "type" "wmmx_tbcst")]
26010
(define_insn "tbcstv4hi"
26012
"TARGET_REALLY_IWMMXT"
26013
"tbcsth%?\\t%0, %1"
26014
[(set_attr "predicable" "yes")
26015
- (set_attr "wtype" "tbcst")]
26016
+ (set_attr "type" "wmmx_tbcst")]
26019
(define_insn "tbcstv2si"
26021
"TARGET_REALLY_IWMMXT"
26022
"tbcstw%?\\t%0, %1"
26023
[(set_attr "predicable" "yes")
26024
- (set_attr "wtype" "tbcst")]
26025
+ (set_attr "type" "wmmx_tbcst")]
26028
(define_insn "iwmmxt_iordi3"
26031
[(set_attr "predicable" "yes")
26032
(set_attr "length" "4,8,8")
26033
- (set_attr "wtype" "wor,none,none")]
26034
+ (set_attr "type" "wmmx_wor,*,*")]
26037
(define_insn "iwmmxt_xordi3"
26040
[(set_attr "predicable" "yes")
26041
(set_attr "length" "4,8,8")
26042
- (set_attr "wtype" "wxor,none,none")]
26043
+ (set_attr "type" "wmmx_wxor,*,*")]
26046
(define_insn "iwmmxt_anddi3"
26049
[(set_attr "predicable" "yes")
26050
(set_attr "length" "4,8,8")
26051
- (set_attr "wtype" "wand,none,none")]
26052
+ (set_attr "type" "wmmx_wand,*,*")]
26055
(define_insn "iwmmxt_nanddi3"
26056
@@ -103,7 +103,7 @@
26057
"TARGET_REALLY_IWMMXT"
26058
"wandn%?\\t%0, %1, %2"
26059
[(set_attr "predicable" "yes")
26060
- (set_attr "wtype" "wandn")]
26061
+ (set_attr "type" "wmmx_wandn")]
26064
(define_insn "*iwmmxt_arm_movdi"
26065
@@ -155,10 +155,9 @@
26069
- (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
26070
+ (set_attr "type" "*,*,*,load2,store2,wmmx_wmov,wmmx_tmcrr,wmmx_tmrrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
26071
(set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*")
26072
- (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")
26073
- (set_attr "wtype" "*,*,*,*,*,wmov,tmcrr,tmrrc,wldr,wstr,*,*,*,*,*")]
26074
+ (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")]
26077
(define_insn "*iwmmxt_movsi_insn"
26078
@@ -188,7 +187,7 @@
26080
gcc_unreachable ();
26082
- [(set_attr "type" "*,*,*,*,load1,store1,*,*,*,*,r_2_f,f_2_r,fcpys,f_loads,f_stores")
26083
+ [(set_attr "type" "*,*,*,*,load1,store1,wmmx_tmcr,wmmx_tmrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,fcpys,f_loads,f_stores")
26084
(set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*")
26085
(set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*")
26086
(set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*")
26087
@@ -200,8 +199,7 @@
26088
;; Also - we have to pretend that these insns clobber the condition code
26089
;; bits as otherwise arm_final_prescan_insn() will try to conditionalize
26091
- (set_attr "conds" "clob")
26092
- (set_attr "wtype" "*,*,*,*,*,*,tmcr,tmrc,wldr,wstr,*,*,*,*,*")]
26093
+ (set_attr "conds" "clob")]
26096
;; Because iwmmxt_movsi_insn is not predicable, we provide the
26097
@@ -249,10 +247,9 @@
26099
[(set_attr "predicable" "yes")
26100
(set_attr "length" "4, 4, 4,4,4,8, 8,8")
26101
- (set_attr "type" "*,*,*,*,*,*,load1,store1")
26102
+ (set_attr "type" "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load1,store1")
26103
(set_attr "pool_range" "*, *, 256,*,*,*, 256,*")
26104
- (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")
26105
- (set_attr "wtype" "wmov,wstr,wldr,tmrrc,tmcrr,*,*,*")]
26106
+ (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")]
26109
(define_expand "iwmmxt_setwcgr0"
26110
@@ -318,7 +315,7 @@
26111
"TARGET_REALLY_IWMMXT"
26112
"wand\\t%0, %1, %2"
26113
[(set_attr "predicable" "yes")
26114
- (set_attr "wtype" "wand")]
26115
+ (set_attr "type" "wmmx_wand")]
26118
(define_insn "*ior<mode>3_iwmmxt"
26119
@@ -328,7 +325,7 @@
26120
"TARGET_REALLY_IWMMXT"
26122
[(set_attr "predicable" "yes")
26123
- (set_attr "wtype" "wor")]
26124
+ (set_attr "type" "wmmx_wor")]
26127
(define_insn "*xor<mode>3_iwmmxt"
26128
@@ -338,7 +335,7 @@
26129
"TARGET_REALLY_IWMMXT"
26130
"wxor\\t%0, %1, %2"
26131
[(set_attr "predicable" "yes")
26132
- (set_attr "wtype" "wxor")]
26133
+ (set_attr "type" "wmmx_wxor")]
26137
@@ -351,7 +348,7 @@
26138
"TARGET_REALLY_IWMMXT"
26139
"wadd<MMX_char>%?\\t%0, %1, %2"
26140
[(set_attr "predicable" "yes")
26141
- (set_attr "wtype" "wadd")]
26142
+ (set_attr "type" "wmmx_wadd")]
26145
(define_insn "ssaddv8qi3"
26146
@@ -361,7 +358,7 @@
26147
"TARGET_REALLY_IWMMXT"
26148
"waddbss%?\\t%0, %1, %2"
26149
[(set_attr "predicable" "yes")
26150
- (set_attr "wtype" "wadd")]
26151
+ (set_attr "type" "wmmx_wadd")]
26154
(define_insn "ssaddv4hi3"
26155
@@ -371,7 +368,7 @@
26156
"TARGET_REALLY_IWMMXT"
26157
"waddhss%?\\t%0, %1, %2"
26158
[(set_attr "predicable" "yes")
26159
- (set_attr "wtype" "wadd")]
26160
+ (set_attr "type" "wmmx_wadd")]
26163
(define_insn "ssaddv2si3"
26164
@@ -381,7 +378,7 @@
26165
"TARGET_REALLY_IWMMXT"
26166
"waddwss%?\\t%0, %1, %2"
26167
[(set_attr "predicable" "yes")
26168
- (set_attr "wtype" "wadd")]
26169
+ (set_attr "type" "wmmx_wadd")]
26172
(define_insn "usaddv8qi3"
26173
@@ -391,7 +388,7 @@
26174
"TARGET_REALLY_IWMMXT"
26175
"waddbus%?\\t%0, %1, %2"
26176
[(set_attr "predicable" "yes")
26177
- (set_attr "wtype" "wadd")]
26178
+ (set_attr "type" "wmmx_wadd")]
26181
(define_insn "usaddv4hi3"
26182
@@ -401,7 +398,7 @@
26183
"TARGET_REALLY_IWMMXT"
26184
"waddhus%?\\t%0, %1, %2"
26185
[(set_attr "predicable" "yes")
26186
- (set_attr "wtype" "wadd")]
26187
+ (set_attr "type" "wmmx_wadd")]
26190
(define_insn "usaddv2si3"
26191
@@ -411,7 +408,7 @@
26192
"TARGET_REALLY_IWMMXT"
26193
"waddwus%?\\t%0, %1, %2"
26194
[(set_attr "predicable" "yes")
26195
- (set_attr "wtype" "wadd")]
26196
+ (set_attr "type" "wmmx_wadd")]
26199
(define_insn "*sub<mode>3_iwmmxt"
26200
@@ -421,7 +418,7 @@
26201
"TARGET_REALLY_IWMMXT"
26202
"wsub<MMX_char>%?\\t%0, %1, %2"
26203
[(set_attr "predicable" "yes")
26204
- (set_attr "wtype" "wsub")]
26205
+ (set_attr "type" "wmmx_wsub")]
26208
(define_insn "sssubv8qi3"
26209
@@ -431,7 +428,7 @@
26210
"TARGET_REALLY_IWMMXT"
26211
"wsubbss%?\\t%0, %1, %2"
26212
[(set_attr "predicable" "yes")
26213
- (set_attr "wtype" "wsub")]
26214
+ (set_attr "type" "wmmx_wsub")]
26217
(define_insn "sssubv4hi3"
26218
@@ -441,7 +438,7 @@
26219
"TARGET_REALLY_IWMMXT"
26220
"wsubhss%?\\t%0, %1, %2"
26221
[(set_attr "predicable" "yes")
26222
- (set_attr "wtype" "wsub")]
26223
+ (set_attr "type" "wmmx_wsub")]
26226
(define_insn "sssubv2si3"
26227
@@ -451,7 +448,7 @@
26228
"TARGET_REALLY_IWMMXT"
26229
"wsubwss%?\\t%0, %1, %2"
26230
[(set_attr "predicable" "yes")
26231
- (set_attr "wtype" "wsub")]
26232
+ (set_attr "type" "wmmx_wsub")]
26235
(define_insn "ussubv8qi3"
26236
@@ -461,7 +458,7 @@
26237
"TARGET_REALLY_IWMMXT"
26238
"wsubbus%?\\t%0, %1, %2"
26239
[(set_attr "predicable" "yes")
26240
- (set_attr "wtype" "wsub")]
26241
+ (set_attr "type" "wmmx_wsub")]
26244
(define_insn "ussubv4hi3"
26245
@@ -471,7 +468,7 @@
26246
"TARGET_REALLY_IWMMXT"
26247
"wsubhus%?\\t%0, %1, %2"
26248
[(set_attr "predicable" "yes")
26249
- (set_attr "wtype" "wsub")]
26250
+ (set_attr "type" "wmmx_wsub")]
26253
(define_insn "ussubv2si3"
26254
@@ -481,7 +478,7 @@
26255
"TARGET_REALLY_IWMMXT"
26256
"wsubwus%?\\t%0, %1, %2"
26257
[(set_attr "predicable" "yes")
26258
- (set_attr "wtype" "wsub")]
26259
+ (set_attr "type" "wmmx_wsub")]
26262
(define_insn "*mulv4hi3_iwmmxt"
26263
@@ -491,7 +488,7 @@
26264
"TARGET_REALLY_IWMMXT"
26265
"wmulul%?\\t%0, %1, %2"
26266
[(set_attr "predicable" "yes")
26267
- (set_attr "wtype" "wmul")]
26268
+ (set_attr "type" "wmmx_wmul")]
26271
(define_insn "smulv4hi3_highpart"
26272
@@ -504,7 +501,7 @@
26273
"TARGET_REALLY_IWMMXT"
26274
"wmulsm%?\\t%0, %1, %2"
26275
[(set_attr "predicable" "yes")
26276
- (set_attr "wtype" "wmul")]
26277
+ (set_attr "type" "wmmx_wmul")]
26280
(define_insn "umulv4hi3_highpart"
26281
@@ -517,7 +514,7 @@
26282
"TARGET_REALLY_IWMMXT"
26283
"wmulum%?\\t%0, %1, %2"
26284
[(set_attr "predicable" "yes")
26285
- (set_attr "wtype" "wmul")]
26286
+ (set_attr "type" "wmmx_wmul")]
26289
(define_insn "iwmmxt_wmacs"
26290
@@ -528,7 +525,7 @@
26291
"TARGET_REALLY_IWMMXT"
26292
"wmacs%?\\t%0, %2, %3"
26293
[(set_attr "predicable" "yes")
26294
- (set_attr "wtype" "wmac")]
26295
+ (set_attr "type" "wmmx_wmac")]
26298
(define_insn "iwmmxt_wmacsz"
26299
@@ -538,7 +535,7 @@
26300
"TARGET_REALLY_IWMMXT"
26301
"wmacsz%?\\t%0, %1, %2"
26302
[(set_attr "predicable" "yes")
26303
- (set_attr "wtype" "wmac")]
26304
+ (set_attr "type" "wmmx_wmac")]
26307
(define_insn "iwmmxt_wmacu"
26308
@@ -549,7 +546,7 @@
26309
"TARGET_REALLY_IWMMXT"
26310
"wmacu%?\\t%0, %2, %3"
26311
[(set_attr "predicable" "yes")
26312
- (set_attr "wtype" "wmac")]
26313
+ (set_attr "type" "wmmx_wmac")]
26316
(define_insn "iwmmxt_wmacuz"
26317
@@ -559,7 +556,7 @@
26318
"TARGET_REALLY_IWMMXT"
26319
"wmacuz%?\\t%0, %1, %2"
26320
[(set_attr "predicable" "yes")
26321
- (set_attr "wtype" "wmac")]
26322
+ (set_attr "type" "wmmx_wmac")]
26325
;; Same as xordi3, but don't show input operands so that we don't think
26326
@@ -570,7 +567,7 @@
26327
"TARGET_REALLY_IWMMXT"
26328
"wxor%?\\t%0, %0, %0"
26329
[(set_attr "predicable" "yes")
26330
- (set_attr "wtype" "wxor")]
26331
+ (set_attr "type" "wmmx_wxor")]
26334
;; Seems like cse likes to generate these, so we have to support them.
26335
@@ -584,7 +581,7 @@
26336
"TARGET_REALLY_IWMMXT"
26337
"wxor%?\\t%0, %0, %0"
26338
[(set_attr "predicable" "yes")
26339
- (set_attr "wtype" "wxor")]
26340
+ (set_attr "type" "wmmx_wxor")]
26343
(define_insn "iwmmxt_clrv4hi"
26344
@@ -594,7 +591,7 @@
26345
"TARGET_REALLY_IWMMXT"
26346
"wxor%?\\t%0, %0, %0"
26347
[(set_attr "predicable" "yes")
26348
- (set_attr "wtype" "wxor")]
26349
+ (set_attr "type" "wmmx_wxor")]
26352
(define_insn "iwmmxt_clrv2si"
26353
@@ -603,7 +600,7 @@
26354
"TARGET_REALLY_IWMMXT"
26355
"wxor%?\\t%0, %0, %0"
26356
[(set_attr "predicable" "yes")
26357
- (set_attr "wtype" "wxor")]
26358
+ (set_attr "type" "wmmx_wxor")]
26361
;; Unsigned averages/sum of absolute differences
26362
@@ -627,7 +624,7 @@
26363
"TARGET_REALLY_IWMMXT"
26364
"wavg2br%?\\t%0, %1, %2"
26365
[(set_attr "predicable" "yes")
26366
- (set_attr "wtype" "wavg2")]
26367
+ (set_attr "type" "wmmx_wavg2")]
26370
(define_insn "iwmmxt_uavgrndv4hi3"
26371
@@ -645,7 +642,7 @@
26372
"TARGET_REALLY_IWMMXT"
26373
"wavg2hr%?\\t%0, %1, %2"
26374
[(set_attr "predicable" "yes")
26375
- (set_attr "wtype" "wavg2")]
26376
+ (set_attr "type" "wmmx_wavg2")]
26379
(define_insn "iwmmxt_uavgv8qi3"
26380
@@ -658,7 +655,7 @@
26381
"TARGET_REALLY_IWMMXT"
26382
"wavg2b%?\\t%0, %1, %2"
26383
[(set_attr "predicable" "yes")
26384
- (set_attr "wtype" "wavg2")]
26385
+ (set_attr "type" "wmmx_wavg2")]
26388
(define_insn "iwmmxt_uavgv4hi3"
26389
@@ -671,7 +668,7 @@
26390
"TARGET_REALLY_IWMMXT"
26391
"wavg2h%?\\t%0, %1, %2"
26392
[(set_attr "predicable" "yes")
26393
- (set_attr "wtype" "wavg2")]
26394
+ (set_attr "type" "wmmx_wavg2")]
26397
;; Insert/extract/shuffle
26398
@@ -690,7 +687,7 @@
26401
[(set_attr "predicable" "yes")
26402
- (set_attr "wtype" "tinsr")]
26403
+ (set_attr "type" "wmmx_tinsr")]
26406
(define_insn "iwmmxt_tinsrh"
26407
@@ -707,7 +704,7 @@
26410
[(set_attr "predicable" "yes")
26411
- (set_attr "wtype" "tinsr")]
26412
+ (set_attr "type" "wmmx_tinsr")]
26415
(define_insn "iwmmxt_tinsrw"
26416
@@ -724,7 +721,7 @@
26419
[(set_attr "predicable" "yes")
26420
- (set_attr "wtype" "tinsr")]
26421
+ (set_attr "type" "wmmx_tinsr")]
26424
(define_insn "iwmmxt_textrmub"
26425
@@ -735,7 +732,7 @@
26426
"TARGET_REALLY_IWMMXT"
26427
"textrmub%?\\t%0, %1, %2"
26428
[(set_attr "predicable" "yes")
26429
- (set_attr "wtype" "textrm")]
26430
+ (set_attr "type" "wmmx_textrm")]
26433
(define_insn "iwmmxt_textrmsb"
26434
@@ -746,7 +743,7 @@
26435
"TARGET_REALLY_IWMMXT"
26436
"textrmsb%?\\t%0, %1, %2"
26437
[(set_attr "predicable" "yes")
26438
- (set_attr "wtype" "textrm")]
26439
+ (set_attr "type" "wmmx_textrm")]
26442
(define_insn "iwmmxt_textrmuh"
26443
@@ -757,7 +754,7 @@
26444
"TARGET_REALLY_IWMMXT"
26445
"textrmuh%?\\t%0, %1, %2"
26446
[(set_attr "predicable" "yes")
26447
- (set_attr "wtype" "textrm")]
26448
+ (set_attr "type" "wmmx_textrm")]
26451
(define_insn "iwmmxt_textrmsh"
26452
@@ -768,7 +765,7 @@
26453
"TARGET_REALLY_IWMMXT"
26454
"textrmsh%?\\t%0, %1, %2"
26455
[(set_attr "predicable" "yes")
26456
- (set_attr "wtype" "textrm")]
26457
+ (set_attr "type" "wmmx_textrm")]
26460
;; There are signed/unsigned variants of this instruction, but they are
26461
@@ -780,7 +777,7 @@
26462
"TARGET_REALLY_IWMMXT"
26463
"textrmsw%?\\t%0, %1, %2"
26464
[(set_attr "predicable" "yes")
26465
- (set_attr "wtype" "textrm")]
26466
+ (set_attr "type" "wmmx_textrm")]
26469
(define_insn "iwmmxt_wshufh"
26470
@@ -790,7 +787,7 @@
26471
"TARGET_REALLY_IWMMXT"
26472
"wshufh%?\\t%0, %1, %2"
26473
[(set_attr "predicable" "yes")
26474
- (set_attr "wtype" "wshufh")]
26475
+ (set_attr "type" "wmmx_wshufh")]
26478
;; Mask-generating comparisons
26479
@@ -812,7 +809,7 @@
26480
"TARGET_REALLY_IWMMXT"
26481
"wcmpeqb%?\\t%0, %1, %2"
26482
[(set_attr "predicable" "yes")
26483
- (set_attr "wtype" "wcmpeq")]
26484
+ (set_attr "type" "wmmx_wcmpeq")]
26487
(define_insn "eqv4hi3"
26488
@@ -823,7 +820,7 @@
26489
"TARGET_REALLY_IWMMXT"
26490
"wcmpeqh%?\\t%0, %1, %2"
26491
[(set_attr "predicable" "yes")
26492
- (set_attr "wtype" "wcmpeq")]
26493
+ (set_attr "type" "wmmx_wcmpeq")]
26496
(define_insn "eqv2si3"
26497
@@ -835,7 +832,7 @@
26498
"TARGET_REALLY_IWMMXT"
26499
"wcmpeqw%?\\t%0, %1, %2"
26500
[(set_attr "predicable" "yes")
26501
- (set_attr "wtype" "wcmpeq")]
26502
+ (set_attr "type" "wmmx_wcmpeq")]
26505
(define_insn "gtuv8qi3"
26506
@@ -846,7 +843,7 @@
26507
"TARGET_REALLY_IWMMXT"
26508
"wcmpgtub%?\\t%0, %1, %2"
26509
[(set_attr "predicable" "yes")
26510
- (set_attr "wtype" "wcmpgt")]
26511
+ (set_attr "type" "wmmx_wcmpgt")]
26514
(define_insn "gtuv4hi3"
26515
@@ -857,7 +854,7 @@
26516
"TARGET_REALLY_IWMMXT"
26517
"wcmpgtuh%?\\t%0, %1, %2"
26518
[(set_attr "predicable" "yes")
26519
- (set_attr "wtype" "wcmpgt")]
26520
+ (set_attr "type" "wmmx_wcmpgt")]
26523
(define_insn "gtuv2si3"
26524
@@ -868,7 +865,7 @@
26525
"TARGET_REALLY_IWMMXT"
26526
"wcmpgtuw%?\\t%0, %1, %2"
26527
[(set_attr "predicable" "yes")
26528
- (set_attr "wtype" "wcmpgt")]
26529
+ (set_attr "type" "wmmx_wcmpgt")]
26532
(define_insn "gtv8qi3"
26533
@@ -879,7 +876,7 @@
26534
"TARGET_REALLY_IWMMXT"
26535
"wcmpgtsb%?\\t%0, %1, %2"
26536
[(set_attr "predicable" "yes")
26537
- (set_attr "wtype" "wcmpgt")]
26538
+ (set_attr "type" "wmmx_wcmpgt")]
26541
(define_insn "gtv4hi3"
26542
@@ -890,7 +887,7 @@
26543
"TARGET_REALLY_IWMMXT"
26544
"wcmpgtsh%?\\t%0, %1, %2"
26545
[(set_attr "predicable" "yes")
26546
- (set_attr "wtype" "wcmpgt")]
26547
+ (set_attr "type" "wmmx_wcmpgt")]
26550
(define_insn "gtv2si3"
26551
@@ -901,7 +898,7 @@
26552
"TARGET_REALLY_IWMMXT"
26553
"wcmpgtsw%?\\t%0, %1, %2"
26554
[(set_attr "predicable" "yes")
26555
- (set_attr "wtype" "wcmpgt")]
26556
+ (set_attr "type" "wmmx_wcmpgt")]
26560
@@ -913,7 +910,7 @@
26561
"TARGET_REALLY_IWMMXT"
26562
"wmaxs<MMX_char>%?\\t%0, %1, %2"
26563
[(set_attr "predicable" "yes")
26564
- (set_attr "wtype" "wmax")]
26565
+ (set_attr "type" "wmmx_wmax")]
26568
(define_insn "*umax<mode>3_iwmmxt"
26569
@@ -923,7 +920,7 @@
26570
"TARGET_REALLY_IWMMXT"
26571
"wmaxu<MMX_char>%?\\t%0, %1, %2"
26572
[(set_attr "predicable" "yes")
26573
- (set_attr "wtype" "wmax")]
26574
+ (set_attr "type" "wmmx_wmax")]
26577
(define_insn "*smin<mode>3_iwmmxt"
26578
@@ -933,7 +930,7 @@
26579
"TARGET_REALLY_IWMMXT"
26580
"wmins<MMX_char>%?\\t%0, %1, %2"
26581
[(set_attr "predicable" "yes")
26582
- (set_attr "wtype" "wmin")]
26583
+ (set_attr "type" "wmmx_wmin")]
26586
(define_insn "*umin<mode>3_iwmmxt"
26587
@@ -943,7 +940,7 @@
26588
"TARGET_REALLY_IWMMXT"
26589
"wminu<MMX_char>%?\\t%0, %1, %2"
26590
[(set_attr "predicable" "yes")
26591
- (set_attr "wtype" "wmin")]
26592
+ (set_attr "type" "wmmx_wmin")]
26595
;; Pack/unpack insns.
26596
@@ -956,7 +953,7 @@
26597
"TARGET_REALLY_IWMMXT"
26598
"wpackhss%?\\t%0, %1, %2"
26599
[(set_attr "predicable" "yes")
26600
- (set_attr "wtype" "wpack")]
26601
+ (set_attr "type" "wmmx_wpack")]
26604
(define_insn "iwmmxt_wpackwss"
26605
@@ -967,7 +964,7 @@
26606
"TARGET_REALLY_IWMMXT"
26607
"wpackwss%?\\t%0, %1, %2"
26608
[(set_attr "predicable" "yes")
26609
- (set_attr "wtype" "wpack")]
26610
+ (set_attr "type" "wmmx_wpack")]
26613
(define_insn "iwmmxt_wpackdss"
26614
@@ -978,7 +975,7 @@
26615
"TARGET_REALLY_IWMMXT"
26616
"wpackdss%?\\t%0, %1, %2"
26617
[(set_attr "predicable" "yes")
26618
- (set_attr "wtype" "wpack")]
26619
+ (set_attr "type" "wmmx_wpack")]
26622
(define_insn "iwmmxt_wpackhus"
26623
@@ -989,7 +986,7 @@
26624
"TARGET_REALLY_IWMMXT"
26625
"wpackhus%?\\t%0, %1, %2"
26626
[(set_attr "predicable" "yes")
26627
- (set_attr "wtype" "wpack")]
26628
+ (set_attr "type" "wmmx_wpack")]
26631
(define_insn "iwmmxt_wpackwus"
26632
@@ -1000,7 +997,7 @@
26633
"TARGET_REALLY_IWMMXT"
26634
"wpackwus%?\\t%0, %1, %2"
26635
[(set_attr "predicable" "yes")
26636
- (set_attr "wtype" "wpack")]
26637
+ (set_attr "type" "wmmx_wpack")]
26640
(define_insn "iwmmxt_wpackdus"
26641
@@ -1011,7 +1008,7 @@
26642
"TARGET_REALLY_IWMMXT"
26643
"wpackdus%?\\t%0, %1, %2"
26644
[(set_attr "predicable" "yes")
26645
- (set_attr "wtype" "wpack")]
26646
+ (set_attr "type" "wmmx_wpack")]
26649
(define_insn "iwmmxt_wunpckihb"
26650
@@ -1039,7 +1036,7 @@
26651
"TARGET_REALLY_IWMMXT"
26652
"wunpckihb%?\\t%0, %1, %2"
26653
[(set_attr "predicable" "yes")
26654
- (set_attr "wtype" "wunpckih")]
26655
+ (set_attr "type" "wmmx_wunpckih")]
26658
(define_insn "iwmmxt_wunpckihh"
26659
@@ -1059,7 +1056,7 @@
26660
"TARGET_REALLY_IWMMXT"
26661
"wunpckihh%?\\t%0, %1, %2"
26662
[(set_attr "predicable" "yes")
26663
- (set_attr "wtype" "wunpckih")]
26664
+ (set_attr "type" "wmmx_wunpckih")]
26667
(define_insn "iwmmxt_wunpckihw"
26668
@@ -1075,7 +1072,7 @@
26669
"TARGET_REALLY_IWMMXT"
26670
"wunpckihw%?\\t%0, %1, %2"
26671
[(set_attr "predicable" "yes")
26672
- (set_attr "wtype" "wunpckih")]
26673
+ (set_attr "type" "wmmx_wunpckih")]
26676
(define_insn "iwmmxt_wunpckilb"
26677
@@ -1103,7 +1100,7 @@
26678
"TARGET_REALLY_IWMMXT"
26679
"wunpckilb%?\\t%0, %1, %2"
26680
[(set_attr "predicable" "yes")
26681
- (set_attr "wtype" "wunpckil")]
26682
+ (set_attr "type" "wmmx_wunpckil")]
26685
(define_insn "iwmmxt_wunpckilh"
26686
@@ -1123,7 +1120,7 @@
26687
"TARGET_REALLY_IWMMXT"
26688
"wunpckilh%?\\t%0, %1, %2"
26689
[(set_attr "predicable" "yes")
26690
- (set_attr "wtype" "wunpckil")]
26691
+ (set_attr "type" "wmmx_wunpckil")]
26694
(define_insn "iwmmxt_wunpckilw"
26695
@@ -1139,7 +1136,7 @@
26696
"TARGET_REALLY_IWMMXT"
26697
"wunpckilw%?\\t%0, %1, %2"
26698
[(set_attr "predicable" "yes")
26699
- (set_attr "wtype" "wunpckil")]
26700
+ (set_attr "type" "wmmx_wunpckil")]
26703
(define_insn "iwmmxt_wunpckehub"
26704
@@ -1151,7 +1148,7 @@
26705
"TARGET_REALLY_IWMMXT"
26706
"wunpckehub%?\\t%0, %1"
26707
[(set_attr "predicable" "yes")
26708
- (set_attr "wtype" "wunpckeh")]
26709
+ (set_attr "type" "wmmx_wunpckeh")]
26712
(define_insn "iwmmxt_wunpckehuh"
26713
@@ -1162,7 +1159,7 @@
26714
"TARGET_REALLY_IWMMXT"
26715
"wunpckehuh%?\\t%0, %1"
26716
[(set_attr "predicable" "yes")
26717
- (set_attr "wtype" "wunpckeh")]
26718
+ (set_attr "type" "wmmx_wunpckeh")]
26721
(define_insn "iwmmxt_wunpckehuw"
26722
@@ -1173,7 +1170,7 @@
26723
"TARGET_REALLY_IWMMXT"
26724
"wunpckehuw%?\\t%0, %1"
26725
[(set_attr "predicable" "yes")
26726
- (set_attr "wtype" "wunpckeh")]
26727
+ (set_attr "type" "wmmx_wunpckeh")]
26730
(define_insn "iwmmxt_wunpckehsb"
26731
@@ -1185,7 +1182,7 @@
26732
"TARGET_REALLY_IWMMXT"
26733
"wunpckehsb%?\\t%0, %1"
26734
[(set_attr "predicable" "yes")
26735
- (set_attr "wtype" "wunpckeh")]
26736
+ (set_attr "type" "wmmx_wunpckeh")]
26739
(define_insn "iwmmxt_wunpckehsh"
26740
@@ -1196,7 +1193,7 @@
26741
"TARGET_REALLY_IWMMXT"
26742
"wunpckehsh%?\\t%0, %1"
26743
[(set_attr "predicable" "yes")
26744
- (set_attr "wtype" "wunpckeh")]
26745
+ (set_attr "type" "wmmx_wunpckeh")]
26748
(define_insn "iwmmxt_wunpckehsw"
26749
@@ -1207,7 +1204,7 @@
26750
"TARGET_REALLY_IWMMXT"
26751
"wunpckehsw%?\\t%0, %1"
26752
[(set_attr "predicable" "yes")
26753
- (set_attr "wtype" "wunpckeh")]
26754
+ (set_attr "type" "wmmx_wunpckeh")]
26757
(define_insn "iwmmxt_wunpckelub"
26758
@@ -1219,7 +1216,7 @@
26759
"TARGET_REALLY_IWMMXT"
26760
"wunpckelub%?\\t%0, %1"
26761
[(set_attr "predicable" "yes")
26762
- (set_attr "wtype" "wunpckel")]
26763
+ (set_attr "type" "wmmx_wunpckel")]
26766
(define_insn "iwmmxt_wunpckeluh"
26767
@@ -1230,7 +1227,7 @@
26768
"TARGET_REALLY_IWMMXT"
26769
"wunpckeluh%?\\t%0, %1"
26770
[(set_attr "predicable" "yes")
26771
- (set_attr "wtype" "wunpckel")]
26772
+ (set_attr "type" "wmmx_wunpckel")]
26775
(define_insn "iwmmxt_wunpckeluw"
26776
@@ -1241,7 +1238,7 @@
26777
"TARGET_REALLY_IWMMXT"
26778
"wunpckeluw%?\\t%0, %1"
26779
[(set_attr "predicable" "yes")
26780
- (set_attr "wtype" "wunpckel")]
26781
+ (set_attr "type" "wmmx_wunpckel")]
26784
(define_insn "iwmmxt_wunpckelsb"
26785
@@ -1253,7 +1250,7 @@
26786
"TARGET_REALLY_IWMMXT"
26787
"wunpckelsb%?\\t%0, %1"
26788
[(set_attr "predicable" "yes")
26789
- (set_attr "wtype" "wunpckel")]
26790
+ (set_attr "type" "wmmx_wunpckel")]
26793
(define_insn "iwmmxt_wunpckelsh"
26794
@@ -1264,7 +1261,7 @@
26795
"TARGET_REALLY_IWMMXT"
26796
"wunpckelsh%?\\t%0, %1"
26797
[(set_attr "predicable" "yes")
26798
- (set_attr "wtype" "wunpckel")]
26799
+ (set_attr "type" "wmmx_wunpckel")]
26802
(define_insn "iwmmxt_wunpckelsw"
26803
@@ -1275,7 +1272,7 @@
26804
"TARGET_REALLY_IWMMXT"
26805
"wunpckelsw%?\\t%0, %1"
26806
[(set_attr "predicable" "yes")
26807
- (set_attr "wtype" "wunpckel")]
26808
+ (set_attr "type" "wmmx_wunpckel")]
26812
@@ -1298,7 +1295,7 @@
26814
[(set_attr "predicable" "yes")
26815
(set_attr "arch" "*, iwmmxt2")
26816
- (set_attr "wtype" "wror, wror")]
26817
+ (set_attr "type" "wmmx_wror, wmmx_wror")]
26820
(define_insn "ashr<mode>3_iwmmxt"
26821
@@ -1319,7 +1316,7 @@
26823
[(set_attr "predicable" "yes")
26824
(set_attr "arch" "*, iwmmxt2")
26825
- (set_attr "wtype" "wsra, wsra")]
26826
+ (set_attr "type" "wmmx_wsra, wmmx_wsra")]
26829
(define_insn "lshr<mode>3_iwmmxt"
26830
@@ -1340,7 +1337,7 @@
26832
[(set_attr "predicable" "yes")
26833
(set_attr "arch" "*, iwmmxt2")
26834
- (set_attr "wtype" "wsrl, wsrl")]
26835
+ (set_attr "type" "wmmx_wsrl, wmmx_wsrl")]
26838
(define_insn "ashl<mode>3_iwmmxt"
26839
@@ -1361,7 +1358,7 @@
26841
[(set_attr "predicable" "yes")
26842
(set_attr "arch" "*, iwmmxt2")
26843
- (set_attr "wtype" "wsll, wsll")]
26844
+ (set_attr "type" "wmmx_wsll, wmmx_wsll")]
26847
(define_insn "ror<mode>3_di"
26848
@@ -1382,7 +1379,7 @@
26850
[(set_attr "predicable" "yes")
26851
(set_attr "arch" "*, iwmmxt2")
26852
- (set_attr "wtype" "wror, wror")]
26853
+ (set_attr "type" "wmmx_wror, wmmx_wror")]
26856
(define_insn "ashr<mode>3_di"
26857
@@ -1403,7 +1400,7 @@
26859
[(set_attr "predicable" "yes")
26860
(set_attr "arch" "*, iwmmxt2")
26861
- (set_attr "wtype" "wsra, wsra")]
26862
+ (set_attr "type" "wmmx_wsra, wmmx_wsra")]
26865
(define_insn "lshr<mode>3_di"
26866
@@ -1424,7 +1421,7 @@
26868
[(set_attr "predicable" "yes")
26869
(set_attr "arch" "*, iwmmxt2")
26870
- (set_attr "wtype" "wsrl, wsrl")]
26871
+ (set_attr "type" "wmmx_wsrl, wmmx_wsrl")]
26874
(define_insn "ashl<mode>3_di"
26875
@@ -1445,7 +1442,7 @@
26877
[(set_attr "predicable" "yes")
26878
(set_attr "arch" "*, iwmmxt2")
26879
- (set_attr "wtype" "wsll, wsll")]
26880
+ (set_attr "type" "wmmx_wsll, wmmx_wsll")]
26883
(define_insn "iwmmxt_wmadds"
26884
@@ -1464,7 +1461,7 @@
26885
"TARGET_REALLY_IWMMXT"
26886
"wmadds%?\\t%0, %1, %2"
26887
[(set_attr "predicable" "yes")
26888
- (set_attr "wtype" "wmadd")]
26889
+ (set_attr "type" "wmmx_wmadd")]
26892
(define_insn "iwmmxt_wmaddu"
26893
@@ -1483,7 +1480,7 @@
26894
"TARGET_REALLY_IWMMXT"
26895
"wmaddu%?\\t%0, %1, %2"
26896
[(set_attr "predicable" "yes")
26897
- (set_attr "wtype" "wmadd")]
26898
+ (set_attr "type" "wmmx_wmadd")]
26901
(define_insn "iwmmxt_tmia"
26902
@@ -1496,7 +1493,7 @@
26903
"TARGET_REALLY_IWMMXT"
26904
"tmia%?\\t%0, %2, %3"
26905
[(set_attr "predicable" "yes")
26906
- (set_attr "wtype" "tmia")]
26907
+ (set_attr "type" "wmmx_tmia")]
26910
(define_insn "iwmmxt_tmiaph"
26911
@@ -1514,7 +1511,7 @@
26912
"TARGET_REALLY_IWMMXT"
26913
"tmiaph%?\\t%0, %2, %3"
26914
[(set_attr "predicable" "yes")
26915
- (set_attr "wtype" "tmiaph")]
26916
+ (set_attr "type" "wmmx_tmiaph")]
26919
(define_insn "iwmmxt_tmiabb"
26920
@@ -1527,7 +1524,7 @@
26921
"TARGET_REALLY_IWMMXT"
26922
"tmiabb%?\\t%0, %2, %3"
26923
[(set_attr "predicable" "yes")
26924
- (set_attr "wtype" "tmiaxy")]
26925
+ (set_attr "type" "wmmx_tmiaxy")]
26928
(define_insn "iwmmxt_tmiatb"
26929
@@ -1544,7 +1541,7 @@
26930
"TARGET_REALLY_IWMMXT"
26931
"tmiatb%?\\t%0, %2, %3"
26932
[(set_attr "predicable" "yes")
26933
- (set_attr "wtype" "tmiaxy")]
26934
+ (set_attr "type" "wmmx_tmiaxy")]
26937
(define_insn "iwmmxt_tmiabt"
26938
@@ -1561,7 +1558,7 @@
26939
"TARGET_REALLY_IWMMXT"
26940
"tmiabt%?\\t%0, %2, %3"
26941
[(set_attr "predicable" "yes")
26942
- (set_attr "wtype" "tmiaxy")]
26943
+ (set_attr "type" "wmmx_tmiaxy")]
26946
(define_insn "iwmmxt_tmiatt"
26947
@@ -1580,7 +1577,7 @@
26948
"TARGET_REALLY_IWMMXT"
26949
"tmiatt%?\\t%0, %2, %3"
26950
[(set_attr "predicable" "yes")
26951
- (set_attr "wtype" "tmiaxy")]
26952
+ (set_attr "type" "wmmx_tmiaxy")]
26955
(define_insn "iwmmxt_tmovmskb"
26956
@@ -1589,7 +1586,7 @@
26957
"TARGET_REALLY_IWMMXT"
26958
"tmovmskb%?\\t%0, %1"
26959
[(set_attr "predicable" "yes")
26960
- (set_attr "wtype" "tmovmsk")]
26961
+ (set_attr "type" "wmmx_tmovmsk")]
26964
(define_insn "iwmmxt_tmovmskh"
26965
@@ -1598,7 +1595,7 @@
26966
"TARGET_REALLY_IWMMXT"
26967
"tmovmskh%?\\t%0, %1"
26968
[(set_attr "predicable" "yes")
26969
- (set_attr "wtype" "tmovmsk")]
26970
+ (set_attr "type" "wmmx_tmovmsk")]
26973
(define_insn "iwmmxt_tmovmskw"
26974
@@ -1607,7 +1604,7 @@
26975
"TARGET_REALLY_IWMMXT"
26976
"tmovmskw%?\\t%0, %1"
26977
[(set_attr "predicable" "yes")
26978
- (set_attr "wtype" "tmovmsk")]
26979
+ (set_attr "type" "wmmx_tmovmsk")]
26982
(define_insn "iwmmxt_waccb"
26983
@@ -1616,7 +1613,7 @@
26984
"TARGET_REALLY_IWMMXT"
26986
[(set_attr "predicable" "yes")
26987
- (set_attr "wtype" "wacc")]
26988
+ (set_attr "type" "wmmx_wacc")]
26991
(define_insn "iwmmxt_wacch"
26992
@@ -1625,7 +1622,7 @@
26993
"TARGET_REALLY_IWMMXT"
26995
[(set_attr "predicable" "yes")
26996
- (set_attr "wtype" "wacc")]
26997
+ (set_attr "type" "wmmx_wacc")]
27000
(define_insn "iwmmxt_waccw"
27001
@@ -1634,7 +1631,7 @@
27002
"TARGET_REALLY_IWMMXT"
27004
[(set_attr "predicable" "yes")
27005
- (set_attr "wtype" "wacc")]
27006
+ (set_attr "type" "wmmx_wacc")]
27009
;; use unspec here to prevent 8 * imm to be optimized by cse
27010
@@ -1651,7 +1648,7 @@
27011
"TARGET_REALLY_IWMMXT"
27012
"waligni%?\\t%0, %1, %2, %3"
27013
[(set_attr "predicable" "yes")
27014
- (set_attr "wtype" "waligni")]
27015
+ (set_attr "type" "wmmx_waligni")]
27018
(define_insn "iwmmxt_walignr"
27019
@@ -1666,7 +1663,7 @@
27020
"TARGET_REALLY_IWMMXT"
27021
"walignr%U3%?\\t%0, %1, %2"
27022
[(set_attr "predicable" "yes")
27023
- (set_attr "wtype" "walignr")]
27024
+ (set_attr "type" "wmmx_walignr")]
27027
(define_insn "iwmmxt_walignr0"
27028
@@ -1681,7 +1678,7 @@
27029
"TARGET_REALLY_IWMMXT"
27030
"walignr0%?\\t%0, %1, %2"
27031
[(set_attr "predicable" "yes")
27032
- (set_attr "wtype" "walignr")]
27033
+ (set_attr "type" "wmmx_walignr")]
27036
(define_insn "iwmmxt_walignr1"
27037
@@ -1696,7 +1693,7 @@
27038
"TARGET_REALLY_IWMMXT"
27039
"walignr1%?\\t%0, %1, %2"
27040
[(set_attr "predicable" "yes")
27041
- (set_attr "wtype" "walignr")]
27042
+ (set_attr "type" "wmmx_walignr")]
27045
(define_insn "iwmmxt_walignr2"
27046
@@ -1711,7 +1708,7 @@
27047
"TARGET_REALLY_IWMMXT"
27048
"walignr2%?\\t%0, %1, %2"
27049
[(set_attr "predicable" "yes")
27050
- (set_attr "wtype" "walignr")]
27051
+ (set_attr "type" "wmmx_walignr")]
27054
(define_insn "iwmmxt_walignr3"
27055
@@ -1726,7 +1723,7 @@
27056
"TARGET_REALLY_IWMMXT"
27057
"walignr3%?\\t%0, %1, %2"
27058
[(set_attr "predicable" "yes")
27059
- (set_attr "wtype" "walignr")]
27060
+ (set_attr "type" "wmmx_walignr")]
27063
(define_insn "iwmmxt_wsadb"
27064
@@ -1738,7 +1735,7 @@
27065
"TARGET_REALLY_IWMMXT"
27066
"wsadb%?\\t%0, %2, %3"
27067
[(set_attr "predicable" "yes")
27068
- (set_attr "wtype" "wsad")]
27069
+ (set_attr "type" "wmmx_wsad")]
27072
(define_insn "iwmmxt_wsadh"
27073
@@ -1750,7 +1747,7 @@
27074
"TARGET_REALLY_IWMMXT"
27075
"wsadh%?\\t%0, %2, %3"
27076
[(set_attr "predicable" "yes")
27077
- (set_attr "wtype" "wsad")]
27078
+ (set_attr "type" "wmmx_wsad")]
27081
(define_insn "iwmmxt_wsadbz"
27082
@@ -1760,7 +1757,7 @@
27083
"TARGET_REALLY_IWMMXT"
27084
"wsadbz%?\\t%0, %1, %2"
27085
[(set_attr "predicable" "yes")
27086
- (set_attr "wtype" "wsad")]
27087
+ (set_attr "type" "wmmx_wsad")]
27090
(define_insn "iwmmxt_wsadhz"
27091
@@ -1770,7 +1767,7 @@
27092
"TARGET_REALLY_IWMMXT"
27093
"wsadhz%?\\t%0, %1, %2"
27094
[(set_attr "predicable" "yes")
27095
- (set_attr "wtype" "wsad")]
27096
+ (set_attr "type" "wmmx_wsad")]
27099
(include "iwmmxt2.md")
27100
--- a/src/gcc/config/arm/cortex-a53.md
27101
+++ b/src/gcc/config/arm/cortex-a53.md
27103
+;; ARM Cortex-A53 pipeline description
27104
+;; Copyright (C) 2013 Free Software Foundation, Inc.
27106
+;; Contributed by ARM Ltd.
27108
+;; This file is part of GCC.
27110
+;; GCC is free software; you can redistribute it and/or modify it
27111
+;; under the terms of the GNU General Public License as published by
27112
+;; the Free Software Foundation; either version 3, or (at your option)
27113
+;; any later version.
27115
+;; GCC is distributed in the hope that it will be useful, but
27116
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
27117
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27118
+;; General Public License for more details.
27120
+;; You should have received a copy of the GNU General Public License
27121
+;; along with GCC; see the file COPYING3. If not see
27122
+;; <http://www.gnu.org/licenses/>.
27124
+(define_automaton "cortex_a53")
27126
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27127
+;; Functional units.
27128
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27130
+;; There are two main integer execution pipelines, described as
27131
+;; slot 0 and issue slot 1.
27133
+(define_cpu_unit "cortex_a53_slot0" "cortex_a53")
27134
+(define_cpu_unit "cortex_a53_slot1" "cortex_a53")
27136
+(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1")
27137
+(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1")
27139
+;; The load/store pipeline. Load/store instructions can dual-issue from
27140
+;; either pipeline, but two load/stores cannot simultaneously issue.
27142
+(define_cpu_unit "cortex_a53_ls" "cortex_a53")
27144
+;; The store pipeline. Shared between both execution pipelines.
27146
+(define_cpu_unit "cortex_a53_store" "cortex_a53")
27148
+;; The branch pipeline. Branches can dual-issue with other instructions
27149
+;; (except when those instructions take multiple cycles to issue).
27151
+(define_cpu_unit "cortex_a53_branch" "cortex_a53")
27153
+;; The integer divider.
27155
+(define_cpu_unit "cortex_a53_idiv" "cortex_a53")
27157
+;; The floating-point add pipeline used to model the usage
27158
+;; of the add pipeline by fmac instructions.
27160
+(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53")
27162
+;; Floating-point div/sqrt (long latency, out-of-order completion).
27164
+(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
27166
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27167
+;; ALU instructions.
27168
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27170
+(define_insn_reservation "cortex_a53_alu" 2
27171
+ (and (eq_attr "tune" "cortexa53")
27172
+ (eq_attr "type" "alu_reg,simple_alu_imm"))
27173
+ "cortex_a53_slot_any")
27175
+(define_insn_reservation "cortex_a53_alu_shift" 2
27176
+ (and (eq_attr "tune" "cortexa53")
27177
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
27178
+ "cortex_a53_slot_any")
27180
+;; Forwarding path for unshifted operands.
27182
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
27183
+ "cortex_a53_alu")
27185
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
27186
+ "cortex_a53_alu_shift"
27187
+ "arm_no_early_alu_shift_dep")
27189
+;; The multiplier pipeline can forward results so there's no need to specify
27190
+;; bypasses. Multiplies can only single-issue currently.
27192
+(define_insn_reservation "cortex_a53_mul" 3
27193
+ (and (eq_attr "tune" "cortexa53")
27194
+ (ior (eq_attr "mul32" "yes")
27195
+ (eq_attr "mul64" "yes")))
27196
+ "cortex_a53_single_issue")
27198
+;; A multiply with a single-register result or an MLA, followed by an
27199
+;; MLA with an accumulator dependency, has its result forwarded so two
27200
+;; such instructions can issue back-to-back.
27202
+(define_bypass 1 "cortex_a53_mul"
27204
+ "arm_mac_accumulator_is_mul_result")
27206
+;; Punt with a high enough latency for divides.
27207
+(define_insn_reservation "cortex_a53_udiv" 8
27208
+ (and (eq_attr "tune" "cortexa53")
27209
+ (eq_attr "type" "udiv"))
27210
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")
27212
+(define_insn_reservation "cortex_a53_sdiv" 9
27213
+ (and (eq_attr "tune" "cortexa53")
27214
+ (eq_attr "type" "sdiv"))
27215
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
27218
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
27219
+ "cortex_a53_alu")
27220
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
27221
+ "cortex_a53_alu_shift"
27222
+ "arm_no_early_alu_shift_dep")
27224
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27225
+;; Load/store instructions.
27226
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27228
+;; Address-generation happens in the issue stage.
27230
+(define_insn_reservation "cortex_a53_load1" 3
27231
+ (and (eq_attr "tune" "cortexa53")
27232
+ (eq_attr "type" "load_byte,load1"))
27233
+ "cortex_a53_slot_any+cortex_a53_ls")
27235
+(define_insn_reservation "cortex_a53_store1" 2
27236
+ (and (eq_attr "tune" "cortexa53")
27237
+ (eq_attr "type" "store1"))
27238
+ "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store")
27240
+(define_insn_reservation "cortex_a53_load2" 3
27241
+ (and (eq_attr "tune" "cortexa53")
27242
+ (eq_attr "type" "load2"))
27243
+ "cortex_a53_single_issue+cortex_a53_ls")
27245
+(define_insn_reservation "cortex_a53_store2" 2
27246
+ (and (eq_attr "tune" "cortexa53")
27247
+ (eq_attr "type" "store2"))
27248
+ "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store")
27250
+(define_insn_reservation "cortex_a53_load3plus" 4
27251
+ (and (eq_attr "tune" "cortexa53")
27252
+ (eq_attr "type" "load3,load4"))
27253
+ "(cortex_a53_single_issue+cortex_a53_ls)*2")
27255
+(define_insn_reservation "cortex_a53_store3plus" 3
27256
+ (and (eq_attr "tune" "cortexa53")
27257
+ (eq_attr "type" "store3,store4"))
27258
+ "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2")
27260
+;; Load/store addresses are required early in Issue.
27261
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
27262
+ "cortex_a53_load*"
27263
+ "arm_early_load_addr_dep")
27264
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
27265
+ "cortex_a53_store*"
27266
+ "arm_early_store_addr_dep")
27268
+;; Load data can forward in the ALU pipeline
27269
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
27270
+ "cortex_a53_alu")
27271
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
27272
+ "cortex_a53_alu_shift"
27273
+ "arm_no_early_alu_shift_dep")
27275
+;; ALU ops can forward to stores.
27276
+(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift"
27277
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
27278
+ "arm_no_early_store_addr_dep")
27280
+(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus"
27281
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
27282
+ "arm_no_early_store_addr_dep")
27284
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27286
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27288
+;; Currently models all branches as dual-issuable from either execution
27289
+;; slot, which isn't true for all cases. We still need to model indirect
27292
+(define_insn_reservation "cortex_a53_branch" 0
27293
+ (and (eq_attr "tune" "cortexa53")
27294
+ (eq_attr "type" "branch,call"))
27295
+ "cortex_a53_slot_any+cortex_a53_branch")
27297
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27298
+;; Floating-point arithmetic.
27299
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27301
+(define_insn_reservation "cortex_a53_fpalu" 4
27302
+ (and (eq_attr "tune" "cortexa53")
27303
+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
27305
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
27307
+(define_insn_reservation "cortex_a53_fconst" 2
27308
+ (and (eq_attr "tune" "cortexa53")
27309
+ (eq_attr "type" "fconsts,fconstd"))
27310
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
27312
+(define_insn_reservation "cortex_a53_fpmul" 4
27313
+ (and (eq_attr "tune" "cortexa53")
27314
+ (eq_attr "type" "fmuls,fmuld"))
27315
+ "cortex_a53_slot0")
27317
+;; For single-precision multiply-accumulate, the add (accumulate) is issued after
27318
+;; the multiply completes. Model that accordingly.
27320
+(define_insn_reservation "cortex_a53_fpmac" 8
27321
+ (and (eq_attr "tune" "cortexa53")
27322
+ (eq_attr "type" "fmacs,fmacd,ffmas,ffmad"))
27323
+ "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe")
27325
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27326
+;; Floating-point divide/square root instructions.
27327
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27328
+;; fsqrt really takes one cycle less, but that is not modelled.
27330
+(define_insn_reservation "cortex_a53_fdivs" 14
27331
+ (and (eq_attr "tune" "cortexa53")
27332
+ (eq_attr "type" "fdivs"))
27333
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13")
27335
+(define_insn_reservation "cortex_a53_fdivd" 29
27336
+ (and (eq_attr "tune" "cortexa53")
27337
+ (eq_attr "type" "fdivd"))
27338
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
27340
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27341
+;; VFP to/from core transfers.
27342
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27344
+(define_insn_reservation "cortex_a53_r2f" 4
27345
+ (and (eq_attr "tune" "cortexa53")
27346
+ (eq_attr "type" "r_2_f"))
27347
+ "cortex_a53_slot0")
27349
+(define_insn_reservation "cortex_a53_f2r" 2
27350
+ (and (eq_attr "tune" "cortexa53")
27351
+ (eq_attr "type" "f_2_r"))
27352
+ "cortex_a53_slot0")
27354
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27355
+;; VFP flag transfer.
27356
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27358
+(define_insn_reservation "cortex_a53_f_flags" 4
27359
+ (and (eq_attr "tune" "cortexa53")
27360
+ (eq_attr "type" "f_flag"))
27361
+ "cortex_a53_slot0")
27363
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27364
+;; VFP load/store.
27365
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27367
+(define_insn_reservation "cortex_a53_f_loads" 4
27368
+ (and (eq_attr "tune" "cortexa53")
27369
+ (eq_attr "type" "f_loads"))
27370
+ "cortex_a53_slot0")
27372
+(define_insn_reservation "cortex_a53_f_loadd" 5
27373
+ (and (eq_attr "tune" "cortexa53")
27374
+ (eq_attr "type" "f_loadd"))
27375
+ "cortex_a53_slot0")
27377
+(define_insn_reservation "cortex_a53_f_stores" 0
27378
+ (and (eq_attr "tune" "cortexa53")
27379
+ (eq_attr "type" "f_stores"))
27380
+ "cortex_a53_slot0")
27382
+(define_insn_reservation "cortex_a53_f_stored" 0
27383
+ (and (eq_attr "tune" "cortexa53")
27384
+ (eq_attr "type" "f_stored"))
27385
+ "cortex_a53_slot0")
27387
+;; Load-to-use for floating-point values has a penalty of one cycle,
27388
+;; i.e. a latency of two.
27390
+(define_bypass 2 "cortex_a53_f_loads"
27391
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
27392
+ cortex_a53_fdivs, cortex_a53_fdivd,\
27395
+(define_bypass 2 "cortex_a53_f_loadd"
27396
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
27397
+ cortex_a53_fdivs, cortex_a53_fdivd,\
27400
--- a/src/gcc/config/arm/bpabi.h
27401
+++ b/src/gcc/config/arm/bpabi.h
27404
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
27405
|mcpu=marvell-pj4 \
27406
+ |mcpu=cortex-a53 \
27407
|mcpu=generic-armv7-a \
27408
|march=armv7-m|mcpu=cortex-m3 \
27409
|march=armv7e-m|mcpu=cortex-m4 \
27411
" %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \
27413
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
27414
+ |mcpu=cortex-a53 \
27415
|mcpu=marvell-pj4 \
27416
|mcpu=generic-armv7-a \
27417
|march=armv7-m|mcpu=cortex-m3 \
27418
--- a/src/gcc/config/arm/marvell-f-iwmmxt.md
27419
+++ b/src/gcc/config/arm/marvell-f-iwmmxt.md
27420
@@ -63,52 +63,62 @@
27421
;; An attribute appended to instructions for classification
27423
(define_attr "wmmxt_shift" "yes,no"
27424
- (if_then_else (eq_attr "wtype" "wror, wsll, wsra, wsrl")
27425
+ (if_then_else (eq_attr "type" "wmmx_wror, wmmx_wsll, wmmx_wsra, wmmx_wsrl")
27426
(const_string "yes") (const_string "no"))
27429
(define_attr "wmmxt_pack" "yes,no"
27430
- (if_then_else (eq_attr "wtype" "waligni, walignr, wmerge, wpack, wshufh, wunpckeh, wunpckih, wunpckel, wunpckil")
27431
+ (if_then_else (eq_attr "type" "wmmx_waligni, wmmx_walignr, wmmx_wmerge,\
27432
+ wmmx_wpack, wmmx_wshufh, wmmx_wunpckeh,\
27433
+ wmmx_wunpckih, wmmx_wunpckel, wmmx_wunpckil")
27434
(const_string "yes") (const_string "no"))
27437
(define_attr "wmmxt_mult_c1" "yes,no"
27438
- (if_then_else (eq_attr "wtype" "wmac, wmadd, wmiaxy, wmiawxy, wmulw, wqmiaxy, wqmulwm")
27439
+ (if_then_else (eq_attr "type" "wmmx_wmac, wmmx_wmadd, wmmx_wmiaxy,\
27440
+ wmmx_wmiawxy, wmmx_wmulw, wmmx_wqmiaxy,\
27442
(const_string "yes") (const_string "no"))
27445
(define_attr "wmmxt_mult_c2" "yes,no"
27446
- (if_then_else (eq_attr "wtype" "wmul, wqmulm")
27447
+ (if_then_else (eq_attr "type" "wmmx_wmul, wmmx_wqmulm")
27448
(const_string "yes") (const_string "no"))
27451
(define_attr "wmmxt_alu_c1" "yes,no"
27452
- (if_then_else (eq_attr "wtype" "wabs, wabsdiff, wand, wandn, wmov, wor, wxor")
27453
+ (if_then_else (eq_attr "type" "wmmx_wabs, wmmx_wabsdiff, wmmx_wand,\
27454
+ wmmx_wandn, wmmx_wmov, wmmx_wor, wmmx_wxor")
27455
(const_string "yes") (const_string "no"))
27458
(define_attr "wmmxt_alu_c2" "yes,no"
27459
- (if_then_else (eq_attr "wtype" "wacc, wadd, waddsubhx, wavg2, wavg4, wcmpeq, wcmpgt, wmax, wmin, wsub, waddbhus, wsubaddhx")
27460
+ (if_then_else (eq_attr "type" "wmmx_wacc, wmmx_wadd, wmmx_waddsubhx,\
27461
+ wmmx_wavg2, wmmx_wavg4, wmmx_wcmpeq,\
27462
+ wmmx_wcmpgt, wmmx_wmax, wmmx_wmin,\
27463
+ wmmx_wsub, wmmx_waddbhus, wmmx_wsubaddhx")
27464
(const_string "yes") (const_string "no"))
27467
(define_attr "wmmxt_alu_c3" "yes,no"
27468
- (if_then_else (eq_attr "wtype" "wsad")
27469
+ (if_then_else (eq_attr "type" "wmmx_wsad")
27470
(const_string "yes") (const_string "no"))
27473
(define_attr "wmmxt_transfer_c1" "yes,no"
27474
- (if_then_else (eq_attr "wtype" "tbcst, tinsr, tmcr, tmcrr")
27475
+ (if_then_else (eq_attr "type" "wmmx_tbcst, wmmx_tinsr,\
27476
+ wmmx_tmcr, wmmx_tmcrr")
27477
(const_string "yes") (const_string "no"))
27480
(define_attr "wmmxt_transfer_c2" "yes,no"
27481
- (if_then_else (eq_attr "wtype" "textrm, tmovmsk, tmrc, tmrrc")
27482
+ (if_then_else (eq_attr "type" "wmmx_textrm, wmmx_tmovmsk,\
27483
+ wmmx_tmrc, wmmx_tmrrc")
27484
(const_string "yes") (const_string "no"))
27487
(define_attr "wmmxt_transfer_c3" "yes,no"
27488
- (if_then_else (eq_attr "wtype" "tmia, tmiaph, tmiaxy")
27489
+ (if_then_else (eq_attr "type" "wmmx_tmia, wmmx_tmiaph, wmmx_tmiaxy")
27490
(const_string "yes") (const_string "no"))
27493
@@ -169,11 +179,11 @@
27495
(define_insn_reservation "marvell_f_iwmmxt_wstr" 0
27496
(and (eq_attr "marvell_f_iwmmxt" "yes")
27497
- (eq_attr "wtype" "wstr"))
27498
+ (eq_attr "type" "wmmx_wstr"))
27499
"mf_iwmmxt_pipeline")
27501
;There is a forwarding path from MW stage
27502
(define_insn_reservation "marvell_f_iwmmxt_wldr" 5
27503
(and (eq_attr "marvell_f_iwmmxt" "yes")
27504
- (eq_attr "wtype" "wldr"))
27505
+ (eq_attr "type" "wmmx_wldr"))
27506
"mf_iwmmxt_pipeline")
27507
--- a/src/gcc/config/arm/iterators.md
27508
+++ b/src/gcc/config/arm/iterators.md
27509
@@ -496,3 +496,11 @@
27510
(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p")
27511
(UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m")
27512
(UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")])
27513
+;; Both kinds of return insn.
27514
+(define_code_iterator returns [return simple_return])
27515
+(define_code_attr return_str [(return "") (simple_return "simple_")])
27516
+(define_code_attr return_simple_p [(return "false") (simple_return "true")])
27517
+(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)")
27518
+ (simple_return " && use_simple_return_p ()")])
27519
+(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)")
27520
+ (simple_return " && use_simple_return_p ()")])
27521
--- a/src/gcc/config/arm/sync.md
27522
+++ b/src/gcc/config/arm/sync.md
27524
(set_attr "conds" "unconditional")
27525
(set_attr "predicable" "no")])
27527
+(define_insn "atomic_load<mode>"
27528
+ [(set (match_operand:QHSI 0 "register_operand" "=r")
27529
+ (unspec_volatile:QHSI
27530
+ [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q")
27531
+ (match_operand:SI 2 "const_int_operand")] ;; model
27533
+ "TARGET_HAVE_LDACQ"
27535
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
27536
+ if (model == MEMMODEL_RELAXED
27537
+ || model == MEMMODEL_CONSUME
27538
+ || model == MEMMODEL_RELEASE)
27539
+ return \"ldr<sync_sfx>\\t%0, %1\";
27541
+ return \"lda<sync_sfx>\\t%0, %1\";
27545
+(define_insn "atomic_store<mode>"
27546
+ [(set (match_operand:QHSI 0 "memory_operand" "=Q")
27547
+ (unspec_volatile:QHSI
27548
+ [(match_operand:QHSI 1 "general_operand" "r")
27549
+ (match_operand:SI 2 "const_int_operand")] ;; model
27551
+ "TARGET_HAVE_LDACQ"
27553
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
27554
+ if (model == MEMMODEL_RELAXED
27555
+ || model == MEMMODEL_CONSUME
27556
+ || model == MEMMODEL_ACQUIRE)
27557
+ return \"str<sync_sfx>\t%1, %0\";
27559
+ return \"stl<sync_sfx>\t%1, %0\";
27563
;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic,
27564
;; even for a 64-bit aligned address. Instead we use a ldrexd unparied
27568
"TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN"
27569
"ldrexd%?\t%0, %H0, %C1"
27570
- [(set_attr "predicable" "yes")])
27571
+ [(set_attr "predicable" "yes")
27572
+ (set_attr "predicable_short_it" "no")])
27574
(define_expand "atomic_compare_and_swap<mode>"
27575
[(match_operand:SI 0 "s_register_operand" "") ;; bool out
27576
@@ -325,8 +362,20 @@
27578
"TARGET_HAVE_LDREXBH"
27579
"ldrex<sync_sfx>%?\t%0, %C1"
27580
- [(set_attr "predicable" "yes")])
27581
+ [(set_attr "predicable" "yes")
27582
+ (set_attr "predicable_short_it" "no")])
27584
+(define_insn "arm_load_acquire_exclusive<mode>"
27585
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
27587
+ (unspec_volatile:NARROW
27588
+ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
27590
+ "TARGET_HAVE_LDACQ"
27591
+ "ldaex<sync_sfx>%?\\t%0, %C1"
27592
+ [(set_attr "predicable" "yes")
27593
+ (set_attr "predicable_short_it" "no")])
27595
(define_insn "arm_load_exclusivesi"
27596
[(set (match_operand:SI 0 "s_register_operand" "=r")
27597
(unspec_volatile:SI
27598
@@ -334,8 +383,19 @@
27600
"TARGET_HAVE_LDREX"
27602
- [(set_attr "predicable" "yes")])
27603
+ [(set_attr "predicable" "yes")
27604
+ (set_attr "predicable_short_it" "no")])
27606
+(define_insn "arm_load_acquire_exclusivesi"
27607
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
27608
+ (unspec_volatile:SI
27609
+ [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
27611
+ "TARGET_HAVE_LDACQ"
27612
+ "ldaex%?\t%0, %C1"
27613
+ [(set_attr "predicable" "yes")
27614
+ (set_attr "predicable_short_it" "no")])
27616
(define_insn "arm_load_exclusivedi"
27617
[(set (match_operand:DI 0 "s_register_operand" "=r")
27618
(unspec_volatile:DI
27619
@@ -343,8 +403,19 @@
27621
"TARGET_HAVE_LDREXD"
27622
"ldrexd%?\t%0, %H0, %C1"
27623
- [(set_attr "predicable" "yes")])
27624
+ [(set_attr "predicable" "yes")
27625
+ (set_attr "predicable_short_it" "no")])
27627
+(define_insn "arm_load_acquire_exclusivedi"
27628
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
27629
+ (unspec_volatile:DI
27630
+ [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
27632
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
27633
+ "ldaexd%?\t%0, %H0, %C1"
27634
+ [(set_attr "predicable" "yes")
27635
+ (set_attr "predicable_short_it" "no")])
27637
(define_insn "arm_store_exclusive<mode>"
27638
[(set (match_operand:SI 0 "s_register_operand" "=&r")
27639
(unspec_volatile:SI [(const_int 0)] VUNSPEC_SC))
27640
@@ -367,4 +438,35 @@
27642
return "strex<sync_sfx>%?\t%0, %2, %C1";
27644
- [(set_attr "predicable" "yes")])
27645
+ [(set_attr "predicable" "yes")
27646
+ (set_attr "predicable_short_it" "no")])
27648
+(define_insn "arm_store_release_exclusivedi"
27649
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
27650
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
27651
+ (set (match_operand:DI 1 "mem_noofs_operand" "=Ua")
27652
+ (unspec_volatile:DI
27653
+ [(match_operand:DI 2 "s_register_operand" "r")]
27655
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
27657
+ rtx value = operands[2];
27658
+ /* See comment in arm_store_exclusive<mode> above. */
27659
+ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
27660
+ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
27661
+ return "stlexd%?\t%0, %2, %3, %C1";
27663
+ [(set_attr "predicable" "yes")
27664
+ (set_attr "predicable_short_it" "no")])
27666
+(define_insn "arm_store_release_exclusive<mode>"
27667
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
27668
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
27669
+ (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua")
27670
+ (unspec_volatile:QHSI
27671
+ [(match_operand:QHSI 2 "s_register_operand" "r")]
27673
+ "TARGET_HAVE_LDACQ"
27674
+ "stlex<sync_sfx>%?\t%0, %2, %C1"
27675
+ [(set_attr "predicable" "yes")
27676
+ (set_attr "predicable_short_it" "no")])
27677
--- a/src/gcc/config/arm/neon-testgen.ml
27678
+++ b/src/gcc/config/arm/neon-testgen.ml
27679
@@ -163,10 +163,12 @@
27680
match List.find (fun feature ->
27681
match feature with Requires_feature _ -> true
27682
| Requires_arch _ -> true
27683
+ | Requires_FP_bit 1 -> true
27686
Requires_feature "FMA" -> "arm_neonv2"
27687
| Requires_arch 8 -> "arm_v8_neon"
27688
+ | Requires_FP_bit 1 -> "arm_neon_fp16"
27689
| _ -> assert false
27690
with Not_found -> "arm_neon"
27692
--- a/src/gcc/config/arm/fa726te.md
27693
+++ b/src/gcc/config/arm/fa726te.md
27694
@@ -115,7 +115,7 @@
27696
(define_insn_reservation "726te_mult_op" 3
27697
(and (eq_attr "tune" "fa726te")
27698
- (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
27699
+ (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
27700
umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
27701
"fa726te_issue+fa726te_mac_pipe")
27703
--- a/src/gcc/config/arm/arm.md
27704
+++ b/src/gcc/config/arm/arm.md
27706
; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code.
27707
(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code")))
27709
+; We use this attribute to disable alternatives that can produce 32-bit
27710
+; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks
27711
+; that contain 32-bit instructions.
27712
+(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes"))
27714
+; This attribute is used to disable a predicated alternative when we have
27715
+; arm_restrict_it.
27716
+(define_attr "predicable_short_it" "no,yes" (const_string "yes"))
27718
;; Operand number of an input operand that is shifted. Zero if the
27719
;; given instruction does not shift one of its input operands.
27720
(define_attr "shift" "" (const_int 0))
27722
(define_attr "fpu" "none,vfp"
27723
(const (symbol_ref "arm_fpu_attr")))
27725
+(define_attr "predicated" "yes,no" (const_string "no"))
27727
; LENGTH of an instruction (in bytes)
27728
(define_attr "length" ""
27731
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
27732
; arm_arch6. This attribute is used to compute attribute "enabled",
27733
; use type "any" to enable an alternative in all cases.
27734
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
27735
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
27736
(const_string "any"))
27738
(define_attr "arch_enabled" "no,yes"
27739
@@ -129,24 +140,16 @@
27740
(match_test "TARGET_32BIT && !arm_arch6"))
27741
(const_string "yes")
27743
- (and (eq_attr "arch" "onlya8")
27744
- (eq_attr "tune" "cortexa8"))
27745
+ (and (eq_attr "arch" "avoid_neon_for_64bits")
27746
+ (match_test "TARGET_NEON")
27747
+ (not (match_test "TARGET_PREFER_NEON_64BITS")))
27748
(const_string "yes")
27750
- (and (eq_attr "arch" "neon_onlya8")
27751
- (eq_attr "tune" "cortexa8")
27752
- (match_test "TARGET_NEON"))
27753
+ (and (eq_attr "arch" "neon_for_64bits")
27754
+ (match_test "TARGET_NEON")
27755
+ (match_test "TARGET_PREFER_NEON_64BITS"))
27756
(const_string "yes")
27758
- (and (eq_attr "arch" "nota8")
27759
- (not (eq_attr "tune" "cortexa8")))
27760
- (const_string "yes")
27762
- (and (eq_attr "arch" "neon_nota8")
27763
- (not (eq_attr "tune" "cortexa8"))
27764
- (match_test "TARGET_NEON"))
27765
- (const_string "yes")
27767
(and (eq_attr "arch" "iwmmxt2")
27768
(match_test "TARGET_REALLY_IWMMXT2"))
27769
(const_string "yes")]
27770
@@ -179,6 +182,15 @@
27771
(cond [(eq_attr "insn_enabled" "no")
27772
(const_string "no")
27774
+ (and (eq_attr "predicable_short_it" "no")
27775
+ (and (eq_attr "predicated" "yes")
27776
+ (match_test "arm_restrict_it")))
27777
+ (const_string "no")
27779
+ (and (eq_attr "enabled_for_depr_it" "no")
27780
+ (match_test "arm_restrict_it"))
27781
+ (const_string "no")
27783
(eq_attr "arch_enabled" "no")
27784
(const_string "no")
27786
@@ -219,53 +231,155 @@
27787
;; scheduling information.
27789
(define_attr "insn"
27790
- "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other"
27791
+ "mov,mvn,clz,mrs,msr,xtab,sat,other"
27792
(const_string "other"))
27794
-; TYPE attribute is used to detect floating point instructions which, if
27795
-; running on a co-processor can run in parallel with other, basic instructions
27796
-; If write-buffer scheduling is enabled then it can also be used in the
27797
-; scheduling of writes.
27799
-; Classification of each insn
27800
-; Note: vfp.md has different meanings for some of these, and some further
27801
-; types as well. See that file for details.
27802
-; simple_alu_imm a simple alu instruction that doesn't hit memory or fp
27803
-; regs or have a shifted source operand and has an immediate
27804
-; operand. This currently only tracks very basic immediate
27806
-; alu_reg any alu instruction that doesn't hit memory or fp
27807
-; regs or have a shifted source operand
27808
-; and does not have an immediate operand. This is
27809
-; also the default
27810
-; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB
27811
-; alu_shift any data instruction that doesn't hit memory or fp
27812
-; regs, but has a source operand shifted by a constant
27813
-; alu_shift_reg any data instruction that doesn't hit memory or fp
27814
-; regs, but has a source operand shifted by a register value
27815
-; mult a multiply instruction
27816
-; block blockage insn, this blocks all functional units
27817
-; float a floating point arithmetic operation (subject to expansion)
27818
-; fdivd DFmode floating point division
27819
-; fdivs SFmode floating point division
27820
-; f_load[sd] A single/double load from memory. Used for VFP unit.
27821
-; f_store[sd] A single/double store to memory. Used for VFP unit.
27822
-; f_flag a transfer of co-processor flags to the CPSR
27823
-; f_2_r transfer float to core (no memory needed)
27824
-; r_2_f transfer core to float
27825
-; f_cvt convert floating<->integral
27827
-; call a subroutine call
27828
-; load_byte load byte(s) from memory to arm registers
27829
-; load1 load 1 word from memory to arm registers
27830
-; load2 load 2 words from memory to arm registers
27831
-; load3 load 3 words from memory to arm registers
27832
-; load4 load 4 words from memory to arm registers
27833
-; store store 1 word to memory from arm registers
27834
-; store2 store 2 words
27835
-; store3 store 3 words
27836
-; store4 store 4 (or more) words
27837
+; TYPE attribute is used to classify instructions for use in scheduling.
27839
+; Instruction classification:
27841
+; alu_reg any alu instruction that doesn't hit memory or fp
27842
+; regs or have a shifted source operand and does not have
27843
+; an immediate operand. This is also the default.
27844
+; alu_shift any data instruction that doesn't hit memory or fp.
27845
+; regs, but has a source operand shifted by a constant.
27846
+; alu_shift_reg any data instruction that doesn't hit memory or fp.
27847
+; block blockage insn, this blocks all functional units.
27849
+; call subroutine call.
27850
+; f_2_r transfer from float to core (no memory needed).
27851
+; f_cvt conversion between float and integral.
27852
+; f_flag transfer of co-processor flags to the CPSR.
27853
+; f_load[d,s] double/single load from memory. Used for VFP unit.
27854
+; f_minmax[d,s] double/single floating point minimum/maximum.
27855
+; f_rint[d,s] double/single floating point rount to integral.
27856
+; f_sel[d,s] double/single floating byte select.
27857
+; f_store[d,s] double/single store to memory. Used for VFP unit.
27858
+; fadd[d,s] double/single floating-point scalar addition.
27859
+; fcmp[d,s] double/single floating-point compare.
27860
+; fconst[d,s] double/single load immediate.
27861
+; fcpys single precision floating point cpy.
27862
+; fdiv[d,s] double/single precision floating point division.
27863
+; ffarith[d,s] double/single floating point abs/neg/cpy.
27864
+; ffma[d,s] double/single floating point fused multiply-accumulate.
27865
+; float floating point arithmetic operation.
27866
+; fmac[d,s] double/single floating point multiply-accumulate.
27867
+; fmul[d,s] double/single floating point multiply.
27868
+; load_byte load byte(s) from memory to arm registers.
27869
+; load1 load 1 word from memory to arm registers.
27870
+; load2 load 2 words from memory to arm registers.
27871
+; load3 load 3 words from memory to arm registers.
27872
+; load4 load 4 words from memory to arm registers.
27873
+; mla integer multiply accumulate.
27874
+; mlas integer multiply accumulate, flag setting.
27875
+; mov integer move.
27876
+; mul integer multiply.
27877
+; muls integer multiply, flag setting.
27878
+; r_2_f transfer from core to float.
27879
+; sdiv signed division.
27880
+; simple_alu_imm simple alu instruction that doesn't hit memory or fp
27881
+; regs or have a shifted source operand and has an
27882
+; immediate operand. This currently only tracks very basic
27883
+; immediate alu operations.
27884
+; simple_alu_shift simple alu instruction with a shifted source operand.
27885
+; smlad signed multiply accumulate dual.
27886
+; smladx signed multiply accumulate dual reverse.
27887
+; smlal signed multiply accumulate long.
27888
+; smlald signed multiply accumulate long dual.
27889
+; smlals signed multiply accumulate long, flag setting.
27890
+; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate.
27891
+; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate.
27892
+; smlawy signed multiply accumulate wide, 32x16-bit,
27893
+; 32-bit accumulate.
27894
+; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate.
27895
+; smlsd signed multiply subtract dual.
27896
+; smlsdx signed multiply subtract dual reverse.
27897
+; smlsld signed multiply subtract long dual.
27898
+; smmla signed most significant word multiply accumulate.
27899
+; smmul signed most significant word multiply.
27900
+; smmulr signed most significant word multiply, rounded.
27901
+; smuad signed dual multiply add.
27902
+; smuadx signed dual multiply add reverse.
27903
+; smull signed multiply long.
27904
+; smulls signed multiply long, flag setting.
27905
+; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate.
27906
+; smulxy signed multiply, 16x16-bit, 32-bit accumulate.
27907
+; smusd signed dual multiply subtract.
27908
+; smusdx signed dual multiply subtract reverse.
27909
+; store1 store 1 word to memory from arm registers.
27910
+; store2 store 2 words to memory from arm registers.
27911
+; store3 store 3 words to memory from arm registers.
27912
+; store4 store 4 (or more) words to memory from arm registers.
27913
+; udiv unsigned division.
27914
+; umaal unsigned multiply accumulate accumulate long.
27915
+; umlal unsigned multiply accumulate long.
27916
+; umlals unsigned multiply accumulate long, flag setting.
27917
+; umull unsigned multiply long.
27918
+; umulls unsigned multiply long, flag setting.
27920
+; The classification below is for instructions used by the Wireless MMX
27921
+; Technology. Each attribute value is used to classify an instruction of the
27922
+; same name or family.
27984
(define_attr "type"
27986
@@ -273,7 +387,6 @@
27994
@@ -296,6 +409,8 @@
28003
@@ -315,25 +430,119 @@
28009
- (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,\
28010
- umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
28011
- (const_string "mult")
28012
- (const_string "alu_reg")))
28107
+ (const_string "alu_reg"))
28109
+; Is this an (integer side) multiply with a 32-bit (or smaller) result?
28110
+(define_attr "mul32" "no,yes"
28113
+ "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\
28114
+ smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld")
28115
+ (const_string "yes")
28116
+ (const_string "no")))
28118
; Is this an (integer side) multiply with a 64-bit result?
28119
(define_attr "mul64" "no,yes"
28122
- "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
28124
+ "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals")
28125
(const_string "yes")
28126
(const_string "no")))
28128
-; wtype for WMMX insn scheduling purposes.
28129
-(define_attr "wtype"
28130
- "none,wor,wxor,wand,wandn,wmov,tmcrr,tmrrc,wldr,wstr,tmcr,tmrc,wadd,wsub,wmul,wmac,wavg2,tinsr,textrm,wshufh,wcmpeq,wcmpgt,wmax,wmin,wpack,wunpckih,wunpckil,wunpckeh,wunpckel,wror,wsra,wsrl,wsll,wmadd,tmia,tmiaph,tmiaxy,tbcst,tmovmsk,wacc,waligni,walignr,tandc,textrc,torc,torvsc,wsad,wabs,wabsdiff,waddsubhx,wsubaddhx,wavg4,wmulw,wqmulm,wqmulwm,waddbhus,wqmiaxy,wmiaxy,wmiawxy,wmerge" (const_string "none"))
28132
; Load scheduling, set from the arm_ld_sched variable
28133
; initialized by arm_option_override()
28134
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
28135
@@ -458,9 +667,19 @@
28136
; than one on the main cpu execution unit.
28137
(define_attr "core_cycles" "single,multi"
28138
(if_then_else (eq_attr "type"
28139
- "simple_alu_imm,alu_reg,\
28140
- simple_alu_shift,alu_shift,\
28141
- float,fdivd,fdivs")
28142
+ "simple_alu_imm, alu_reg,\
28143
+ simple_alu_shift, alu_shift, float, fdivd, fdivs,\
28144
+ wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\
28145
+ wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\
28146
+ wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\
28147
+ wmmx_wshufh, wmmx_wcmpeq, wmmx_wcmpgt, wmmx_wmax, wmmx_wmin, wmmx_wpack,\
28148
+ wmmx_wunpckih, wmmx_wunpckil, wmmx_wunpckeh, wmmx_wunpckel, wmmx_wror,\
28149
+ wmmx_wsra, wmmx_wsrl, wmmx_wsll, wmmx_wmadd, wmmx_tmia, wmmx_tmiaph,\
28150
+ wmmx_tmiaxy, wmmx_tbcst, wmmx_tmovmsk, wmmx_wacc, wmmx_waligni,\
28151
+ wmmx_walignr, wmmx_tandc, wmmx_textrc, wmmx_torc, wmmx_torvsc, wmmx_wsad,\
28152
+ wmmx_wabs, wmmx_wabsdiff, wmmx_waddsubhx, wmmx_wsubaddhx, wmmx_wavg4,\
28153
+ wmmx_wmulw, wmmx_wqmulm, wmmx_wqmulwm, wmmx_waddbhus, wmmx_wqmiaxy,\
28154
+ wmmx_wmiaxy, wmmx_wmiawxy, wmmx_wmerge")
28155
(const_string "single")
28156
(const_string "multi")))
28158
@@ -502,7 +721,7 @@
28160
(define_attr "generic_sched" "yes,no"
28161
(const (if_then_else
28162
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4,marvell_pj4")
28163
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexm4,marvell_pj4")
28164
(eq_attr "tune_cortexr4" "yes"))
28165
(const_string "no")
28166
(const_string "yes"))))
28167
@@ -510,7 +729,7 @@
28168
(define_attr "generic_vfp" "yes,no"
28169
(const (if_then_else
28170
(and (eq_attr "fpu" "vfp")
28171
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4,marvell_pj4")
28172
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4")
28173
(eq_attr "tune_cortexr4" "no"))
28174
(const_string "yes")
28175
(const_string "no"))))
28176
@@ -531,6 +750,7 @@
28177
(include "cortex-a8.md")
28178
(include "cortex-a9.md")
28179
(include "cortex-a15.md")
28180
+(include "cortex-a53.md")
28181
(include "cortex-r4.md")
28182
(include "cortex-r4f.md")
28183
(include "cortex-m4.md")
28184
@@ -844,7 +1064,7 @@
28186
;; This is the canonicalization of addsi3_compare0_for_combiner when the
28187
;; addend is a constant.
28188
-(define_insn "*cmpsi2_addneg"
28189
+(define_insn "cmpsi2_addneg"
28190
[(set (reg:CC CC_REGNUM)
28192
(match_operand:SI 1 "s_register_operand" "r,r")
28193
@@ -975,7 +1195,8 @@
28196
sbc%?\\t%0, %1, #%B2"
28197
- [(set_attr "conds" "use")]
28198
+ [(set_attr "conds" "use")
28199
+ (set_attr "predicable" "yes")]
28202
(define_insn "*addsi3_carryin_alt2_<optab>"
28203
@@ -987,7 +1208,8 @@
28206
sbc%?\\t%0, %1, #%B2"
28207
- [(set_attr "conds" "use")]
28208
+ [(set_attr "conds" "use")
28209
+ (set_attr "predicable" "yes")]
28212
(define_insn "*addsi3_carryin_shift_<optab>"
28213
@@ -1001,6 +1223,7 @@
28215
"adc%?\\t%0, %1, %3%S2"
28216
[(set_attr "conds" "use")
28217
+ (set_attr "predicable" "yes")
28218
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
28219
(const_string "alu_shift")
28220
(const_string "alu_shift_reg")))]
28221
@@ -1017,26 +1240,88 @@
28222
[(set_attr "conds" "set")]
28225
-(define_expand "incscc"
28226
+(define_insn "*subsi3_carryin"
28227
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
28228
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
28229
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
28230
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
28231
+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I")
28232
+ (match_operand:SI 2 "s_register_operand" "r,r"))
28233
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28237
+ sbc%?\\t%0, %1, %2
28238
+ rsc%?\\t%0, %2, %1"
28239
+ [(set_attr "conds" "use")
28240
+ (set_attr "arch" "*,a")
28241
+ (set_attr "predicable" "yes")]
28244
-(define_insn "*arm_incscc"
28245
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
28246
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
28247
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
28248
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
28249
+(define_insn "*subsi3_carryin_const"
28250
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
28251
+ (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r")
28252
+ (match_operand:SI 2 "arm_not_operand" "K"))
28253
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28255
+ "sbc\\t%0, %1, #%B2"
28256
+ [(set_attr "conds" "use")]
28259
+(define_insn "*subsi3_carryin_compare"
28260
+ [(set (reg:CC CC_REGNUM)
28261
+ (compare:CC (match_operand:SI 1 "s_register_operand" "r")
28262
+ (match_operand:SI 2 "s_register_operand" "r")))
28263
+ (set (match_operand:SI 0 "s_register_operand" "=r")
28264
+ (minus:SI (minus:SI (match_dup 1)
28266
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28268
+ "sbcs\\t%0, %1, %2"
28269
+ [(set_attr "conds" "set")]
28272
+(define_insn "*subsi3_carryin_compare_const"
28273
+ [(set (reg:CC CC_REGNUM)
28274
+ (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r")
28275
+ (match_operand:SI 2 "arm_not_operand" "K")))
28276
+ (set (match_operand:SI 0 "s_register_operand" "=r")
28277
+ (minus:SI (plus:SI (match_dup 1)
28279
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28281
+ "sbcs\\t%0, %1, #%B2"
28282
+ [(set_attr "conds" "set")]
28285
+(define_insn "*subsi3_carryin_shift"
28286
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
28287
+ (minus:SI (minus:SI
28288
+ (match_operand:SI 1 "s_register_operand" "r")
28289
+ (match_operator:SI 2 "shift_operator"
28290
+ [(match_operand:SI 3 "s_register_operand" "r")
28291
+ (match_operand:SI 4 "reg_or_int_operand" "rM")]))
28292
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28294
+ "sbc%?\\t%0, %1, %3%S2"
28295
+ [(set_attr "conds" "use")
28296
+ (set_attr "predicable" "yes")
28297
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
28298
+ (const_string "alu_shift")
28299
+ (const_string "alu_shift_reg")))]
28302
+(define_insn "*rsbsi3_carryin_shift"
28303
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
28304
+ (minus:SI (minus:SI
28305
+ (match_operator:SI 2 "shift_operator"
28306
+ [(match_operand:SI 3 "s_register_operand" "r")
28307
+ (match_operand:SI 4 "reg_or_int_operand" "rM")])
28308
+ (match_operand:SI 1 "s_register_operand" "r"))
28309
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28312
- add%d2\\t%0, %1, #1
28313
- mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1"
28314
+ "rsc%?\\t%0, %1, %3%S2"
28315
[(set_attr "conds" "use")
28316
- (set_attr "length" "4,8")]
28317
+ (set_attr "predicable" "yes")
28318
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
28319
+ (const_string "alu_shift")
28320
+ (const_string "alu_shift_reg")))]
28323
; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant.
28324
@@ -1087,13 +1372,27 @@
28328
-(define_insn "*arm_subdi3"
28329
+(define_insn_and_split "*arm_subdi3"
28330
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r")
28331
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
28332
(match_operand:DI 2 "s_register_operand" "r,0,0")))
28333
(clobber (reg:CC CC_REGNUM))]
28334
"TARGET_32BIT && !TARGET_NEON"
28335
- "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
28336
+ "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
28337
+ "&& reload_completed"
28338
+ [(parallel [(set (reg:CC CC_REGNUM)
28339
+ (compare:CC (match_dup 1) (match_dup 2)))
28340
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
28341
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
28342
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28344
+ operands[3] = gen_highpart (SImode, operands[0]);
28345
+ operands[0] = gen_lowpart (SImode, operands[0]);
28346
+ operands[4] = gen_highpart (SImode, operands[1]);
28347
+ operands[1] = gen_lowpart (SImode, operands[1]);
28348
+ operands[5] = gen_highpart (SImode, operands[2]);
28349
+ operands[2] = gen_lowpart (SImode, operands[2]);
28351
[(set_attr "conds" "clob")
28352
(set_attr "length" "8")]
28354
@@ -1108,55 +1407,113 @@
28355
[(set_attr "length" "4")]
28358
-(define_insn "*subdi_di_zesidi"
28359
+(define_insn_and_split "*subdi_di_zesidi"
28360
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
28361
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
28363
(match_operand:SI 2 "s_register_operand" "r,r"))))
28364
(clobber (reg:CC CC_REGNUM))]
28366
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
28367
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
28368
+ "&& reload_completed"
28369
+ [(parallel [(set (reg:CC CC_REGNUM)
28370
+ (compare:CC (match_dup 1) (match_dup 2)))
28371
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
28372
+ (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5))
28373
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28375
+ operands[3] = gen_highpart (SImode, operands[0]);
28376
+ operands[0] = gen_lowpart (SImode, operands[0]);
28377
+ operands[4] = gen_highpart (SImode, operands[1]);
28378
+ operands[1] = gen_lowpart (SImode, operands[1]);
28379
+ operands[5] = GEN_INT (~0);
28381
[(set_attr "conds" "clob")
28382
(set_attr "length" "8")]
28385
-(define_insn "*subdi_di_sesidi"
28386
+(define_insn_and_split "*subdi_di_sesidi"
28387
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
28388
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
28390
(match_operand:SI 2 "s_register_operand" "r,r"))))
28391
(clobber (reg:CC CC_REGNUM))]
28393
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
28394
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
28395
+ "&& reload_completed"
28396
+ [(parallel [(set (reg:CC CC_REGNUM)
28397
+ (compare:CC (match_dup 1) (match_dup 2)))
28398
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
28399
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4)
28400
+ (ashiftrt:SI (match_dup 2)
28402
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28404
+ operands[3] = gen_highpart (SImode, operands[0]);
28405
+ operands[0] = gen_lowpart (SImode, operands[0]);
28406
+ operands[4] = gen_highpart (SImode, operands[1]);
28407
+ operands[1] = gen_lowpart (SImode, operands[1]);
28409
[(set_attr "conds" "clob")
28410
(set_attr "length" "8")]
28413
-(define_insn "*subdi_zesidi_di"
28414
+(define_insn_and_split "*subdi_zesidi_di"
28415
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
28416
(minus:DI (zero_extend:DI
28417
(match_operand:SI 2 "s_register_operand" "r,r"))
28418
(match_operand:DI 1 "s_register_operand" "0,r")))
28419
(clobber (reg:CC CC_REGNUM))]
28421
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
28422
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
28423
+ ; is equivalent to:
28424
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0"
28425
+ "&& reload_completed"
28426
+ [(parallel [(set (reg:CC CC_REGNUM)
28427
+ (compare:CC (match_dup 2) (match_dup 1)))
28428
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
28429
+ (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4))
28430
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28432
+ operands[3] = gen_highpart (SImode, operands[0]);
28433
+ operands[0] = gen_lowpart (SImode, operands[0]);
28434
+ operands[4] = gen_highpart (SImode, operands[1]);
28435
+ operands[1] = gen_lowpart (SImode, operands[1]);
28437
[(set_attr "conds" "clob")
28438
(set_attr "length" "8")]
28441
-(define_insn "*subdi_sesidi_di"
28442
+(define_insn_and_split "*subdi_sesidi_di"
28443
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
28444
(minus:DI (sign_extend:DI
28445
(match_operand:SI 2 "s_register_operand" "r,r"))
28446
(match_operand:DI 1 "s_register_operand" "0,r")))
28447
(clobber (reg:CC CC_REGNUM))]
28449
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
28450
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
28451
+ ; is equivalent to:
28452
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31"
28453
+ "&& reload_completed"
28454
+ [(parallel [(set (reg:CC CC_REGNUM)
28455
+ (compare:CC (match_dup 2) (match_dup 1)))
28456
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
28457
+ (set (match_dup 3) (minus:SI (minus:SI
28458
+ (ashiftrt:SI (match_dup 2)
28461
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28463
+ operands[3] = gen_highpart (SImode, operands[0]);
28464
+ operands[0] = gen_lowpart (SImode, operands[0]);
28465
+ operands[4] = gen_highpart (SImode, operands[1]);
28466
+ operands[1] = gen_lowpart (SImode, operands[1]);
28468
[(set_attr "conds" "clob")
28469
(set_attr "length" "8")]
28472
-(define_insn "*subdi_zesidi_zesidi"
28473
+(define_insn_and_split "*subdi_zesidi_zesidi"
28474
[(set (match_operand:DI 0 "s_register_operand" "=r")
28475
(minus:DI (zero_extend:DI
28476
(match_operand:SI 1 "s_register_operand" "r"))
28477
@@ -1164,7 +1521,17 @@
28478
(match_operand:SI 2 "s_register_operand" "r"))))
28479
(clobber (reg:CC CC_REGNUM))]
28481
- "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
28482
+ "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
28483
+ "&& reload_completed"
28484
+ [(parallel [(set (reg:CC CC_REGNUM)
28485
+ (compare:CC (match_dup 1) (match_dup 2)))
28486
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
28487
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1))
28488
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
28490
+ operands[3] = gen_highpart (SImode, operands[0]);
28491
+ operands[0] = gen_lowpart (SImode, operands[0]);
28493
[(set_attr "conds" "clob")
28494
(set_attr "length" "8")]
28496
@@ -1254,7 +1621,7 @@
28497
(set_attr "type" "simple_alu_imm,*,*")]
28500
-(define_insn "*subsi3_compare"
28501
+(define_insn "subsi3_compare"
28502
[(set (reg:CC CC_REGNUM)
28503
(compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
28504
(match_operand:SI 2 "arm_rhs_operand" "I,r,r")))
28505
@@ -1269,29 +1636,6 @@
28506
(set_attr "type" "simple_alu_imm,*,*")]
28509
-(define_expand "decscc"
28510
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
28511
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
28512
- (match_operator:SI 2 "arm_comparison_operator"
28513
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
28518
-(define_insn "*arm_decscc"
28519
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
28520
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
28521
- (match_operator:SI 2 "arm_comparison_operator"
28522
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
28525
- sub%d2\\t%0, %1, #1
28526
- mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1"
28527
- [(set_attr "conds" "use")
28528
- (set_attr "length" "*,8")
28529
- (set_attr "type" "simple_alu_imm,*")]
28532
(define_expand "subsf3"
28533
[(set (match_operand:SF 0 "s_register_operand" "")
28534
(minus:SF (match_operand:SF 1 "s_register_operand" "")
28535
@@ -1326,18 +1670,21 @@
28536
(match_operand:SI 1 "s_register_operand" "%0,r")))]
28537
"TARGET_32BIT && !arm_arch6"
28538
"mul%?\\t%0, %2, %1"
28539
- [(set_attr "insn" "mul")
28540
+ [(set_attr "type" "mul")
28541
(set_attr "predicable" "yes")]
28544
(define_insn "*arm_mulsi3_v6"
28545
- [(set (match_operand:SI 0 "s_register_operand" "=r")
28546
- (mult:SI (match_operand:SI 1 "s_register_operand" "r")
28547
- (match_operand:SI 2 "s_register_operand" "r")))]
28548
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r")
28549
+ (mult:SI (match_operand:SI 1 "s_register_operand" "0,l,r")
28550
+ (match_operand:SI 2 "s_register_operand" "l,0,r")))]
28551
"TARGET_32BIT && arm_arch6"
28552
"mul%?\\t%0, %1, %2"
28553
- [(set_attr "insn" "mul")
28554
- (set_attr "predicable" "yes")]
28555
+ [(set_attr "type" "mul")
28556
+ (set_attr "predicable" "yes")
28557
+ (set_attr "arch" "t2,t2,*")
28558
+ (set_attr "length" "4")
28559
+ (set_attr "predicable_short_it" "yes,yes,no")]
28562
; Unfortunately with the Thumb the '&'/'0' trick can fails when operands
28563
@@ -1357,7 +1704,7 @@
28564
return \"mul\\t%0, %2\";
28566
[(set_attr "length" "4,4,2")
28567
- (set_attr "insn" "mul")]
28568
+ (set_attr "type" "muls")]
28571
(define_insn "*thumb_mulsi3_v6"
28572
@@ -1370,7 +1717,7 @@
28575
[(set_attr "length" "2")
28576
- (set_attr "insn" "mul")]
28577
+ (set_attr "type" "muls")]
28580
(define_insn "*mulsi3_compare0"
28581
@@ -1384,7 +1731,7 @@
28582
"TARGET_ARM && !arm_arch6"
28583
"mul%.\\t%0, %2, %1"
28584
[(set_attr "conds" "set")
28585
- (set_attr "insn" "muls")]
28586
+ (set_attr "type" "muls")]
28589
(define_insn "*mulsi3_compare0_v6"
28590
@@ -1398,7 +1745,7 @@
28591
"TARGET_ARM && arm_arch6 && optimize_size"
28592
"mul%.\\t%0, %2, %1"
28593
[(set_attr "conds" "set")
28594
- (set_attr "insn" "muls")]
28595
+ (set_attr "type" "muls")]
28598
(define_insn "*mulsi_compare0_scratch"
28599
@@ -1411,7 +1758,7 @@
28600
"TARGET_ARM && !arm_arch6"
28601
"mul%.\\t%0, %2, %1"
28602
[(set_attr "conds" "set")
28603
- (set_attr "insn" "muls")]
28604
+ (set_attr "type" "muls")]
28607
(define_insn "*mulsi_compare0_scratch_v6"
28608
@@ -1424,7 +1771,7 @@
28609
"TARGET_ARM && arm_arch6 && optimize_size"
28610
"mul%.\\t%0, %2, %1"
28611
[(set_attr "conds" "set")
28612
- (set_attr "insn" "muls")]
28613
+ (set_attr "type" "muls")]
28616
;; Unnamed templates to match MLA instruction.
28617
@@ -1437,7 +1784,7 @@
28618
(match_operand:SI 3 "s_register_operand" "r,r,0,0")))]
28619
"TARGET_32BIT && !arm_arch6"
28620
"mla%?\\t%0, %2, %1, %3"
28621
- [(set_attr "insn" "mla")
28622
+ [(set_attr "type" "mla")
28623
(set_attr "predicable" "yes")]
28626
@@ -1449,8 +1796,9 @@
28627
(match_operand:SI 3 "s_register_operand" "r")))]
28628
"TARGET_32BIT && arm_arch6"
28629
"mla%?\\t%0, %2, %1, %3"
28630
- [(set_attr "insn" "mla")
28631
- (set_attr "predicable" "yes")]
28632
+ [(set_attr "type" "mla")
28633
+ (set_attr "predicable" "yes")
28634
+ (set_attr "predicable_short_it" "no")]
28637
(define_insn "*mulsi3addsi_compare0"
28638
@@ -1467,7 +1815,7 @@
28639
"TARGET_ARM && arm_arch6"
28640
"mla%.\\t%0, %2, %1, %3"
28641
[(set_attr "conds" "set")
28642
- (set_attr "insn" "mlas")]
28643
+ (set_attr "type" "mlas")]
28646
(define_insn "*mulsi3addsi_compare0_v6"
28647
@@ -1484,7 +1832,7 @@
28648
"TARGET_ARM && arm_arch6 && optimize_size"
28649
"mla%.\\t%0, %2, %1, %3"
28650
[(set_attr "conds" "set")
28651
- (set_attr "insn" "mlas")]
28652
+ (set_attr "type" "mlas")]
28655
(define_insn "*mulsi3addsi_compare0_scratch"
28656
@@ -1499,7 +1847,7 @@
28657
"TARGET_ARM && !arm_arch6"
28658
"mla%.\\t%0, %2, %1, %3"
28659
[(set_attr "conds" "set")
28660
- (set_attr "insn" "mlas")]
28661
+ (set_attr "type" "mlas")]
28664
(define_insn "*mulsi3addsi_compare0_scratch_v6"
28665
@@ -1514,7 +1862,7 @@
28666
"TARGET_ARM && arm_arch6 && optimize_size"
28667
"mla%.\\t%0, %2, %1, %3"
28668
[(set_attr "conds" "set")
28669
- (set_attr "insn" "mlas")]
28670
+ (set_attr "type" "mlas")]
28673
(define_insn "*mulsi3subsi"
28674
@@ -1525,8 +1873,9 @@
28675
(match_operand:SI 1 "s_register_operand" "r"))))]
28676
"TARGET_32BIT && arm_arch_thumb2"
28677
"mls%?\\t%0, %2, %1, %3"
28678
- [(set_attr "insn" "mla")
28679
- (set_attr "predicable" "yes")]
28680
+ [(set_attr "type" "mla")
28681
+ (set_attr "predicable" "yes")
28682
+ (set_attr "predicable_short_it" "no")]
28685
(define_expand "maddsidi4"
28686
@@ -1548,7 +1897,7 @@
28687
(match_operand:DI 1 "s_register_operand" "0")))]
28688
"TARGET_32BIT && arm_arch3m && !arm_arch6"
28689
"smlal%?\\t%Q0, %R0, %3, %2"
28690
- [(set_attr "insn" "smlal")
28691
+ [(set_attr "type" "smlal")
28692
(set_attr "predicable" "yes")]
28695
@@ -1561,8 +1910,9 @@
28696
(match_operand:DI 1 "s_register_operand" "0")))]
28697
"TARGET_32BIT && arm_arch6"
28698
"smlal%?\\t%Q0, %R0, %3, %2"
28699
- [(set_attr "insn" "smlal")
28700
- (set_attr "predicable" "yes")]
28701
+ [(set_attr "type" "smlal")
28702
+ (set_attr "predicable" "yes")
28703
+ (set_attr "predicable_short_it" "no")]
28706
;; 32x32->64 widening multiply.
28707
@@ -1587,7 +1937,7 @@
28708
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
28709
"TARGET_32BIT && arm_arch3m && !arm_arch6"
28710
"smull%?\\t%Q0, %R0, %1, %2"
28711
- [(set_attr "insn" "smull")
28712
+ [(set_attr "type" "smull")
28713
(set_attr "predicable" "yes")]
28716
@@ -1598,8 +1948,9 @@
28717
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
28718
"TARGET_32BIT && arm_arch6"
28719
"smull%?\\t%Q0, %R0, %1, %2"
28720
- [(set_attr "insn" "smull")
28721
- (set_attr "predicable" "yes")]
28722
+ [(set_attr "type" "smull")
28723
+ (set_attr "predicable" "yes")
28724
+ (set_attr "predicable_short_it" "no")]
28727
(define_expand "umulsidi3"
28728
@@ -1618,7 +1969,7 @@
28729
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
28730
"TARGET_32BIT && arm_arch3m && !arm_arch6"
28731
"umull%?\\t%Q0, %R0, %1, %2"
28732
- [(set_attr "insn" "umull")
28733
+ [(set_attr "type" "umull")
28734
(set_attr "predicable" "yes")]
28737
@@ -1629,8 +1980,9 @@
28738
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
28739
"TARGET_32BIT && arm_arch6"
28740
"umull%?\\t%Q0, %R0, %1, %2"
28741
- [(set_attr "insn" "umull")
28742
- (set_attr "predicable" "yes")]
28743
+ [(set_attr "type" "umull")
28744
+ (set_attr "predicable" "yes")
28745
+ (set_attr "predicable_short_it" "no")]
28748
(define_expand "umaddsidi4"
28749
@@ -1652,7 +2004,7 @@
28750
(match_operand:DI 1 "s_register_operand" "0")))]
28751
"TARGET_32BIT && arm_arch3m && !arm_arch6"
28752
"umlal%?\\t%Q0, %R0, %3, %2"
28753
- [(set_attr "insn" "umlal")
28754
+ [(set_attr "type" "umlal")
28755
(set_attr "predicable" "yes")]
28758
@@ -1665,8 +2017,9 @@
28759
(match_operand:DI 1 "s_register_operand" "0")))]
28760
"TARGET_32BIT && arm_arch6"
28761
"umlal%?\\t%Q0, %R0, %3, %2"
28762
- [(set_attr "insn" "umlal")
28763
- (set_attr "predicable" "yes")]
28764
+ [(set_attr "type" "umlal")
28765
+ (set_attr "predicable" "yes")
28766
+ (set_attr "predicable_short_it" "no")]
28769
(define_expand "smulsi3_highpart"
28770
@@ -1694,7 +2047,7 @@
28771
(clobber (match_scratch:SI 3 "=&r,&r"))]
28772
"TARGET_32BIT && arm_arch3m && !arm_arch6"
28773
"smull%?\\t%3, %0, %2, %1"
28774
- [(set_attr "insn" "smull")
28775
+ [(set_attr "type" "smull")
28776
(set_attr "predicable" "yes")]
28779
@@ -1709,8 +2062,9 @@
28780
(clobber (match_scratch:SI 3 "=r"))]
28781
"TARGET_32BIT && arm_arch6"
28782
"smull%?\\t%3, %0, %2, %1"
28783
- [(set_attr "insn" "smull")
28784
- (set_attr "predicable" "yes")]
28785
+ [(set_attr "type" "smull")
28786
+ (set_attr "predicable" "yes")
28787
+ (set_attr "predicable_short_it" "no")]
28790
(define_expand "umulsi3_highpart"
28791
@@ -1738,7 +2092,7 @@
28792
(clobber (match_scratch:SI 3 "=&r,&r"))]
28793
"TARGET_32BIT && arm_arch3m && !arm_arch6"
28794
"umull%?\\t%3, %0, %2, %1"
28795
- [(set_attr "insn" "umull")
28796
+ [(set_attr "type" "umull")
28797
(set_attr "predicable" "yes")]
28800
@@ -1753,8 +2107,9 @@
28801
(clobber (match_scratch:SI 3 "=r"))]
28802
"TARGET_32BIT && arm_arch6"
28803
"umull%?\\t%3, %0, %2, %1"
28804
- [(set_attr "insn" "umull")
28805
- (set_attr "predicable" "yes")]
28806
+ [(set_attr "type" "umull")
28807
+ (set_attr "predicable" "yes")
28808
+ (set_attr "predicable_short_it" "no")]
28811
(define_insn "mulhisi3"
28812
@@ -1765,7 +2120,7 @@
28813
(match_operand:HI 2 "s_register_operand" "r"))))]
28814
"TARGET_DSP_MULTIPLY"
28815
"smulbb%?\\t%0, %1, %2"
28816
- [(set_attr "insn" "smulxy")
28817
+ [(set_attr "type" "smulxy")
28818
(set_attr "predicable" "yes")]
28821
@@ -1778,8 +2133,9 @@
28822
(match_operand:HI 2 "s_register_operand" "r"))))]
28823
"TARGET_DSP_MULTIPLY"
28824
"smultb%?\\t%0, %1, %2"
28825
- [(set_attr "insn" "smulxy")
28826
- (set_attr "predicable" "yes")]
28827
+ [(set_attr "type" "smulxy")
28828
+ (set_attr "predicable" "yes")
28829
+ (set_attr "predicable_short_it" "no")]
28832
(define_insn "*mulhisi3bt"
28833
@@ -1791,8 +2147,9 @@
28835
"TARGET_DSP_MULTIPLY"
28836
"smulbt%?\\t%0, %1, %2"
28837
- [(set_attr "insn" "smulxy")
28838
- (set_attr "predicable" "yes")]
28839
+ [(set_attr "type" "smulxy")
28840
+ (set_attr "predicable" "yes")
28841
+ (set_attr "predicable_short_it" "no")]
28844
(define_insn "*mulhisi3tt"
28845
@@ -1805,8 +2162,9 @@
28847
"TARGET_DSP_MULTIPLY"
28848
"smultt%?\\t%0, %1, %2"
28849
- [(set_attr "insn" "smulxy")
28850
- (set_attr "predicable" "yes")]
28851
+ [(set_attr "type" "smulxy")
28852
+ (set_attr "predicable" "yes")
28853
+ (set_attr "predicable_short_it" "no")]
28856
(define_insn "maddhisi4"
28857
@@ -1818,8 +2176,9 @@
28858
(match_operand:SI 3 "s_register_operand" "r")))]
28859
"TARGET_DSP_MULTIPLY"
28860
"smlabb%?\\t%0, %1, %2, %3"
28861
- [(set_attr "insn" "smlaxy")
28862
- (set_attr "predicable" "yes")]
28863
+ [(set_attr "type" "smlaxy")
28864
+ (set_attr "predicable" "yes")
28865
+ (set_attr "predicable_short_it" "no")]
28868
;; Note: there is no maddhisi4ibt because this one is canonical form
28869
@@ -1833,8 +2192,9 @@
28870
(match_operand:SI 3 "s_register_operand" "r")))]
28871
"TARGET_DSP_MULTIPLY"
28872
"smlatb%?\\t%0, %1, %2, %3"
28873
- [(set_attr "insn" "smlaxy")
28874
- (set_attr "predicable" "yes")]
28875
+ [(set_attr "type" "smlaxy")
28876
+ (set_attr "predicable" "yes")
28877
+ (set_attr "predicable_short_it" "no")]
28880
(define_insn "*maddhisi4tt"
28881
@@ -1848,22 +2208,24 @@
28882
(match_operand:SI 3 "s_register_operand" "r")))]
28883
"TARGET_DSP_MULTIPLY"
28884
"smlatt%?\\t%0, %1, %2, %3"
28885
- [(set_attr "insn" "smlaxy")
28886
- (set_attr "predicable" "yes")]
28887
+ [(set_attr "type" "smlaxy")
28888
+ (set_attr "predicable" "yes")
28889
+ (set_attr "predicable_short_it" "no")]
28892
(define_insn "maddhidi4"
28893
[(set (match_operand:DI 0 "s_register_operand" "=r")
28895
(mult:DI (sign_extend:DI
28896
- (match_operand:HI 1 "s_register_operand" "r"))
28897
+ (match_operand:HI 1 "s_register_operand" "r"))
28899
(match_operand:HI 2 "s_register_operand" "r")))
28900
(match_operand:DI 3 "s_register_operand" "0")))]
28901
"TARGET_DSP_MULTIPLY"
28902
"smlalbb%?\\t%Q0, %R0, %1, %2"
28903
- [(set_attr "insn" "smlalxy")
28904
- (set_attr "predicable" "yes")])
28905
+ [(set_attr "type" "smlalxy")
28906
+ (set_attr "predicable" "yes")
28907
+ (set_attr "predicable_short_it" "no")])
28909
;; Note: there is no maddhidi4ibt because this one is canonical form
28910
(define_insn "*maddhidi4tb"
28911
@@ -1878,8 +2240,9 @@
28912
(match_operand:DI 3 "s_register_operand" "0")))]
28913
"TARGET_DSP_MULTIPLY"
28914
"smlaltb%?\\t%Q0, %R0, %1, %2"
28915
- [(set_attr "insn" "smlalxy")
28916
- (set_attr "predicable" "yes")])
28917
+ [(set_attr "type" "smlalxy")
28918
+ (set_attr "predicable" "yes")
28919
+ (set_attr "predicable_short_it" "no")])
28921
(define_insn "*maddhidi4tt"
28922
[(set (match_operand:DI 0 "s_register_operand" "=r")
28923
@@ -1895,8 +2258,9 @@
28924
(match_operand:DI 3 "s_register_operand" "0")))]
28925
"TARGET_DSP_MULTIPLY"
28926
"smlaltt%?\\t%Q0, %R0, %1, %2"
28927
- [(set_attr "insn" "smlalxy")
28928
- (set_attr "predicable" "yes")])
28929
+ [(set_attr "type" "smlalxy")
28930
+ (set_attr "predicable" "yes")
28931
+ (set_attr "predicable_short_it" "no")])
28933
(define_expand "mulsf3"
28934
[(set (match_operand:SF 0 "s_register_operand" "")
28935
@@ -2024,13 +2388,49 @@
28939
-(define_insn "*anddi3_insn"
28940
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
28941
- (and:DI (match_operand:DI 1 "s_register_operand" "%0,r")
28942
- (match_operand:DI 2 "s_register_operand" "r,r")))]
28943
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
28945
- [(set_attr "length" "8")]
28946
+(define_insn_and_split "*anddi3_insn"
28947
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w")
28948
+ (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0")
28949
+ (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))]
28950
+ "TARGET_32BIT && !TARGET_IWMMXT"
28952
+ switch (which_alternative)
28954
+ case 0: /* fall through */
28955
+ case 6: return "vand\t%P0, %P1, %P2";
28956
+ case 1: /* fall through */
28957
+ case 7: return neon_output_logic_immediate ("vand", &operands[2],
28958
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
28962
+ case 5: /* fall through */
28964
+ default: gcc_unreachable ();
28967
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
28968
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
28969
+ [(set (match_dup 3) (match_dup 4))
28970
+ (set (match_dup 5) (match_dup 6))]
28973
+ operands[3] = gen_lowpart (SImode, operands[0]);
28974
+ operands[5] = gen_highpart (SImode, operands[0]);
28976
+ operands[4] = simplify_gen_binary (AND, SImode,
28977
+ gen_lowpart (SImode, operands[1]),
28978
+ gen_lowpart (SImode, operands[2]));
28979
+ operands[6] = simplify_gen_binary (AND, SImode,
28980
+ gen_highpart (SImode, operands[1]),
28981
+ gen_highpart_mode (SImode, DImode, operands[2]));
28984
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
28985
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,
28986
+ avoid_neon_for_64bits,avoid_neon_for_64bits")
28987
+ (set_attr "length" "*,*,8,8,8,8,*,*")
28991
(define_insn_and_split "*anddi_zesidi_di"
28992
@@ -2216,7 +2616,7 @@
28993
[(set (reg:CC_NOOV CC_REGNUM)
28994
(compare:CC_NOOV (zero_extract:SI
28995
(match_operand:SI 0 "s_register_operand" "r")
28996
- (match_operand 1 "const_int_operand" "n")
28997
+ (match_operand 1 "const_int_operand" "n")
28998
(match_operand 2 "const_int_operand" "n"))
29001
@@ -2232,6 +2632,7 @@
29003
[(set_attr "conds" "set")
29004
(set_attr "predicable" "yes")
29005
+ (set_attr "predicable_short_it" "no")
29006
(set_attr "type" "simple_alu_imm")]
29009
@@ -2659,7 +3060,8 @@
29011
"bfc%?\t%0, %2, %1"
29012
[(set_attr "length" "4")
29013
- (set_attr "predicable" "yes")]
29014
+ (set_attr "predicable" "yes")
29015
+ (set_attr "predicable_short_it" "no")]
29018
(define_insn "insv_t2"
29019
@@ -2670,7 +3072,8 @@
29021
"bfi%?\t%0, %3, %2, %1"
29022
[(set_attr "length" "4")
29023
- (set_attr "predicable" "yes")]
29024
+ (set_attr "predicable" "yes")
29025
+ (set_attr "predicable_short_it" "no")]
29028
; constants for op 2 will never be given to these patterns.
29029
@@ -2697,7 +3100,7 @@
29030
[(set_attr "length" "8")
29031
(set_attr "predicable" "yes")]
29035
(define_insn_and_split "*anddi_notzesidi_di"
29036
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
29037
(and:DI (not:DI (zero_extend:DI
29038
@@ -2722,9 +3125,10 @@
29039
operands[1] = gen_lowpart (SImode, operands[1]);
29041
[(set_attr "length" "4,8")
29042
- (set_attr "predicable" "yes")]
29043
+ (set_attr "predicable" "yes")
29044
+ (set_attr "predicable_short_it" "no")]
29048
(define_insn_and_split "*anddi_notsesidi_di"
29049
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
29050
(and:DI (not:DI (sign_extend:DI
29051
@@ -2745,16 +3149,18 @@
29052
operands[1] = gen_lowpart (SImode, operands[1]);
29054
[(set_attr "length" "8")
29055
- (set_attr "predicable" "yes")]
29056
+ (set_attr "predicable" "yes")
29057
+ (set_attr "predicable_short_it" "no")]
29061
(define_insn "andsi_notsi_si"
29062
[(set (match_operand:SI 0 "s_register_operand" "=r")
29063
(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
29064
(match_operand:SI 1 "s_register_operand" "r")))]
29066
"bic%?\\t%0, %1, %2"
29067
- [(set_attr "predicable" "yes")]
29068
+ [(set_attr "predicable" "yes")
29069
+ (set_attr "predicable_short_it" "no")]
29072
(define_insn "thumb1_bicsi3"
29073
@@ -2834,7 +3240,8 @@
29074
orr%?\\t%Q0, %Q1, %2
29076
[(set_attr "length" "4,8")
29077
- (set_attr "predicable" "yes")]
29078
+ (set_attr "predicable" "yes")
29079
+ (set_attr "predicable_short_it" "no")]
29082
(define_insn "*iordi_sesidi_di"
29083
@@ -2979,7 +3386,8 @@
29084
eor%?\\t%Q0, %Q1, %2
29086
[(set_attr "length" "4,8")
29087
- (set_attr "predicable" "yes")]
29088
+ (set_attr "predicable" "yes")
29089
+ (set_attr "predicable_short_it" "no")]
29092
(define_insn "*xordi_sesidi_di"
29093
@@ -3096,16 +3504,21 @@
29097
-(define_insn "*andsi_iorsi3_notsi"
29098
+(define_insn_and_split "*andsi_iorsi3_notsi"
29099
[(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
29100
(and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
29101
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))
29102
(not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))]
29104
- "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
29105
+ "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
29106
+ "&& reload_completed"
29107
+ [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2)))
29108
+ (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))]
29110
[(set_attr "length" "8")
29111
(set_attr "ce_count" "2")
29112
- (set_attr "predicable" "yes")]
29113
+ (set_attr "predicable" "yes")
29114
+ (set_attr "predicable_short_it" "no")]
29117
; ??? Are these four splitters still beneficial when the Thumb-2 bitfield
29118
@@ -3241,7 +3654,8 @@
29121
"bic%?\\t%0, %1, %1, asr #31"
29122
- [(set_attr "predicable" "yes")]
29123
+ [(set_attr "predicable" "yes")
29124
+ (set_attr "predicable_short_it" "no")]
29127
(define_insn "*smax_m1"
29128
@@ -3250,18 +3664,27 @@
29131
"orr%?\\t%0, %1, %1, asr #31"
29132
- [(set_attr "predicable" "yes")]
29133
+ [(set_attr "predicable" "yes")
29134
+ (set_attr "predicable_short_it" "no")]
29137
-(define_insn "*arm_smax_insn"
29138
+(define_insn_and_split "*arm_smax_insn"
29139
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
29140
(smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
29141
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
29142
(clobber (reg:CC CC_REGNUM))]
29145
- cmp\\t%1, %2\;movlt\\t%0, %2
29146
- cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
29148
+ ; cmp\\t%1, %2\;movlt\\t%0, %2
29149
+ ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
29151
+ [(set (reg:CC CC_REGNUM)
29152
+ (compare:CC (match_dup 1) (match_dup 2)))
29153
+ (set (match_dup 0)
29154
+ (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0))
29158
[(set_attr "conds" "clob")
29159
(set_attr "length" "8,12")]
29161
@@ -3290,18 +3713,27 @@
29164
"and%?\\t%0, %1, %1, asr #31"
29165
- [(set_attr "predicable" "yes")]
29166
+ [(set_attr "predicable" "yes")
29167
+ (set_attr "predicable_short_it" "no")]
29170
-(define_insn "*arm_smin_insn"
29171
+(define_insn_and_split "*arm_smin_insn"
29172
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
29173
(smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
29174
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
29175
(clobber (reg:CC CC_REGNUM))]
29178
- cmp\\t%1, %2\;movge\\t%0, %2
29179
- cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
29181
+ ; cmp\\t%1, %2\;movge\\t%0, %2
29182
+ ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
29184
+ [(set (reg:CC CC_REGNUM)
29185
+ (compare:CC (match_dup 1) (match_dup 2)))
29186
+ (set (match_dup 0)
29187
+ (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0))
29191
[(set_attr "conds" "clob")
29192
(set_attr "length" "8,12")]
29194
@@ -3316,16 +3748,24 @@
29198
-(define_insn "*arm_umaxsi3"
29199
+(define_insn_and_split "*arm_umaxsi3"
29200
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
29201
(umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
29202
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
29203
(clobber (reg:CC CC_REGNUM))]
29206
- cmp\\t%1, %2\;movcc\\t%0, %2
29207
- cmp\\t%1, %2\;movcs\\t%0, %1
29208
- cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
29210
+ ; cmp\\t%1, %2\;movcc\\t%0, %2
29211
+ ; cmp\\t%1, %2\;movcs\\t%0, %1
29212
+ ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
29214
+ [(set (reg:CC CC_REGNUM)
29215
+ (compare:CC (match_dup 1) (match_dup 2)))
29216
+ (set (match_dup 0)
29217
+ (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
29221
[(set_attr "conds" "clob")
29222
(set_attr "length" "8,8,12")]
29224
@@ -3340,16 +3780,24 @@
29228
-(define_insn "*arm_uminsi3"
29229
+(define_insn_and_split "*arm_uminsi3"
29230
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
29231
(umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
29232
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
29233
(clobber (reg:CC CC_REGNUM))]
29236
- cmp\\t%1, %2\;movcs\\t%0, %2
29237
- cmp\\t%1, %2\;movcc\\t%0, %1
29238
- cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
29240
+ ; cmp\\t%1, %2\;movcs\\t%0, %2
29241
+ ; cmp\\t%1, %2\;movcc\\t%0, %1
29242
+ ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
29244
+ [(set (reg:CC CC_REGNUM)
29245
+ (compare:CC (match_dup 1) (match_dup 2)))
29246
+ (set (match_dup 0)
29247
+ (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))
29251
[(set_attr "conds" "clob")
29252
(set_attr "length" "8,8,12")]
29254
@@ -3360,7 +3808,7 @@
29255
[(match_operand:SI 1 "s_register_operand" "r")
29256
(match_operand:SI 2 "s_register_operand" "r")]))
29257
(clobber (reg:CC CC_REGNUM))]
29259
+ "TARGET_32BIT && optimize_insn_for_size_p()"
29261
operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
29262
operands[1], operands[2]);
29263
@@ -3423,6 +3871,50 @@
29267
+; Reject the frame pointer in operand[1], since reloading this after
29268
+; it has been eliminated can cause carnage.
29269
+(define_insn_and_split "*minmax_arithsi_non_canon"
29270
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
29272
+ (match_operand:SI 1 "s_register_operand" "0,?r")
29273
+ (match_operator:SI 4 "minmax_operator"
29274
+ [(match_operand:SI 2 "s_register_operand" "r,r")
29275
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
29276
+ (clobber (reg:CC CC_REGNUM))]
29277
+ "TARGET_32BIT && !arm_eliminable_register (operands[1])"
29279
+ "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed"
29280
+ [(set (reg:CC CC_REGNUM)
29281
+ (compare:CC (match_dup 2) (match_dup 3)))
29283
+ (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)])
29284
+ (set (match_dup 0)
29285
+ (minus:SI (match_dup 1)
29287
+ (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)])
29288
+ (set (match_dup 0)
29289
+ (minus:SI (match_dup 1)
29290
+ (match_dup 3))))]
29292
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
29293
+ operands[2], operands[3]);
29294
+ enum rtx_code rc = minmax_code (operands[4]);
29295
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode,
29296
+ operands[2], operands[3]);
29298
+ if (mode == CCFPmode || mode == CCFPEmode)
29299
+ rc = reverse_condition_maybe_unordered (rc);
29301
+ rc = reverse_condition (rc);
29302
+ operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]);
29304
+ [(set_attr "conds" "clob")
29305
+ (set (attr "length")
29306
+ (if_then_else (eq_attr "is_thumb" "yes")
29308
+ (const_int 12)))]
29311
(define_code_iterator SAT [smin smax])
29312
(define_code_iterator SATrev [smin smax])
29313
(define_code_attr SATlo [(smin "1") (smax "2")])
29314
@@ -3533,13 +4025,26 @@
29318
-(define_insn "arm_ashldi3_1bit"
29319
+(define_insn_and_split "arm_ashldi3_1bit"
29320
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
29321
(ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
29323
(clobber (reg:CC CC_REGNUM))]
29325
- "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
29326
+ "#" ; "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
29327
+ "&& reload_completed"
29328
+ [(parallel [(set (reg:CC CC_REGNUM)
29329
+ (compare:CC (ashift:SI (match_dup 1) (const_int 1))
29331
+ (set (match_dup 0) (ashift:SI (match_dup 1) (const_int 1)))])
29332
+ (set (match_dup 2) (plus:SI (plus:SI (match_dup 3) (match_dup 3))
29333
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
29335
+ operands[2] = gen_highpart (SImode, operands[0]);
29336
+ operands[0] = gen_lowpart (SImode, operands[0]);
29337
+ operands[3] = gen_highpart (SImode, operands[1]);
29338
+ operands[1] = gen_lowpart (SImode, operands[1]);
29340
[(set_attr "conds" "clob")
29341
(set_attr "length" "8")]
29343
@@ -3615,18 +4120,43 @@
29347
-(define_insn "arm_ashrdi3_1bit"
29348
+(define_insn_and_split "arm_ashrdi3_1bit"
29349
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
29350
(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
29352
(clobber (reg:CC CC_REGNUM))]
29354
- "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
29355
+ "#" ; "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
29356
+ "&& reload_completed"
29357
+ [(parallel [(set (reg:CC CC_REGNUM)
29358
+ (compare:CC (ashiftrt:SI (match_dup 3) (const_int 1))
29360
+ (set (match_dup 2) (ashiftrt:SI (match_dup 3) (const_int 1)))])
29361
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
29362
+ (reg:CC_C CC_REGNUM)]
29365
+ operands[2] = gen_highpart (SImode, operands[0]);
29366
+ operands[0] = gen_lowpart (SImode, operands[0]);
29367
+ operands[3] = gen_highpart (SImode, operands[1]);
29368
+ operands[1] = gen_lowpart (SImode, operands[1]);
29370
[(set_attr "conds" "clob")
29371
- (set_attr "insn" "mov")
29372
(set_attr "length" "8")]
29375
+(define_insn "*rrx"
29376
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
29377
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")
29378
+ (reg:CC_C CC_REGNUM)]
29381
+ "mov\\t%0, %1, rrx"
29382
+ [(set_attr "conds" "use")
29383
+ (set_attr "insn" "mov")
29384
+ (set_attr "type" "alu_shift")]
29387
(define_expand "ashrsi3"
29388
[(set (match_operand:SI 0 "s_register_operand" "")
29389
(ashiftrt:SI (match_operand:SI 1 "s_register_operand" "")
29390
@@ -3695,15 +4225,28 @@
29394
-(define_insn "arm_lshrdi3_1bit"
29395
+(define_insn_and_split "arm_lshrdi3_1bit"
29396
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
29397
(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
29399
(clobber (reg:CC CC_REGNUM))]
29401
- "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
29402
+ "#" ; "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
29403
+ "&& reload_completed"
29404
+ [(parallel [(set (reg:CC CC_REGNUM)
29405
+ (compare:CC (lshiftrt:SI (match_dup 3) (const_int 1))
29407
+ (set (match_dup 2) (lshiftrt:SI (match_dup 3) (const_int 1)))])
29408
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
29409
+ (reg:CC_C CC_REGNUM)]
29412
+ operands[2] = gen_highpart (SImode, operands[0]);
29413
+ operands[0] = gen_lowpart (SImode, operands[0]);
29414
+ operands[3] = gen_highpart (SImode, operands[1]);
29415
+ operands[1] = gen_lowpart (SImode, operands[1]);
29417
[(set_attr "conds" "clob")
29418
- (set_attr "insn" "mov")
29419
(set_attr "length" "8")]
29422
@@ -3791,6 +4334,23 @@
29423
(const_string "alu_shift_reg")))]
29426
+(define_insn "*shiftsi3_compare"
29427
+ [(set (reg:CC CC_REGNUM)
29428
+ (compare:CC (match_operator:SI 3 "shift_operator"
29429
+ [(match_operand:SI 1 "s_register_operand" "r")
29430
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
29432
+ (set (match_operand:SI 0 "s_register_operand" "=r")
29433
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
29435
+ "* return arm_output_shift(operands, 1);"
29436
+ [(set_attr "conds" "set")
29437
+ (set_attr "shift" "1")
29438
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
29439
+ (const_string "alu_shift")
29440
+ (const_string "alu_shift_reg")))]
29443
(define_insn "*shiftsi3_compare0"
29444
[(set (reg:CC_NOOV CC_REGNUM)
29445
(compare:CC_NOOV (match_operator:SI 3 "shift_operator"
29446
@@ -3829,6 +4389,7 @@
29448
"mvn%?\\t%0, %1%S3"
29449
[(set_attr "predicable" "yes")
29450
+ (set_attr "predicable_short_it" "no")
29451
(set_attr "shift" "1")
29452
(set_attr "insn" "mvn")
29453
(set_attr "arch" "32,a")
29454
@@ -4042,6 +4603,7 @@
29455
[(set_attr "arch" "t2,any")
29456
(set_attr "length" "2,4")
29457
(set_attr "predicable" "yes")
29458
+ (set_attr "predicable_short_it" "yes,no")
29459
(set_attr "type" "load1")])
29461
(define_insn "unaligned_loadhis"
29462
@@ -4054,6 +4616,7 @@
29463
[(set_attr "arch" "t2,any")
29464
(set_attr "length" "2,4")
29465
(set_attr "predicable" "yes")
29466
+ (set_attr "predicable_short_it" "yes,no")
29467
(set_attr "type" "load_byte")])
29469
(define_insn "unaligned_loadhiu"
29470
@@ -4066,6 +4629,7 @@
29471
[(set_attr "arch" "t2,any")
29472
(set_attr "length" "2,4")
29473
(set_attr "predicable" "yes")
29474
+ (set_attr "predicable_short_it" "yes,no")
29475
(set_attr "type" "load_byte")])
29477
(define_insn "unaligned_storesi"
29478
@@ -4077,6 +4641,7 @@
29479
[(set_attr "arch" "t2,any")
29480
(set_attr "length" "2,4")
29481
(set_attr "predicable" "yes")
29482
+ (set_attr "predicable_short_it" "yes,no")
29483
(set_attr "type" "store1")])
29485
(define_insn "unaligned_storehi"
29486
@@ -4088,8 +4653,67 @@
29487
[(set_attr "arch" "t2,any")
29488
(set_attr "length" "2,4")
29489
(set_attr "predicable" "yes")
29490
+ (set_attr "predicable_short_it" "yes,no")
29491
(set_attr "type" "store1")])
29493
+;; Unaligned double-word load and store.
29494
+;; Split after reload into two unaligned single-word accesses.
29495
+;; It prevents lower_subreg from splitting some other aligned
29496
+;; double-word accesses too early. Used for internal memcpy.
29498
+(define_insn_and_split "unaligned_loaddi"
29499
+ [(set (match_operand:DI 0 "s_register_operand" "=l,r")
29500
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
29501
+ UNSPEC_UNALIGNED_LOAD))]
29502
+ "unaligned_access && TARGET_32BIT"
29504
+ "&& reload_completed"
29505
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))
29506
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]
29508
+ operands[2] = gen_highpart (SImode, operands[0]);
29509
+ operands[0] = gen_lowpart (SImode, operands[0]);
29510
+ operands[3] = gen_highpart (SImode, operands[1]);
29511
+ operands[1] = gen_lowpart (SImode, operands[1]);
29513
+ /* If the first destination register overlaps with the base address,
29514
+ swap the order in which the loads are emitted. */
29515
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
29517
+ rtx tmp = operands[1];
29518
+ operands[1] = operands[3];
29519
+ operands[3] = tmp;
29520
+ tmp = operands[0];
29521
+ operands[0] = operands[2];
29522
+ operands[2] = tmp;
29525
+ [(set_attr "arch" "t2,any")
29526
+ (set_attr "length" "4,8")
29527
+ (set_attr "predicable" "yes")
29528
+ (set_attr "type" "load2")])
29530
+(define_insn_and_split "unaligned_storedi"
29531
+ [(set (match_operand:DI 0 "memory_operand" "=o,o")
29532
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
29533
+ UNSPEC_UNALIGNED_STORE))]
29534
+ "unaligned_access && TARGET_32BIT"
29536
+ "&& reload_completed"
29537
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))
29538
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]
29540
+ operands[2] = gen_highpart (SImode, operands[0]);
29541
+ operands[0] = gen_lowpart (SImode, operands[0]);
29542
+ operands[3] = gen_highpart (SImode, operands[1]);
29543
+ operands[1] = gen_lowpart (SImode, operands[1]);
29545
+ [(set_attr "arch" "t2,any")
29546
+ (set_attr "length" "4,8")
29547
+ (set_attr "predicable" "yes")
29548
+ (set_attr "type" "store2")])
29551
(define_insn "*extv_reg"
29552
[(set (match_operand:SI 0 "s_register_operand" "=r")
29553
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
29554
@@ -4098,7 +4722,8 @@
29556
"sbfx%?\t%0, %1, %3, %2"
29557
[(set_attr "length" "4")
29558
- (set_attr "predicable" "yes")]
29559
+ (set_attr "predicable" "yes")
29560
+ (set_attr "predicable_short_it" "no")]
29563
(define_insn "extzv_t2"
29564
@@ -4109,7 +4734,8 @@
29566
"ubfx%?\t%0, %1, %3, %2"
29567
[(set_attr "length" "4")
29568
- (set_attr "predicable" "yes")]
29569
+ (set_attr "predicable" "yes")
29570
+ (set_attr "predicable_short_it" "no")]
29574
@@ -4121,7 +4747,8 @@
29576
"sdiv%?\t%0, %1, %2"
29577
[(set_attr "predicable" "yes")
29578
- (set_attr "insn" "sdiv")]
29579
+ (set_attr "predicable_short_it" "no")
29580
+ (set_attr "type" "sdiv")]
29583
(define_insn "udivsi3"
29584
@@ -4131,7 +4758,8 @@
29586
"udiv%?\t%0, %1, %2"
29587
[(set_attr "predicable" "yes")
29588
- (set_attr "insn" "udiv")]
29589
+ (set_attr "predicable_short_it" "no")
29590
+ (set_attr "type" "udiv")]
29594
@@ -4154,12 +4782,24 @@
29596
;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1).
29597
;; The first alternative allows the common case of a *full* overlap.
29598
-(define_insn "*arm_negdi2"
29599
+(define_insn_and_split "*arm_negdi2"
29600
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
29601
(neg:DI (match_operand:DI 1 "s_register_operand" "0,r")))
29602
(clobber (reg:CC CC_REGNUM))]
29604
- "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
29605
+ "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
29606
+ "&& reload_completed"
29607
+ [(parallel [(set (reg:CC CC_REGNUM)
29608
+ (compare:CC (const_int 0) (match_dup 1)))
29609
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
29610
+ (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3))
29611
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
29613
+ operands[2] = gen_highpart (SImode, operands[0]);
29614
+ operands[0] = gen_lowpart (SImode, operands[0]);
29615
+ operands[3] = gen_highpart (SImode, operands[1]);
29616
+ operands[1] = gen_lowpart (SImode, operands[1]);
29618
[(set_attr "conds" "clob")
29619
(set_attr "length" "8")]
29621
@@ -4181,11 +4821,14 @@
29624
(define_insn "*arm_negsi2"
29625
- [(set (match_operand:SI 0 "s_register_operand" "=r")
29626
- (neg:SI (match_operand:SI 1 "s_register_operand" "r")))]
29627
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
29628
+ (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))]
29630
"rsb%?\\t%0, %1, #0"
29631
- [(set_attr "predicable" "yes")]
29632
+ [(set_attr "predicable" "yes")
29633
+ (set_attr "predicable_short_it" "yes,no")
29634
+ (set_attr "arch" "t2,*")
29635
+ (set_attr "length" "4")]
29638
(define_insn "*thumb1_negsi2"
29639
@@ -4209,6 +4852,73 @@
29640
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
29643
+;; Negate an extended 32-bit value.
29644
+(define_insn_and_split "*negdi_extendsidi"
29645
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r,l,&l")
29646
+ (neg:DI (sign_extend:DI (match_operand:SI 1 "s_register_operand" "0,r,0,l"))))
29647
+ (clobber (reg:CC CC_REGNUM))]
29649
+ "#" ; rsb\\t%Q0, %1, #0\;asr\\t%R0, %Q0, #31
29650
+ "&& reload_completed"
29653
+ operands[2] = gen_highpart (SImode, operands[0]);
29654
+ operands[0] = gen_lowpart (SImode, operands[0]);
29655
+ rtx tmp = gen_rtx_SET (VOIDmode,
29657
+ gen_rtx_MINUS (SImode,
29666
+ /* Set the flags, to emit the short encoding in Thumb2. */
29667
+ rtx flags = gen_rtx_SET (VOIDmode,
29668
+ gen_rtx_REG (CCmode, CC_REGNUM),
29669
+ gen_rtx_COMPARE (CCmode,
29672
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
29677
+ emit_insn (gen_rtx_SET (VOIDmode,
29679
+ gen_rtx_ASHIFTRT (SImode,
29684
+ [(set_attr "length" "8,8,4,4")
29685
+ (set_attr "arch" "a,a,t2,t2")]
29688
+(define_insn_and_split "*negdi_zero_extendsidi"
29689
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
29690
+ (neg:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r"))))
29691
+ (clobber (reg:CC CC_REGNUM))]
29693
+ "#" ; "rsbs\\t%Q0, %1, #0\;sbc\\t%R0,%R0,%R0"
29694
+ ;; Don't care what register is input to sbc,
29695
+ ;; since we just just need to propagate the carry.
29696
+ "&& reload_completed"
29697
+ [(parallel [(set (reg:CC CC_REGNUM)
29698
+ (compare:CC (const_int 0) (match_dup 1)))
29699
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
29700
+ (set (match_dup 2) (minus:SI (minus:SI (match_dup 2) (match_dup 2))
29701
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
29703
+ operands[2] = gen_highpart (SImode, operands[0]);
29704
+ operands[0] = gen_lowpart (SImode, operands[0]);
29706
+ [(set_attr "conds" "clob")
29707
+ (set_attr "length" "8")] ;; length in thumb is 4
29710
;; abssi2 doesn't really clobber the condition codes if a different register
29711
;; is being set. To keep things simple, assume during rtl manipulations that
29712
;; it does, but tell the final scan operator the truth. Similarly for
29713
@@ -4227,14 +4937,67 @@
29714
operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
29717
-(define_insn "*arm_abssi2"
29718
+(define_insn_and_split "*arm_abssi2"
29719
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
29720
(abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
29721
(clobber (reg:CC CC_REGNUM))]
29724
- cmp\\t%0, #0\;rsblt\\t%0, %0, #0
29725
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
29727
+ "&& reload_completed"
29730
+ /* if (which_alternative == 0) */
29731
+ if (REGNO(operands[0]) == REGNO(operands[1]))
29733
+ /* Emit the pattern:
29734
+ cmp\\t%0, #0\;rsblt\\t%0, %0, #0
29735
+ [(set (reg:CC CC_REGNUM)
29736
+ (compare:CC (match_dup 0) (const_int 0)))
29737
+ (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0))
29738
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))]
29740
+ emit_insn (gen_rtx_SET (VOIDmode,
29741
+ gen_rtx_REG (CCmode, CC_REGNUM),
29742
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
29743
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
29744
+ (gen_rtx_LT (SImode,
29745
+ gen_rtx_REG (CCmode, CC_REGNUM),
29747
+ (gen_rtx_SET (VOIDmode,
29749
+ (gen_rtx_MINUS (SImode,
29751
+ operands[1]))))));
29756
+ /* Emit the pattern:
29757
+ alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31
29758
+ [(set (match_dup 0)
29759
+ (xor:SI (match_dup 1)
29760
+ (ashiftrt:SI (match_dup 1) (const_int 31))))
29761
+ (set (match_dup 0)
29762
+ (minus:SI (match_dup 0)
29763
+ (ashiftrt:SI (match_dup 1) (const_int 31))))]
29765
+ emit_insn (gen_rtx_SET (VOIDmode,
29767
+ gen_rtx_XOR (SImode,
29768
+ gen_rtx_ASHIFTRT (SImode,
29772
+ emit_insn (gen_rtx_SET (VOIDmode,
29774
+ gen_rtx_MINUS (SImode,
29776
+ gen_rtx_ASHIFTRT (SImode,
29778
+ GEN_INT (31)))));
29782
[(set_attr "conds" "clob,*")
29783
(set_attr "shift" "1")
29784
(set_attr "predicable" "no, yes")
29785
@@ -4255,14 +5018,56 @@
29786
[(set_attr "length" "6")]
29789
-(define_insn "*arm_neg_abssi2"
29790
+(define_insn_and_split "*arm_neg_abssi2"
29791
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
29792
(neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
29793
(clobber (reg:CC CC_REGNUM))]
29796
- cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
29797
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
29799
+ "&& reload_completed"
29802
+ /* if (which_alternative == 0) */
29803
+ if (REGNO (operands[0]) == REGNO (operands[1]))
29805
+ /* Emit the pattern:
29806
+ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
29808
+ emit_insn (gen_rtx_SET (VOIDmode,
29809
+ gen_rtx_REG (CCmode, CC_REGNUM),
29810
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
29811
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
29812
+ gen_rtx_GT (SImode,
29813
+ gen_rtx_REG (CCmode, CC_REGNUM),
29815
+ gen_rtx_SET (VOIDmode,
29817
+ (gen_rtx_MINUS (SImode,
29819
+ operands[1])))));
29823
+ /* Emit the pattern:
29824
+ eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31
29826
+ emit_insn (gen_rtx_SET (VOIDmode,
29828
+ gen_rtx_XOR (SImode,
29829
+ gen_rtx_ASHIFTRT (SImode,
29833
+ emit_insn (gen_rtx_SET (VOIDmode,
29835
+ gen_rtx_MINUS (SImode,
29836
+ gen_rtx_ASHIFTRT (SImode,
29843
[(set_attr "conds" "clob,*")
29844
(set_attr "shift" "1")
29845
(set_attr "predicable" "no, yes")
29846
@@ -4330,7 +5135,7 @@
29847
[(set_attr "length" "*,8,8,*")
29848
(set_attr "predicable" "no,yes,yes,no")
29849
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
29850
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
29851
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
29854
(define_expand "one_cmplsi2"
29855
@@ -4341,11 +5146,14 @@
29858
(define_insn "*arm_one_cmplsi2"
29859
- [(set (match_operand:SI 0 "s_register_operand" "=r")
29860
- (not:SI (match_operand:SI 1 "s_register_operand" "r")))]
29861
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
29862
+ (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))]
29865
[(set_attr "predicable" "yes")
29866
+ (set_attr "predicable_short_it" "yes,no")
29867
+ (set_attr "arch" "t2,*")
29868
+ (set_attr "length" "4")
29869
(set_attr "insn" "mvn")]
29872
@@ -4498,7 +5306,7 @@
29873
"TARGET_32BIT <qhs_zextenddi_cond>"
29875
[(set_attr "length" "8,4,8,8")
29876
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")
29877
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
29878
(set_attr "ce_count" "2")
29879
(set_attr "predicable" "yes")]
29881
@@ -4513,7 +5321,7 @@
29882
(set_attr "ce_count" "2")
29883
(set_attr "shift" "1")
29884
(set_attr "predicable" "yes")
29885
- (set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")]
29886
+ (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")]
29889
;; Splits for all extensions to DImode
29890
@@ -4671,7 +5479,8 @@
29892
"uxtah%?\\t%0, %2, %1"
29893
[(set_attr "type" "alu_shift")
29894
- (set_attr "predicable" "yes")]
29895
+ (set_attr "predicable" "yes")
29896
+ (set_attr "predicable_short_it" "no")]
29899
(define_expand "zero_extendqisi2"
29900
@@ -4764,6 +5573,7 @@
29902
"uxtab%?\\t%0, %2, %1"
29903
[(set_attr "predicable" "yes")
29904
+ (set_attr "predicable_short_it" "no")
29905
(set_attr "insn" "xtab")
29906
(set_attr "type" "alu_shift")]
29908
@@ -4816,7 +5626,8 @@
29911
[(set_attr "conds" "set")
29912
- (set_attr "predicable" "yes")]
29913
+ (set_attr "predicable" "yes")
29914
+ (set_attr "predicable_short_it" "no")]
29917
(define_expand "extendhisi2"
29918
@@ -5002,6 +5813,7 @@
29919
ldr%(sh%)\\t%0, %1"
29920
[(set_attr "type" "simple_alu_shift,load_byte")
29921
(set_attr "predicable" "yes")
29922
+ (set_attr "predicable_short_it" "no")
29923
(set_attr "pool_range" "*,256")
29924
(set_attr "neg_pool_range" "*,244")]
29926
@@ -5114,7 +5926,8 @@
29927
"sxtab%?\\t%0, %2, %1"
29928
[(set_attr "type" "alu_shift")
29929
(set_attr "insn" "xtab")
29930
- (set_attr "predicable" "yes")]
29931
+ (set_attr "predicable" "yes")
29932
+ (set_attr "predicable_short_it" "no")]
29936
@@ -5313,8 +6126,8 @@
29939
(define_insn "*arm_movdi"
29940
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
29941
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))]
29942
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
29943
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))]
29945
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
29947
@@ -5570,6 +6383,7 @@
29949
"movt%?\t%0, #:upper16:%c2"
29950
[(set_attr "predicable" "yes")
29951
+ (set_attr "predicable_short_it" "no")
29952
(set_attr "length" "4")]
29955
@@ -6449,26 +7263,28 @@
29960
(define_insn "*arm_movqi_insn"
29961
- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,Uu,r,m")
29962
- (match_operand:QI 1 "general_operand" "r,I,K,Uu,l,m,r"))]
29963
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m")
29964
+ (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))]
29966
&& ( register_operand (operands[0], QImode)
29967
|| register_operand (operands[1], QImode))"
29978
- [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1")
29979
- (set_attr "insn" "mov,mov,mvn,*,*,*,*")
29980
+ [(set_attr "type" "*,*,simple_alu_imm,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1")
29981
+ (set_attr "insn" "mov,mov,mov,mov,mvn,*,*,*,*")
29982
(set_attr "predicable" "yes")
29983
- (set_attr "arch" "any,any,any,t2,t2,any,any")
29984
- (set_attr "length" "4,4,4,2,2,4,4")]
29985
+ (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no")
29986
+ (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any")
29987
+ (set_attr "length" "2,4,4,2,4,2,2,4,4")]
29990
(define_insn "*thumb1_movqi_insn"
29991
@@ -6738,8 +7554,8 @@
29994
(define_insn "*movdf_soft_insn"
29995
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
29996
- (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
29997
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
29998
+ (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
29999
"TARGET_32BIT && TARGET_SOFT_FLOAT
30000
&& ( register_operand (operands[0], DFmode)
30001
|| register_operand (operands[1], DFmode))"
30002
@@ -6869,10 +7685,18 @@
30003
(match_operand:BLK 1 "general_operand" "")
30004
(match_operand:SI 2 "const_int_operand" "")
30005
(match_operand:SI 3 "const_int_operand" "")]
30011
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
30012
+ && !optimize_function_for_size_p (cfun))
30014
+ if (gen_movmem_ldrd_strd (operands))
30019
if (arm_gen_movmemqi (operands))
30022
@@ -7617,23 +8441,64 @@
30023
;; if-conversion can not reduce to a conditional compare, so we do
30026
-(define_insn "*arm_cmpdi_insn"
30027
+(define_insn_and_split "*arm_cmpdi_insn"
30028
[(set (reg:CC_NCV CC_REGNUM)
30029
(compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r")
30030
(match_operand:DI 1 "arm_di_operand" "rDi")))
30031
(clobber (match_scratch:SI 2 "=r"))]
30033
- "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
30034
+ "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
30035
+ "&& reload_completed"
30036
+ [(set (reg:CC CC_REGNUM)
30037
+ (compare:CC (match_dup 0) (match_dup 1)))
30038
+ (parallel [(set (reg:CC CC_REGNUM)
30039
+ (compare:CC (match_dup 3) (match_dup 4)))
30040
+ (set (match_dup 2)
30041
+ (minus:SI (match_dup 5)
30042
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])]
30044
+ operands[3] = gen_highpart (SImode, operands[0]);
30045
+ operands[0] = gen_lowpart (SImode, operands[0]);
30046
+ if (CONST_INT_P (operands[1]))
30048
+ operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode,
30051
+ operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]);
30055
+ operands[4] = gen_highpart (SImode, operands[1]);
30056
+ operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]);
30058
+ operands[1] = gen_lowpart (SImode, operands[1]);
30059
+ operands[2] = gen_lowpart (SImode, operands[2]);
30061
[(set_attr "conds" "set")
30062
(set_attr "length" "8")]
30065
-(define_insn "*arm_cmpdi_unsigned"
30066
+(define_insn_and_split "*arm_cmpdi_unsigned"
30067
[(set (reg:CC_CZ CC_REGNUM)
30068
(compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
30069
(match_operand:DI 1 "arm_di_operand" "rDi")))]
30071
- "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
30072
+ "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
30073
+ "&& reload_completed"
30074
+ [(set (reg:CC CC_REGNUM)
30075
+ (compare:CC (match_dup 2) (match_dup 3)))
30076
+ (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0))
30077
+ (set (reg:CC CC_REGNUM)
30078
+ (compare:CC (match_dup 0) (match_dup 1))))]
30080
+ operands[2] = gen_highpart (SImode, operands[0]);
30081
+ operands[0] = gen_lowpart (SImode, operands[0]);
30082
+ if (CONST_INT_P (operands[1]))
30083
+ operands[3] = gen_highpart_mode (SImode, DImode, operands[1]);
30085
+ operands[3] = gen_highpart (SImode, operands[1]);
30086
+ operands[1] = gen_lowpart (SImode, operands[1]);
30088
[(set_attr "conds" "set")
30089
(set_attr "length" "8")]
30091
@@ -7758,36 +8623,56 @@
30092
operands[3] = const0_rtx;"
30095
-(define_insn "*mov_scc"
30096
+(define_insn_and_split "*mov_scc"
30097
[(set (match_operand:SI 0 "s_register_operand" "=r")
30098
(match_operator:SI 1 "arm_comparison_operator"
30099
[(match_operand 2 "cc_register" "") (const_int 0)]))]
30101
- "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
30102
+ "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
30104
+ [(set (match_dup 0)
30105
+ (if_then_else:SI (match_dup 1)
30109
[(set_attr "conds" "use")
30110
- (set_attr "insn" "mov")
30111
(set_attr "length" "8")]
30114
-(define_insn "*mov_negscc"
30115
+(define_insn_and_split "*mov_negscc"
30116
[(set (match_operand:SI 0 "s_register_operand" "=r")
30117
(neg:SI (match_operator:SI 1 "arm_comparison_operator"
30118
[(match_operand 2 "cc_register" "") (const_int 0)])))]
30120
- "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
30121
+ "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
30123
+ [(set (match_dup 0)
30124
+ (if_then_else:SI (match_dup 1)
30128
+ operands[3] = GEN_INT (~0);
30130
[(set_attr "conds" "use")
30131
- (set_attr "insn" "mov")
30132
(set_attr "length" "8")]
30135
-(define_insn "*mov_notscc"
30136
+(define_insn_and_split "*mov_notscc"
30137
[(set (match_operand:SI 0 "s_register_operand" "=r")
30138
(not:SI (match_operator:SI 1 "arm_comparison_operator"
30139
[(match_operand 2 "cc_register" "") (const_int 0)])))]
30141
- "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
30142
+ "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
30144
+ [(set (match_dup 0)
30145
+ (if_then_else:SI (match_dup 1)
30149
+ operands[3] = GEN_INT (~1);
30150
+ operands[4] = GEN_INT (~0);
30152
[(set_attr "conds" "use")
30153
- (set_attr "insn" "mov")
30154
(set_attr "length" "8")]
30157
@@ -8069,7 +8954,7 @@
30159
(define_expand "movsfcc"
30160
[(set (match_operand:SF 0 "s_register_operand" "")
30161
- (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
30162
+ (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "")
30163
(match_operand:SF 2 "s_register_operand" "")
30164
(match_operand:SF 3 "s_register_operand" "")))]
30165
"TARGET_32BIT && TARGET_HARD_FLOAT"
30166
@@ -8091,7 +8976,7 @@
30168
(define_expand "movdfcc"
30169
[(set (match_operand:DF 0 "s_register_operand" "")
30170
- (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
30171
+ (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "")
30172
(match_operand:DF 2 "s_register_operand" "")
30173
(match_operand:DF 3 "s_register_operand" "")))]
30174
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
30175
@@ -8110,7 +8995,40 @@
30179
-(define_insn "*movsicc_insn"
30180
+(define_insn "*cmov<mode>"
30181
+ [(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>")
30182
+ (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator"
30183
+ [(match_operand 2 "cc_register" "") (const_int 0)])
30184
+ (match_operand:SDF 3 "s_register_operand"
30185
+ "<F_constraint>")
30186
+ (match_operand:SDF 4 "s_register_operand"
30187
+ "<F_constraint>")))]
30188
+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
30191
+ enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]);
30198
+ return \"vsel%d1.<V_if_elem>\\t%<V_reg>0, %<V_reg>3, %<V_reg>4\";
30203
+ return \"vsel%D1.<V_if_elem>\\t%<V_reg>0, %<V_reg>4, %<V_reg>3\";
30205
+ gcc_unreachable ();
30209
+ [(set_attr "conds" "use")
30210
+ (set_attr "type" "f_sel<vfp_type>")]
30213
+(define_insn_and_split "*movsicc_insn"
30214
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
30216
(match_operator 3 "arm_comparison_operator"
30217
@@ -8123,10 +9041,45 @@
30221
- mov%d3\\t%0, %1\;mov%D3\\t%0, %2
30222
- mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
30223
- mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
30224
- mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
30229
+ ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2
30230
+ ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
30231
+ ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
30232
+ ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
30233
+ "&& reload_completed"
30236
+ enum rtx_code rev_code;
30237
+ enum machine_mode mode;
30240
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
30242
+ gen_rtx_SET (VOIDmode,
30246
+ rev_code = GET_CODE (operands[3]);
30247
+ mode = GET_MODE (operands[4]);
30248
+ if (mode == CCFPmode || mode == CCFPEmode)
30249
+ rev_code = reverse_condition_maybe_unordered (rev_code);
30251
+ rev_code = reverse_condition (rev_code);
30253
+ rev_cond = gen_rtx_fmt_ee (rev_code,
30257
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
30259
+ gen_rtx_SET (VOIDmode,
30264
[(set_attr "length" "4,4,4,4,8,8,8,8")
30265
(set_attr "conds" "use")
30266
(set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")
30267
@@ -8255,7 +9208,7 @@
30268
(match_operand 1 "" ""))
30269
(use (match_operand 2 "" ""))
30270
(clobber (reg:SI LR_REGNUM))]
30271
- "TARGET_ARM && arm_arch5"
30272
+ "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)"
30274
[(set_attr "type" "call")]
30276
@@ -8265,7 +9218,7 @@
30277
(match_operand 1 "" ""))
30278
(use (match_operand 2 "" ""))
30279
(clobber (reg:SI LR_REGNUM))]
30280
- "TARGET_ARM && !arm_arch5"
30281
+ "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)"
30283
return output_call (operands);
30285
@@ -8284,7 +9237,7 @@
30286
(match_operand 1 "" ""))
30287
(use (match_operand 2 "" ""))
30288
(clobber (reg:SI LR_REGNUM))]
30289
- "TARGET_ARM && !arm_arch5"
30290
+ "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)"
30292
return output_call_mem (operands);
30294
@@ -8297,7 +9250,7 @@
30295
(match_operand 1 "" ""))
30296
(use (match_operand 2 "" ""))
30297
(clobber (reg:SI LR_REGNUM))]
30298
- "TARGET_THUMB1 && arm_arch5"
30299
+ "TARGET_THUMB1 && arm_arch5 && !SIBLING_CALL_P (insn)"
30301
[(set_attr "length" "2")
30302
(set_attr "type" "call")]
30303
@@ -8308,7 +9261,7 @@
30304
(match_operand 1 "" ""))
30305
(use (match_operand 2 "" ""))
30306
(clobber (reg:SI LR_REGNUM))]
30307
- "TARGET_THUMB1 && !arm_arch5"
30308
+ "TARGET_THUMB1 && !arm_arch5 && !SIBLING_CALL_P (insn)"
30311
if (!TARGET_CALLER_INTERWORKING)
30312
@@ -8367,7 +9320,7 @@
30313
(match_operand 2 "" "")))
30314
(use (match_operand 3 "" ""))
30315
(clobber (reg:SI LR_REGNUM))]
30316
- "TARGET_ARM && arm_arch5"
30317
+ "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)"
30319
[(set_attr "type" "call")]
30321
@@ -8378,7 +9331,7 @@
30322
(match_operand 2 "" "")))
30323
(use (match_operand 3 "" ""))
30324
(clobber (reg:SI LR_REGNUM))]
30325
- "TARGET_ARM && !arm_arch5"
30326
+ "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)"
30328
return output_call (&operands[1]);
30330
@@ -8394,7 +9347,8 @@
30331
(match_operand 2 "" "")))
30332
(use (match_operand 3 "" ""))
30333
(clobber (reg:SI LR_REGNUM))]
30334
- "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))"
30335
+ "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))
30336
+ && !SIBLING_CALL_P (insn)"
30338
return output_call_mem (&operands[1]);
30340
@@ -8444,6 +9398,7 @@
30341
(use (match_operand 2 "" ""))
30342
(clobber (reg:SI LR_REGNUM))]
30344
+ && !SIBLING_CALL_P (insn)
30345
&& (GET_CODE (operands[0]) == SYMBOL_REF)
30346
&& !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))"
30348
@@ -8460,6 +9415,7 @@
30349
(use (match_operand 3 "" ""))
30350
(clobber (reg:SI LR_REGNUM))]
30352
+ && !SIBLING_CALL_P (insn)
30353
&& (GET_CODE (operands[1]) == SYMBOL_REF)
30354
&& !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))"
30356
@@ -8505,6 +9461,10 @@
30360
+ if (!REG_P (XEXP (operands[0], 0))
30361
+ && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
30362
+ XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0));
30364
if (operands[2] == NULL_RTX)
30365
operands[2] = const0_rtx;
30367
@@ -8519,47 +9479,67 @@
30371
+ if (!REG_P (XEXP (operands[1], 0)) &&
30372
+ (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF))
30373
+ XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0));
30375
if (operands[3] == NULL_RTX)
30376
operands[3] = const0_rtx;
30380
(define_insn "*sibcall_insn"
30381
- [(call (mem:SI (match_operand:SI 0 "" "X"))
30382
+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "Cs, US"))
30383
(match_operand 1 "" ""))
30385
(use (match_operand 2 "" ""))]
30386
- "TARGET_32BIT && GET_CODE (operands[0]) == SYMBOL_REF"
30387
+ "TARGET_32BIT && SIBLING_CALL_P (insn)"
30389
- return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\";
30390
+ if (which_alternative == 1)
30391
+ return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\";
30394
+ if (arm_arch5 || arm_arch4t)
30395
+ return \"bx%?\\t%0\\t%@ indirect register sibling call\";
30397
+ return \"mov%?\\t%|pc, %0\\t%@ indirect register sibling call\";
30400
[(set_attr "type" "call")]
30403
(define_insn "*sibcall_value_insn"
30404
[(set (match_operand 0 "" "")
30405
- (call (mem:SI (match_operand:SI 1 "" "X"))
30406
+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "Cs,US"))
30407
(match_operand 2 "" "")))
30409
(use (match_operand 3 "" ""))]
30410
- "TARGET_32BIT && GET_CODE (operands[1]) == SYMBOL_REF"
30411
+ "TARGET_32BIT && SIBLING_CALL_P (insn)"
30413
- return NEED_PLT_RELOC ? \"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\";
30414
+ if (which_alternative == 1)
30415
+ return NEED_PLT_RELOC ? \"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\";
30418
+ if (arm_arch5 || arm_arch4t)
30419
+ return \"bx%?\\t%1\";
30421
+ return \"mov%?\\t%|pc, %1\\t@ indirect sibling call \";
30424
[(set_attr "type" "call")]
30427
-(define_expand "return"
30429
+(define_expand "<return_str>return"
30431
"(TARGET_ARM || (TARGET_THUMB2
30432
&& ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
30433
&& !IS_STACKALIGN (arm_current_func_type ())))
30434
- && USE_RETURN_INSN (FALSE)"
30435
+ <return_cond_false>"
30440
- thumb2_expand_return ();
30441
+ thumb2_expand_return (<return_simple_p>);
30445
@@ -8584,13 +9564,13 @@
30446
(set_attr "predicable" "yes")]
30449
-(define_insn "*cond_return"
30450
+(define_insn "*cond_<return_str>return"
30452
(if_then_else (match_operator 0 "arm_comparison_operator"
30453
[(match_operand 1 "cc_register" "") (const_int 0)])
30457
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
30458
+ "TARGET_ARM <return_cond_true>"
30461
if (arm_ccfsm_state == 2)
30462
@@ -8598,20 +9578,21 @@
30463
arm_ccfsm_state += 2;
30466
- return output_return_instruction (operands[0], true, false, false);
30467
+ return output_return_instruction (operands[0], true, false,
30468
+ <return_simple_p>);
30470
[(set_attr "conds" "use")
30471
(set_attr "length" "12")
30472
(set_attr "type" "load1")]
30475
-(define_insn "*cond_return_inverted"
30476
+(define_insn "*cond_<return_str>return_inverted"
30478
(if_then_else (match_operator 0 "arm_comparison_operator"
30479
[(match_operand 1 "cc_register" "") (const_int 0)])
30482
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
30484
+ "TARGET_ARM <return_cond_true>"
30487
if (arm_ccfsm_state == 2)
30488
@@ -8619,7 +9600,8 @@
30489
arm_ccfsm_state += 2;
30492
- return output_return_instruction (operands[0], true, true, false);
30493
+ return output_return_instruction (operands[0], true, true,
30494
+ <return_simple_p>);
30496
[(set_attr "conds" "use")
30497
(set_attr "length" "12")
30498
@@ -9095,27 +10077,64 @@
30499
(set_attr "type" "alu_shift,alu_shift_reg")])
30502
-(define_insn "*and_scc"
30503
+(define_insn_and_split "*and_scc"
30504
[(set (match_operand:SI 0 "s_register_operand" "=r")
30505
(and:SI (match_operator:SI 1 "arm_comparison_operator"
30506
- [(match_operand 3 "cc_register" "") (const_int 0)])
30507
- (match_operand:SI 2 "s_register_operand" "r")))]
30508
+ [(match_operand 2 "cc_register" "") (const_int 0)])
30509
+ (match_operand:SI 3 "s_register_operand" "r")))]
30511
- "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
30512
+ "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1"
30513
+ "&& reload_completed"
30514
+ [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0)))
30515
+ (cond_exec (match_dup 4) (set (match_dup 0)
30516
+ (and:SI (match_dup 3) (const_int 1))))]
30518
+ enum machine_mode mode = GET_MODE (operands[2]);
30519
+ enum rtx_code rc = GET_CODE (operands[1]);
30521
+ /* Note that operands[4] is the same as operands[1],
30522
+ but with VOIDmode as the result. */
30523
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
30524
+ if (mode == CCFPmode || mode == CCFPEmode)
30525
+ rc = reverse_condition_maybe_unordered (rc);
30527
+ rc = reverse_condition (rc);
30528
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
30530
[(set_attr "conds" "use")
30531
(set_attr "insn" "mov")
30532
(set_attr "length" "8")]
30535
-(define_insn "*ior_scc"
30536
+(define_insn_and_split "*ior_scc"
30537
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
30538
- (ior:SI (match_operator:SI 2 "arm_comparison_operator"
30539
- [(match_operand 3 "cc_register" "") (const_int 0)])
30540
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
30541
+ (ior:SI (match_operator:SI 1 "arm_comparison_operator"
30542
+ [(match_operand 2 "cc_register" "") (const_int 0)])
30543
+ (match_operand:SI 3 "s_register_operand" "0,?r")))]
30546
- orr%d2\\t%0, %1, #1
30547
- mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
30548
+ orr%d1\\t%0, %3, #1
30550
+ "&& reload_completed
30551
+ && REGNO (operands [0]) != REGNO (operands[3])"
30552
+ ;; && which_alternative == 1
30553
+ ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1
30554
+ [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3)))
30555
+ (cond_exec (match_dup 4) (set (match_dup 0)
30556
+ (ior:SI (match_dup 3) (const_int 1))))]
30558
+ enum machine_mode mode = GET_MODE (operands[2]);
30559
+ enum rtx_code rc = GET_CODE (operands[1]);
30561
+ /* Note that operands[4] is the same as operands[1],
30562
+ but with VOIDmode as the result. */
30563
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
30564
+ if (mode == CCFPmode || mode == CCFPEmode)
30565
+ rc = reverse_condition_maybe_unordered (rc);
30567
+ rc = reverse_condition (rc);
30568
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
30570
[(set_attr "conds" "use")
30571
(set_attr "length" "4,8")]
30573
@@ -9184,7 +10203,7 @@
30574
(set (match_dup 0) (const_int 1)))])
30576
(define_insn_and_split "*compare_scc"
30577
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
30578
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts")
30579
(match_operator:SI 1 "arm_comparison_operator"
30580
[(match_operand:SI 2 "s_register_operand" "r,r")
30581
(match_operand:SI 3 "arm_add_operand" "rI,L")]))
30582
@@ -9636,7 +10655,7 @@
30585
(define_insn_and_split "*ior_scc_scc"
30586
- [(set (match_operand:SI 0 "s_register_operand" "=r")
30587
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
30588
(ior:SI (match_operator:SI 3 "arm_comparison_operator"
30589
[(match_operand:SI 1 "s_register_operand" "r")
30590
(match_operand:SI 2 "arm_add_operand" "rIL")])
30591
@@ -9674,7 +10693,7 @@
30592
[(match_operand:SI 4 "s_register_operand" "r")
30593
(match_operand:SI 5 "arm_add_operand" "rIL")]))
30595
- (set (match_operand:SI 7 "s_register_operand" "=r")
30596
+ (set (match_operand:SI 7 "s_register_operand" "=Ts")
30597
(ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
30598
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
30600
@@ -9692,7 +10711,7 @@
30601
(set_attr "length" "16")])
30603
(define_insn_and_split "*and_scc_scc"
30604
- [(set (match_operand:SI 0 "s_register_operand" "=r")
30605
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
30606
(and:SI (match_operator:SI 3 "arm_comparison_operator"
30607
[(match_operand:SI 1 "s_register_operand" "r")
30608
(match_operand:SI 2 "arm_add_operand" "rIL")])
30609
@@ -9732,7 +10751,7 @@
30610
[(match_operand:SI 4 "s_register_operand" "r")
30611
(match_operand:SI 5 "arm_add_operand" "rIL")]))
30613
- (set (match_operand:SI 7 "s_register_operand" "=r")
30614
+ (set (match_operand:SI 7 "s_register_operand" "=Ts")
30615
(and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
30616
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
30618
@@ -9754,7 +10773,7 @@
30619
;; need only zero the value if false (if true, then the value is already
30621
(define_insn_and_split "*and_scc_scc_nodom"
30622
- [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
30623
+ [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts")
30624
(and:SI (match_operator:SI 3 "arm_comparison_operator"
30625
[(match_operand:SI 1 "s_register_operand" "r,r,0")
30626
(match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")])
30627
@@ -9822,24 +10841,75 @@
30629
;; ??? The conditional patterns above need checking for Thumb-2 usefulness
30631
-(define_insn "*negscc"
30632
+(define_insn_and_split "*negscc"
30633
[(set (match_operand:SI 0 "s_register_operand" "=r")
30634
(neg:SI (match_operator 3 "arm_comparison_operator"
30635
[(match_operand:SI 1 "s_register_operand" "r")
30636
(match_operand:SI 2 "arm_rhs_operand" "rI")])))
30637
(clobber (reg:CC CC_REGNUM))]
30640
- if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
30641
- return \"mov\\t%0, %1, asr #31\";
30643
+ "&& reload_completed"
30646
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
30648
- if (GET_CODE (operands[3]) == NE)
30649
- return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\";
30650
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
30652
+ /* Emit mov\\t%0, %1, asr #31 */
30653
+ emit_insn (gen_rtx_SET (VOIDmode,
30655
+ gen_rtx_ASHIFTRT (SImode,
30660
+ else if (GET_CODE (operands[3]) == NE)
30662
+ /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */
30663
+ if (CONST_INT_P (operands[2]))
30664
+ emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2],
30665
+ GEN_INT (- INTVAL (operands[2]))));
30667
+ emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2]));
30669
- output_asm_insn (\"cmp\\t%1, %2\", operands);
30670
- output_asm_insn (\"mov%D3\\t%0, #0\", operands);
30671
- return \"mvn%d3\\t%0, #0\";
30673
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
30674
+ gen_rtx_NE (SImode,
30677
+ gen_rtx_SET (SImode,
30684
+ /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */
30685
+ emit_insn (gen_rtx_SET (VOIDmode,
30687
+ gen_rtx_COMPARE (CCmode, operands[1], operands[2])));
30688
+ enum rtx_code rc = GET_CODE (operands[3]);
30690
+ rc = reverse_condition (rc);
30691
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
30692
+ gen_rtx_fmt_ee (rc,
30696
+ gen_rtx_SET (VOIDmode, operands[0], const0_rtx)));
30697
+ rc = GET_CODE (operands[3]);
30698
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
30699
+ gen_rtx_fmt_ee (rc,
30703
+ gen_rtx_SET (VOIDmode,
30710
[(set_attr "conds" "clob")
30711
(set_attr "length" "12")]
30713
@@ -11280,6 +12350,7 @@
30717
+[(set_attr "predicated" "yes")]
30720
(define_insn "force_register_use"
30721
@@ -11550,7 +12621,8 @@
30723
"ldrd%?\t%0, %3, [%1, %2]"
30724
[(set_attr "type" "load2")
30725
- (set_attr "predicable" "yes")])
30726
+ (set_attr "predicable" "yes")
30727
+ (set_attr "predicable_short_it" "no")])
30729
(define_insn "*thumb2_ldrd_base"
30730
[(set (match_operand:SI 0 "s_register_operand" "=r")
30731
@@ -11564,7 +12636,8 @@
30732
operands[1], 0, false, true))"
30733
"ldrd%?\t%0, %2, [%1]"
30734
[(set_attr "type" "load2")
30735
- (set_attr "predicable" "yes")])
30736
+ (set_attr "predicable" "yes")
30737
+ (set_attr "predicable_short_it" "no")])
30739
(define_insn "*thumb2_ldrd_base_neg"
30740
[(set (match_operand:SI 0 "s_register_operand" "=r")
30741
@@ -11578,7 +12651,8 @@
30742
operands[1], -4, false, true))"
30743
"ldrd%?\t%0, %2, [%1, #-4]"
30744
[(set_attr "type" "load2")
30745
- (set_attr "predicable" "yes")])
30746
+ (set_attr "predicable" "yes")
30747
+ (set_attr "predicable_short_it" "no")])
30749
(define_insn "*thumb2_strd"
30750
[(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
30751
@@ -11595,7 +12669,8 @@
30753
"strd%?\t%2, %4, [%0, %1]"
30754
[(set_attr "type" "store2")
30755
- (set_attr "predicable" "yes")])
30756
+ (set_attr "predicable" "yes")
30757
+ (set_attr "predicable_short_it" "no")])
30759
(define_insn "*thumb2_strd_base"
30760
[(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk"))
30761
@@ -11609,7 +12684,8 @@
30762
operands[0], 0, false, false))"
30763
"strd%?\t%1, %2, [%0]"
30764
[(set_attr "type" "store2")
30765
- (set_attr "predicable" "yes")])
30766
+ (set_attr "predicable" "yes")
30767
+ (set_attr "predicable_short_it" "no")])
30769
(define_insn "*thumb2_strd_base_neg"
30770
[(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
30771
@@ -11623,9 +12699,13 @@
30772
operands[0], -4, false, false))"
30773
"strd%?\t%1, %2, [%0, #-4]"
30774
[(set_attr "type" "store2")
30775
- (set_attr "predicable" "yes")])
30776
+ (set_attr "predicable" "yes")
30777
+ (set_attr "predicable_short_it" "no")])
30780
+;; Load the load/store double peephole optimizations.
30781
+(include "ldrdstrd.md")
30783
;; Load the load/store multiple patterns
30784
(include "ldmstm.md")
30786
--- a/src/gcc/config/arm/fmp626.md
30787
+++ b/src/gcc/config/arm/fmp626.md
30788
@@ -77,22 +77,22 @@
30790
(define_insn_reservation "mp626_mult1" 2
30791
(and (eq_attr "tune" "fmp626")
30792
- (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
30793
+ (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy"))
30796
(define_insn_reservation "mp626_mult2" 2
30797
(and (eq_attr "tune" "fmp626")
30798
- (eq_attr "insn" "mul,mla"))
30799
+ (eq_attr "type" "mul,mla"))
30802
(define_insn_reservation "mp626_mult3" 3
30803
(and (eq_attr "tune" "fmp626")
30804
- (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
30805
+ (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
30808
(define_insn_reservation "mp626_mult4" 4
30809
(and (eq_attr "tune" "fmp626")
30810
- (eq_attr "insn" "smulls,smlals,umulls,umlals"))
30811
+ (eq_attr "type" "smulls,smlals,umulls,umlals"))
30814
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30815
--- a/src/gcc/config/arm/fa526.md
30816
+++ b/src/gcc/config/arm/fa526.md
30817
@@ -76,12 +76,12 @@
30819
(define_insn_reservation "526_mult1" 2
30820
(and (eq_attr "tune" "fa526")
30821
- (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy"))
30822
+ (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy"))
30825
(define_insn_reservation "526_mult2" 5
30826
(and (eq_attr "tune" "fa526")
30827
- (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
30828
+ (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
30829
umlals,smulls,smlals,smlawx"))
30832
--- a/src/gcc/config/arm/arm-generic.md
30833
+++ b/src/gcc/config/arm/arm-generic.md
30834
@@ -114,7 +114,9 @@
30836
(define_insn_reservation "mult" 16
30837
(and (eq_attr "generic_sched" "yes")
30838
- (and (eq_attr "ldsched" "no") (eq_attr "type" "mult")))
30839
+ (and (eq_attr "ldsched" "no")
30840
+ (ior (eq_attr "mul32" "yes")
30841
+ (eq_attr "mul64" "yes"))))
30844
(define_insn_reservation "mult_ldsched_strongarm" 3
30845
@@ -122,7 +124,8 @@
30846
(and (eq_attr "ldsched" "yes")
30847
(and (eq_attr "tune"
30848
"strongarm,strongarm110,strongarm1100,strongarm1110")
30849
- (eq_attr "type" "mult"))))
30850
+ (ior (eq_attr "mul32" "yes")
30851
+ (eq_attr "mul64" "yes")))))
30854
(define_insn_reservation "mult_ldsched" 4
30855
@@ -130,13 +133,17 @@
30856
(and (eq_attr "ldsched" "yes")
30857
(and (eq_attr "tune"
30858
"!strongarm,strongarm110,strongarm1100,strongarm1110")
30859
- (eq_attr "type" "mult"))))
30860
+ (ior (eq_attr "mul32" "yes")
30861
+ (eq_attr "mul64" "yes")))))
30864
(define_insn_reservation "multi_cycle" 32
30865
(and (eq_attr "generic_sched" "yes")
30866
(and (eq_attr "core_cycles" "multi")
30867
- (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4")))
30868
+ (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\
30869
+ store1,store2,store3,store4")
30870
+ (not (ior (eq_attr "mul32" "yes")
30871
+ (eq_attr "mul64" "yes"))))))
30874
(define_insn_reservation "single_cycle" 1
30875
--- a/src/gcc/config/arm/iwmmxt2.md
30876
+++ b/src/gcc/config/arm/iwmmxt2.md
30878
"TARGET_REALLY_IWMMXT"
30879
"wabs<MMX_char>%?\\t%0, %1"
30880
[(set_attr "predicable" "yes")
30881
- (set_attr "wtype" "wabs")]
30882
+ (set_attr "type" "wmmx_wabs")]
30885
(define_insn "iwmmxt_wabsdiffb"
30887
"TARGET_REALLY_IWMMXT"
30888
"wabsdiffb%?\\t%0, %1, %2"
30889
[(set_attr "predicable" "yes")
30890
- (set_attr "wtype" "wabsdiff")]
30891
+ (set_attr "type" "wmmx_wabsdiff")]
30894
(define_insn "iwmmxt_wabsdiffh"
30896
"TARGET_REALLY_IWMMXT"
30897
"wabsdiffh%?\\t%0, %1, %2"
30898
[(set_attr "predicable" "yes")
30899
- (set_attr "wtype" "wabsdiff")]
30900
+ (set_attr "type" "wmmx_wabsdiff")]
30903
(define_insn "iwmmxt_wabsdiffw"
30905
"TARGET_REALLY_IWMMXT"
30906
"wabsdiffw%?\\t%0, %1, %2"
30907
[(set_attr "predicable" "yes")
30908
- (set_attr "wtype" "wabsdiff")]
30909
+ (set_attr "type" "wmmx_wabsdiff")]
30912
(define_insn "iwmmxt_waddsubhx"
30914
"TARGET_REALLY_IWMMXT"
30915
"waddsubhx%?\\t%0, %1, %2"
30916
[(set_attr "predicable" "yes")
30917
- (set_attr "wtype" "waddsubhx")]
30918
+ (set_attr "type" "wmmx_waddsubhx")]
30921
(define_insn "iwmmxt_wsubaddhx"
30923
"TARGET_REALLY_IWMMXT"
30924
"wsubaddhx%?\\t%0, %1, %2"
30925
[(set_attr "predicable" "yes")
30926
- (set_attr "wtype" "wsubaddhx")]
30927
+ (set_attr "type" "wmmx_wsubaddhx")]
30930
(define_insn "addc<mode>3"
30931
@@ -111,7 +111,7 @@
30932
"TARGET_REALLY_IWMMXT"
30933
"wadd<MMX_char>c%?\\t%0, %1, %2"
30934
[(set_attr "predicable" "yes")
30935
- (set_attr "wtype" "wadd")]
30936
+ (set_attr "type" "wmmx_wadd")]
30939
(define_insn "iwmmxt_avg4"
30940
@@ -143,7 +143,7 @@
30941
"TARGET_REALLY_IWMMXT"
30942
"wavg4%?\\t%0, %1, %2"
30943
[(set_attr "predicable" "yes")
30944
- (set_attr "wtype" "wavg4")]
30945
+ (set_attr "type" "wmmx_wavg4")]
30948
(define_insn "iwmmxt_avg4r"
30949
@@ -175,7 +175,7 @@
30950
"TARGET_REALLY_IWMMXT"
30951
"wavg4r%?\\t%0, %1, %2"
30952
[(set_attr "predicable" "yes")
30953
- (set_attr "wtype" "wavg4")]
30954
+ (set_attr "type" "wmmx_wavg4")]
30957
(define_insn "iwmmxt_wmaddsx"
30958
@@ -194,7 +194,7 @@
30959
"TARGET_REALLY_IWMMXT"
30960
"wmaddsx%?\\t%0, %1, %2"
30961
[(set_attr "predicable" "yes")
30962
- (set_attr "wtype" "wmadd")]
30963
+ (set_attr "type" "wmmx_wmadd")]
30966
(define_insn "iwmmxt_wmaddux"
30967
@@ -213,7 +213,7 @@
30968
"TARGET_REALLY_IWMMXT"
30969
"wmaddux%?\\t%0, %1, %2"
30970
[(set_attr "predicable" "yes")
30971
- (set_attr "wtype" "wmadd")]
30972
+ (set_attr "type" "wmmx_wmadd")]
30975
(define_insn "iwmmxt_wmaddsn"
30976
@@ -232,7 +232,7 @@
30977
"TARGET_REALLY_IWMMXT"
30978
"wmaddsn%?\\t%0, %1, %2"
30979
[(set_attr "predicable" "yes")
30980
- (set_attr "wtype" "wmadd")]
30981
+ (set_attr "type" "wmmx_wmadd")]
30984
(define_insn "iwmmxt_wmaddun"
30985
@@ -251,7 +251,7 @@
30986
"TARGET_REALLY_IWMMXT"
30987
"wmaddun%?\\t%0, %1, %2"
30988
[(set_attr "predicable" "yes")
30989
- (set_attr "wtype" "wmadd")]
30990
+ (set_attr "type" "wmmx_wmadd")]
30993
(define_insn "iwmmxt_wmulwsm"
30994
@@ -265,7 +265,7 @@
30995
"TARGET_REALLY_IWMMXT"
30996
"wmulwsm%?\\t%0, %1, %2"
30997
[(set_attr "predicable" "yes")
30998
- (set_attr "wtype" "wmulw")]
30999
+ (set_attr "type" "wmmx_wmulw")]
31002
(define_insn "iwmmxt_wmulwum"
31003
@@ -279,7 +279,7 @@
31004
"TARGET_REALLY_IWMMXT"
31005
"wmulwum%?\\t%0, %1, %2"
31006
[(set_attr "predicable" "yes")
31007
- (set_attr "wtype" "wmulw")]
31008
+ (set_attr "type" "wmmx_wmulw")]
31011
(define_insn "iwmmxt_wmulsmr"
31012
@@ -297,7 +297,7 @@
31013
"TARGET_REALLY_IWMMXT"
31014
"wmulsmr%?\\t%0, %1, %2"
31015
[(set_attr "predicable" "yes")
31016
- (set_attr "wtype" "wmul")]
31017
+ (set_attr "type" "wmmx_wmul")]
31020
(define_insn "iwmmxt_wmulumr"
31021
@@ -316,7 +316,7 @@
31022
"TARGET_REALLY_IWMMXT"
31023
"wmulumr%?\\t%0, %1, %2"
31024
[(set_attr "predicable" "yes")
31025
- (set_attr "wtype" "wmul")]
31026
+ (set_attr "type" "wmmx_wmul")]
31029
(define_insn "iwmmxt_wmulwsmr"
31030
@@ -333,7 +333,7 @@
31031
"TARGET_REALLY_IWMMXT"
31032
"wmulwsmr%?\\t%0, %1, %2"
31033
[(set_attr "predicable" "yes")
31034
- (set_attr "wtype" "wmul")]
31035
+ (set_attr "type" "wmmx_wmul")]
31038
(define_insn "iwmmxt_wmulwumr"
31039
@@ -350,7 +350,7 @@
31040
"TARGET_REALLY_IWMMXT"
31041
"wmulwumr%?\\t%0, %1, %2"
31042
[(set_attr "predicable" "yes")
31043
- (set_attr "wtype" "wmulw")]
31044
+ (set_attr "type" "wmmx_wmulw")]
31047
(define_insn "iwmmxt_wmulwl"
31048
@@ -361,7 +361,7 @@
31049
"TARGET_REALLY_IWMMXT"
31050
"wmulwl%?\\t%0, %1, %2"
31051
[(set_attr "predicable" "yes")
31052
- (set_attr "wtype" "wmulw")]
31053
+ (set_attr "type" "wmmx_wmulw")]
31056
(define_insn "iwmmxt_wqmulm"
31057
@@ -371,7 +371,7 @@
31058
"TARGET_REALLY_IWMMXT"
31059
"wqmulm%?\\t%0, %1, %2"
31060
[(set_attr "predicable" "yes")
31061
- (set_attr "wtype" "wqmulm")]
31062
+ (set_attr "type" "wmmx_wqmulm")]
31065
(define_insn "iwmmxt_wqmulwm"
31066
@@ -381,7 +381,7 @@
31067
"TARGET_REALLY_IWMMXT"
31068
"wqmulwm%?\\t%0, %1, %2"
31069
[(set_attr "predicable" "yes")
31070
- (set_attr "wtype" "wqmulwm")]
31071
+ (set_attr "type" "wmmx_wqmulwm")]
31074
(define_insn "iwmmxt_wqmulmr"
31075
@@ -391,7 +391,7 @@
31076
"TARGET_REALLY_IWMMXT"
31077
"wqmulmr%?\\t%0, %1, %2"
31078
[(set_attr "predicable" "yes")
31079
- (set_attr "wtype" "wqmulm")]
31080
+ (set_attr "type" "wmmx_wqmulm")]
31083
(define_insn "iwmmxt_wqmulwmr"
31084
@@ -401,7 +401,7 @@
31085
"TARGET_REALLY_IWMMXT"
31086
"wqmulwmr%?\\t%0, %1, %2"
31087
[(set_attr "predicable" "yes")
31088
- (set_attr "wtype" "wqmulwm")]
31089
+ (set_attr "type" "wmmx_wqmulwm")]
31092
(define_insn "iwmmxt_waddbhusm"
31093
@@ -417,7 +417,7 @@
31094
"TARGET_REALLY_IWMMXT"
31095
"waddbhusm%?\\t%0, %1, %2"
31096
[(set_attr "predicable" "yes")
31097
- (set_attr "wtype" "waddbhus")]
31098
+ (set_attr "type" "wmmx_waddbhus")]
31101
(define_insn "iwmmxt_waddbhusl"
31102
@@ -433,7 +433,7 @@
31103
"TARGET_REALLY_IWMMXT"
31104
"waddbhusl%?\\t%0, %1, %2"
31105
[(set_attr "predicable" "yes")
31106
- (set_attr "wtype" "waddbhus")]
31107
+ (set_attr "type" "wmmx_waddbhus")]
31110
(define_insn "iwmmxt_wqmiabb"
31111
@@ -446,7 +446,7 @@
31112
"TARGET_REALLY_IWMMXT"
31113
"wqmiabb%?\\t%0, %2, %3"
31114
[(set_attr "predicable" "yes")
31115
- (set_attr "wtype" "wqmiaxy")]
31116
+ (set_attr "type" "wmmx_wqmiaxy")]
31119
(define_insn "iwmmxt_wqmiabt"
31120
@@ -459,7 +459,7 @@
31121
"TARGET_REALLY_IWMMXT"
31122
"wqmiabt%?\\t%0, %2, %3"
31123
[(set_attr "predicable" "yes")
31124
- (set_attr "wtype" "wqmiaxy")]
31125
+ (set_attr "type" "wmmx_wqmiaxy")]
31128
(define_insn "iwmmxt_wqmiatb"
31129
@@ -472,7 +472,7 @@
31130
"TARGET_REALLY_IWMMXT"
31131
"wqmiatb%?\\t%0, %2, %3"
31132
[(set_attr "predicable" "yes")
31133
- (set_attr "wtype" "wqmiaxy")]
31134
+ (set_attr "type" "wmmx_wqmiaxy")]
31137
(define_insn "iwmmxt_wqmiatt"
31138
@@ -485,7 +485,7 @@
31139
"TARGET_REALLY_IWMMXT"
31140
"wqmiatt%?\\t%0, %2, %3"
31141
[(set_attr "predicable" "yes")
31142
- (set_attr "wtype" "wqmiaxy")]
31143
+ (set_attr "type" "wmmx_wqmiaxy")]
31146
(define_insn "iwmmxt_wqmiabbn"
31147
@@ -498,7 +498,7 @@
31148
"TARGET_REALLY_IWMMXT"
31149
"wqmiabbn%?\\t%0, %2, %3"
31150
[(set_attr "predicable" "yes")
31151
- (set_attr "wtype" "wqmiaxy")]
31152
+ (set_attr "type" "wmmx_wqmiaxy")]
31155
(define_insn "iwmmxt_wqmiabtn"
31156
@@ -511,7 +511,7 @@
31157
"TARGET_REALLY_IWMMXT"
31158
"wqmiabtn%?\\t%0, %2, %3"
31159
[(set_attr "predicable" "yes")
31160
- (set_attr "wtype" "wqmiaxy")]
31161
+ (set_attr "type" "wmmx_wqmiaxy")]
31164
(define_insn "iwmmxt_wqmiatbn"
31165
@@ -524,7 +524,7 @@
31166
"TARGET_REALLY_IWMMXT"
31167
"wqmiatbn%?\\t%0, %2, %3"
31168
[(set_attr "predicable" "yes")
31169
- (set_attr "wtype" "wqmiaxy")]
31170
+ (set_attr "type" "wmmx_wqmiaxy")]
31173
(define_insn "iwmmxt_wqmiattn"
31174
@@ -537,7 +537,7 @@
31175
"TARGET_REALLY_IWMMXT"
31176
"wqmiattn%?\\t%0, %2, %3"
31177
[(set_attr "predicable" "yes")
31178
- (set_attr "wtype" "wqmiaxy")]
31179
+ (set_attr "type" "wmmx_wqmiaxy")]
31182
(define_insn "iwmmxt_wmiabb"
31183
@@ -561,7 +561,7 @@
31184
"TARGET_REALLY_IWMMXT"
31185
"wmiabb%?\\t%0, %2, %3"
31186
[(set_attr "predicable" "yes")
31187
- (set_attr "wtype" "wmiaxy")]
31188
+ (set_attr "type" "wmmx_wmiaxy")]
31191
(define_insn "iwmmxt_wmiabt"
31192
@@ -585,7 +585,7 @@
31193
"TARGET_REALLY_IWMMXT"
31194
"wmiabt%?\\t%0, %2, %3"
31195
[(set_attr "predicable" "yes")
31196
- (set_attr "wtype" "wmiaxy")]
31197
+ (set_attr "type" "wmmx_wmiaxy")]
31200
(define_insn "iwmmxt_wmiatb"
31201
@@ -609,7 +609,7 @@
31202
"TARGET_REALLY_IWMMXT"
31203
"wmiatb%?\\t%0, %2, %3"
31204
[(set_attr "predicable" "yes")
31205
- (set_attr "wtype" "wmiaxy")]
31206
+ (set_attr "type" "wmmx_wmiaxy")]
31209
(define_insn "iwmmxt_wmiatt"
31210
@@ -633,7 +633,7 @@
31211
"TARGET_REALLY_IWMMXT"
31212
"wmiatt%?\\t%0, %2, %3"
31213
[(set_attr "predicable" "yes")
31214
- (set_attr "wtype" "wmiaxy")]
31215
+ (set_attr "type" "wmmx_wmiaxy")]
31218
(define_insn "iwmmxt_wmiabbn"
31219
@@ -657,7 +657,7 @@
31220
"TARGET_REALLY_IWMMXT"
31221
"wmiabbn%?\\t%0, %2, %3"
31222
[(set_attr "predicable" "yes")
31223
- (set_attr "wtype" "wmiaxy")]
31224
+ (set_attr "type" "wmmx_wmiaxy")]
31227
(define_insn "iwmmxt_wmiabtn"
31228
@@ -681,7 +681,7 @@
31229
"TARGET_REALLY_IWMMXT"
31230
"wmiabtn%?\\t%0, %2, %3"
31231
[(set_attr "predicable" "yes")
31232
- (set_attr "wtype" "wmiaxy")]
31233
+ (set_attr "type" "wmmx_wmiaxy")]
31236
(define_insn "iwmmxt_wmiatbn"
31237
@@ -705,7 +705,7 @@
31238
"TARGET_REALLY_IWMMXT"
31239
"wmiatbn%?\\t%0, %2, %3"
31240
[(set_attr "predicable" "yes")
31241
- (set_attr "wtype" "wmiaxy")]
31242
+ (set_attr "type" "wmmx_wmiaxy")]
31245
(define_insn "iwmmxt_wmiattn"
31246
@@ -729,7 +729,7 @@
31247
"TARGET_REALLY_IWMMXT"
31248
"wmiattn%?\\t%0, %2, %3"
31249
[(set_attr "predicable" "yes")
31250
- (set_attr "wtype" "wmiaxy")]
31251
+ (set_attr "type" "wmmx_wmiaxy")]
31254
(define_insn "iwmmxt_wmiawbb"
31255
@@ -742,7 +742,7 @@
31256
"TARGET_REALLY_IWMMXT"
31257
"wmiawbb%?\\t%0, %2, %3"
31258
[(set_attr "predicable" "yes")
31259
- (set_attr "wtype" "wmiawxy")]
31260
+ (set_attr "type" "wmmx_wmiawxy")]
31263
(define_insn "iwmmxt_wmiawbt"
31264
@@ -755,7 +755,7 @@
31265
"TARGET_REALLY_IWMMXT"
31266
"wmiawbt%?\\t%0, %2, %3"
31267
[(set_attr "predicable" "yes")
31268
- (set_attr "wtype" "wmiawxy")]
31269
+ (set_attr "type" "wmmx_wmiawxy")]
31272
(define_insn "iwmmxt_wmiawtb"
31273
@@ -768,7 +768,7 @@
31274
"TARGET_REALLY_IWMMXT"
31275
"wmiawtb%?\\t%0, %2, %3"
31276
[(set_attr "predicable" "yes")
31277
- (set_attr "wtype" "wmiawxy")]
31278
+ (set_attr "type" "wmmx_wmiawxy")]
31281
(define_insn "iwmmxt_wmiawtt"
31282
@@ -781,7 +781,7 @@
31283
"TARGET_REALLY_IWMMXT"
31284
"wmiawtt%?\\t%0, %2, %3"
31285
[(set_attr "predicable" "yes")
31286
- (set_attr "wtype" "wmiawxy")]
31287
+ (set_attr "type" "wmmx_wmiawxy")]
31290
(define_insn "iwmmxt_wmiawbbn"
31291
@@ -794,7 +794,7 @@
31292
"TARGET_REALLY_IWMMXT"
31293
"wmiawbbn%?\\t%0, %2, %3"
31294
[(set_attr "predicable" "yes")
31295
- (set_attr "wtype" "wmiawxy")]
31296
+ (set_attr "type" "wmmx_wmiawxy")]
31299
(define_insn "iwmmxt_wmiawbtn"
31300
@@ -807,7 +807,7 @@
31301
"TARGET_REALLY_IWMMXT"
31302
"wmiawbtn%?\\t%0, %2, %3"
31303
[(set_attr "predicable" "yes")
31304
- (set_attr "wtype" "wmiawxy")]
31305
+ (set_attr "type" "wmmx_wmiawxy")]
31308
(define_insn "iwmmxt_wmiawtbn"
31309
@@ -820,7 +820,7 @@
31310
"TARGET_REALLY_IWMMXT"
31311
"wmiawtbn%?\\t%0, %2, %3"
31312
[(set_attr "predicable" "yes")
31313
- (set_attr "wtype" "wmiawxy")]
31314
+ (set_attr "type" "wmmx_wmiawxy")]
31317
(define_insn "iwmmxt_wmiawttn"
31318
@@ -833,7 +833,7 @@
31319
"TARGET_REALLY_IWMMXT"
31320
"wmiawttn%?\\t%0, %2, %3"
31321
[(set_attr "predicable" "yes")
31322
- (set_attr "wtype" "wmiawxy")]
31323
+ (set_attr "type" "wmmx_wmiawxy")]
31326
(define_insn "iwmmxt_wmerge"
31327
@@ -858,7 +858,7 @@
31328
"TARGET_REALLY_IWMMXT"
31329
"wmerge%?\\t%0, %1, %2, %3"
31330
[(set_attr "predicable" "yes")
31331
- (set_attr "wtype" "wmerge")]
31332
+ (set_attr "type" "wmmx_wmerge")]
31335
(define_insn "iwmmxt_tandc<mode>3"
31336
@@ -868,7 +868,7 @@
31337
"TARGET_REALLY_IWMMXT"
31338
"tandc<MMX_char>%?\\t r15"
31339
[(set_attr "predicable" "yes")
31340
- (set_attr "wtype" "tandc")]
31341
+ (set_attr "type" "wmmx_tandc")]
31344
(define_insn "iwmmxt_torc<mode>3"
31345
@@ -878,7 +878,7 @@
31346
"TARGET_REALLY_IWMMXT"
31347
"torc<MMX_char>%?\\t r15"
31348
[(set_attr "predicable" "yes")
31349
- (set_attr "wtype" "torc")]
31350
+ (set_attr "type" "wmmx_torc")]
31353
(define_insn "iwmmxt_torvsc<mode>3"
31354
@@ -888,7 +888,7 @@
31355
"TARGET_REALLY_IWMMXT"
31356
"torvsc<MMX_char>%?\\t r15"
31357
[(set_attr "predicable" "yes")
31358
- (set_attr "wtype" "torvsc")]
31359
+ (set_attr "type" "wmmx_torvsc")]
31362
(define_insn "iwmmxt_textrc<mode>3"
31363
@@ -899,5 +899,5 @@
31364
"TARGET_REALLY_IWMMXT"
31365
"textrc<MMX_char>%?\\t r15, %0"
31366
[(set_attr "predicable" "yes")
31367
- (set_attr "wtype" "textrc")]
31368
+ (set_attr "type" "wmmx_textrc")]
31370
--- a/src/gcc/config/arm/cortex-a5.md
31371
+++ b/src/gcc/config/arm/cortex-a5.md
31374
(define_insn_reservation "cortex_a5_mul" 2
31375
(and (eq_attr "tune" "cortexa5")
31376
- (eq_attr "type" "mult"))
31377
+ (ior (eq_attr "mul32" "yes")
31378
+ (eq_attr "mul64" "yes")))
31381
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31382
--- a/src/gcc/config/arm/fa606te.md
31383
+++ b/src/gcc/config/arm/fa606te.md
31384
@@ -71,22 +71,22 @@
31386
(define_insn_reservation "606te_mult1" 2
31387
(and (eq_attr "tune" "fa606te")
31388
- (eq_attr "insn" "smlalxy"))
31389
+ (eq_attr "type" "smlalxy"))
31392
(define_insn_reservation "606te_mult2" 3
31393
(and (eq_attr "tune" "fa606te")
31394
- (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy"))
31395
+ (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy"))
31398
(define_insn_reservation "606te_mult3" 4
31399
(and (eq_attr "tune" "fa606te")
31400
- (eq_attr "insn" "mul,mla,muls,mlas"))
31401
+ (eq_attr "type" "mul,mla,muls,mlas"))
31404
(define_insn_reservation "606te_mult4" 5
31405
(and (eq_attr "tune" "fa606te")
31406
- (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
31407
+ (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
31410
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31411
--- a/src/gcc/config/arm/cortex-a9.md
31412
+++ b/src/gcc/config/arm/cortex-a9.md
31413
@@ -130,29 +130,29 @@
31414
;; We get 16*16 multiply / mac results in 3 cycles.
31415
(define_insn_reservation "cortex_a9_mult16" 3
31416
(and (eq_attr "tune" "cortexa9")
31417
- (eq_attr "insn" "smulxy"))
31418
+ (eq_attr "type" "smulxy"))
31419
"cortex_a9_mult16")
31421
;; The 16*16 mac is slightly different that it
31422
;; reserves M1 and M2 in the same cycle.
31423
(define_insn_reservation "cortex_a9_mac16" 3
31424
(and (eq_attr "tune" "cortexa9")
31425
- (eq_attr "insn" "smlaxy"))
31426
+ (eq_attr "type" "smlaxy"))
31429
(define_insn_reservation "cortex_a9_multiply" 4
31430
(and (eq_attr "tune" "cortexa9")
31431
- (eq_attr "insn" "mul,smmul,smmulr"))
31432
+ (eq_attr "type" "mul,smmul,smmulr"))
31435
(define_insn_reservation "cortex_a9_mac" 4
31436
(and (eq_attr "tune" "cortexa9")
31437
- (eq_attr "insn" "mla,smmla"))
31438
+ (eq_attr "type" "mla,smmla"))
31441
(define_insn_reservation "cortex_a9_multiply_long" 5
31442
(and (eq_attr "tune" "cortexa9")
31443
- (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
31444
+ (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
31445
"cortex_a9_mult_long")
31447
;; An instruction with a result in E2 can be forwarded
31448
--- a/src/gcc/config/arm/fa626te.md
31449
+++ b/src/gcc/config/arm/fa626te.md
31450
@@ -82,22 +82,22 @@
31452
(define_insn_reservation "626te_mult1" 2
31453
(and (eq_attr "tune" "fa626,fa626te")
31454
- (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
31455
+ (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy"))
31458
(define_insn_reservation "626te_mult2" 2
31459
(and (eq_attr "tune" "fa626,fa626te")
31460
- (eq_attr "insn" "mul,mla"))
31461
+ (eq_attr "type" "mul,mla"))
31464
(define_insn_reservation "626te_mult3" 3
31465
(and (eq_attr "tune" "fa626,fa626te")
31466
- (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
31467
+ (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
31470
(define_insn_reservation "626te_mult4" 4
31471
(and (eq_attr "tune" "fa626,fa626te")
31472
- (eq_attr "insn" "smulls,smlals,umulls,umlals"))
31473
+ (eq_attr "type" "smulls,smlals,umulls,umlals"))
31476
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31477
--- a/src/gcc/config/arm/neon-gen.ml
31478
+++ b/src/gcc/config/arm/neon-gen.ml
31479
@@ -121,6 +121,7 @@
31480
| T_uint16 | T_int16 -> T_intHI
31481
| T_uint32 | T_int32 -> T_intSI
31482
| T_uint64 | T_int64 -> T_intDI
31483
+ | T_float16 -> T_floatHF
31484
| T_float32 -> T_floatSF
31485
| T_poly8 -> T_intQI
31486
| T_poly16 -> T_intHI
31487
@@ -275,8 +276,8 @@
31488
let mode = mode_of_elt elttype shape in
31489
string_of_mode mode
31490
with MixedMode (dst, src) ->
31491
- let dstmode = mode_of_elt dst shape
31492
- and srcmode = mode_of_elt src shape in
31493
+ let dstmode = mode_of_elt ~argpos:0 dst shape
31494
+ and srcmode = mode_of_elt ~argpos:1 src shape in
31495
string_of_mode dstmode ^ string_of_mode srcmode
31497
let get_shuffle features =
31498
@@ -291,19 +292,24 @@
31499
match List.find (fun feature ->
31500
match feature with Requires_feature _ -> true
31501
| Requires_arch _ -> true
31502
+ | Requires_FP_bit _ -> true
31505
- Requires_feature feature ->
31506
+ Requires_feature feature ->
31507
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
31508
| Requires_arch arch ->
31509
Format.printf "#if __ARM_ARCH >= %d@\n" arch
31510
+ | Requires_FP_bit bit ->
31511
+ Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
31513
| _ -> assert false
31514
with Not_found -> assert true
31516
let print_feature_test_end features =
31518
- List.exists (function Requires_feature x -> true
31519
- | Requires_arch x -> true
31520
+ List.exists (function Requires_feature _ -> true
31521
+ | Requires_arch _ -> true
31522
+ | Requires_FP_bit _ -> true
31523
| _ -> false) features in
31524
if feature then Format.printf "#endif@\n"
31526
@@ -365,6 +371,7 @@
31527
"__builtin_neon_hi", "int", 16, 4;
31528
"__builtin_neon_si", "int", 32, 2;
31529
"__builtin_neon_di", "int", 64, 1;
31530
+ "__builtin_neon_hf", "float", 16, 4;
31531
"__builtin_neon_sf", "float", 32, 2;
31532
"__builtin_neon_poly8", "poly", 8, 8;
31533
"__builtin_neon_poly16", "poly", 16, 4;
31534
--- a/src/libobjc/ChangeLog.linaro
31535
+++ b/src/libobjc/ChangeLog.linaro
31537
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31539
+ GCC Linaro 4.8-2013.07-1 released.
31541
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31543
+ GCC Linaro 4.8-2013.07 released.
31545
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31547
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31549
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31551
+ GCC Linaro 4.8-2013.05 released.
31553
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31555
+ * GCC Linaro 4.8-2013.04 released.
31556
--- a/src/libgfortran/ChangeLog.linaro
31557
+++ b/src/libgfortran/ChangeLog.linaro
31559
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31561
+ GCC Linaro 4.8-2013.07-1 released.
31563
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31565
+ GCC Linaro 4.8-2013.07 released.
31567
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31569
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31571
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31573
+ GCC Linaro 4.8-2013.05 released.
31575
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31577
+ * GCC Linaro 4.8-2013.04 released.
31578
--- a/src/libada/ChangeLog.linaro
31579
+++ b/src/libada/ChangeLog.linaro
31581
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31583
+ GCC Linaro 4.8-2013.07-1 released.
31585
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31587
+ GCC Linaro 4.8-2013.07 released.
31589
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31591
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31593
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31595
+ GCC Linaro 4.8-2013.05 released.
31597
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31599
+ * GCC Linaro 4.8-2013.04 released.
31600
--- a/src/libffi/ChangeLog.linaro
31601
+++ b/src/libffi/ChangeLog.linaro
31603
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31605
+ GCC Linaro 4.8-2013.07-1 released.
31607
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31609
+ GCC Linaro 4.8-2013.07 released.
31611
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31613
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31615
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31617
+ GCC Linaro 4.8-2013.05 released.
31619
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31621
+ * GCC Linaro 4.8-2013.04 released.
31622
--- a/src/libssp/ChangeLog.linaro
31623
+++ b/src/libssp/ChangeLog.linaro
31625
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31627
+ GCC Linaro 4.8-2013.07-1 released.
31629
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31631
+ GCC Linaro 4.8-2013.07 released.
31633
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31635
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31637
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31639
+ GCC Linaro 4.8-2013.05 released.
31641
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31643
+ * GCC Linaro 4.8-2013.04 released.
31644
--- a/src/libcpp/ChangeLog.linaro
31645
+++ b/src/libcpp/ChangeLog.linaro
31647
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31649
+ GCC Linaro 4.8-2013.07-1 released.
31651
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31653
+ GCC Linaro 4.8-2013.07 released.
31655
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31657
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31659
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31661
+ GCC Linaro 4.8-2013.05 released.
31663
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31665
+ * GCC Linaro 4.8-2013.04 released.
31666
--- a/src/libcpp/po/ChangeLog.linaro
31667
+++ b/src/libcpp/po/ChangeLog.linaro
31669
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31671
+ GCC Linaro 4.8-2013.07-1 released.
31673
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31675
+ GCC Linaro 4.8-2013.07 released.
31677
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31679
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31681
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31683
+ GCC Linaro 4.8-2013.05 released.
31685
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31687
+ * GCC Linaro 4.8-2013.04 released.
31688
--- a/src/fixincludes/ChangeLog.linaro
31689
+++ b/src/fixincludes/ChangeLog.linaro
31691
+2013-07-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31693
+ GCC Linaro 4.8-2013.07-1 released.
31695
+2013-07-05 Christophe Lyon <christophe.lyon@linaro.org>
31697
+ GCC Linaro 4.8-2013.07 released.
31699
+2013-06-11 Rob Savoye <rob.savoye@linaro.org>
31701
+ GCC Linaro gcc-linaro-4.8-2013.06 released.
31703
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31705
+ GCC Linaro 4.8-2013.05 released.
31707
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
31709
+ * GCC Linaro 4.8-2013.04 released.