1
# DP: Changes for the Linaro 4.8-2013.06 release.
3
LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@199609 \
4
svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@r199923 \
5
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/libitm/ChangeLog.linaro
8
+++ b/src/libitm/ChangeLog.linaro
10
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
12
+ GCC Linaro 4.8-2013.05 released.
14
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
16
+ * GCC Linaro 4.8-2013.04 released.
17
--- a/src/libgomp/ChangeLog.linaro
18
+++ b/src/libgomp/ChangeLog.linaro
20
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22
+ GCC Linaro 4.8-2013.05 released.
24
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
26
+ * GCC Linaro 4.8-2013.04 released.
27
--- a/src/libquadmath/ChangeLog.linaro
28
+++ b/src/libquadmath/ChangeLog.linaro
30
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
32
+ GCC Linaro 4.8-2013.05 released.
34
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
36
+ * GCC Linaro 4.8-2013.04 released.
37
--- a/src/libsanitizer/sanitizer_common/sanitizer_linux.cc
38
+++ b/src/libsanitizer/sanitizer_common/sanitizer_linux.cc
40
CHECK_EQ(*current_++, ' ');
41
while (IsDecimal(*current_))
43
- CHECK_EQ(*current_++, ' ');
44
+ // Qemu may lack the trailing space.
45
+ // http://code.google.com/p/address-sanitizer/issues/detail?id=160
46
+ // CHECK_EQ(*current_++, ' ');
48
while (current_ < next_line && *current_ == ' ')
50
--- a/src/libsanitizer/ChangeLog.linaro
51
+++ b/src/libsanitizer/ChangeLog.linaro
53
+2013-06-04 Christophe Lyon <christophe.lyon@linaro.org>
55
+ Backport from trunk r199606.
56
+ 2013-06-03 Christophe Lyon <christophe.lyon@linaro.org>
58
+ * sanitizer_common/sanitizer_linux.cc (MemoryMappingLayout::Next):
59
+ Cherry pick upstream r182922.
61
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
63
+ GCC Linaro 4.8-2013.05 released.
65
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
67
+ * GCC Linaro 4.8-2013.04 released.
68
--- a/src/zlib/ChangeLog.linaro
69
+++ b/src/zlib/ChangeLog.linaro
71
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
73
+ GCC Linaro 4.8-2013.05 released.
75
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
77
+ * GCC Linaro 4.8-2013.04 released.
78
--- a/src/libstdc++-v3/ChangeLog.linaro
79
+++ b/src/libstdc++-v3/ChangeLog.linaro
81
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
83
+ GCC Linaro 4.8-2013.05 released.
85
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
87
+ * GCC Linaro 4.8-2013.04 released.
88
--- a/src/intl/ChangeLog.linaro
89
+++ b/src/intl/ChangeLog.linaro
91
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
93
+ GCC Linaro 4.8-2013.05 released.
95
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
97
+ * GCC Linaro 4.8-2013.04 released.
98
--- a/src/ChangeLog.linaro
99
+++ b/src/ChangeLog.linaro
101
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
103
+ GCC Linaro 4.8-2013.05 released.
105
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
107
+ * GCC Linaro 4.8-2013.04 released.
108
--- a/src/libmudflap/ChangeLog.linaro
109
+++ b/src/libmudflap/ChangeLog.linaro
111
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
113
+ GCC Linaro 4.8-2013.05 released.
115
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
117
+ * GCC Linaro 4.8-2013.04 released.
118
--- a/src/boehm-gc/ChangeLog.linaro
119
+++ b/src/boehm-gc/ChangeLog.linaro
121
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
123
+ GCC Linaro 4.8-2013.05 released.
125
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
127
+ Backport from trunk r197770.
129
+ 2013-03-16 Yvan Roux <yvan.roux@linaro.org>
131
+ * include/private/gcconfig.h (AARCH64): New macro (defined only if
133
+ (mach_type_known): Update comment adding ARM AArch64 target.
134
+ (NOSYS, mach_type_known,CPP_WORDSZ, MACH_TYPE, ALIGNMENT, HBLKSIZE,
135
+ OS_TYPE, LINUX_STACKBOTTOM, USE_GENERIC_PUSH_REGS, DYNAMIC_LOADING,
136
+ DATASTART, DATAEND, STACKBOTTOM): Define for AArch64.
138
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
140
+ * GCC Linaro 4.8-2013.04 released.
141
--- a/src/boehm-gc/include/private/gcconfig.h
142
+++ b/src/boehm-gc/include/private/gcconfig.h
146
/* Determine the machine type: */
147
+#if defined(__aarch64__)
149
+# if !defined(LINUX)
151
+# define mach_type_known
154
# if defined(__arm__) || defined(__thumb__)
156
# if !defined(LINUX) && !defined(NETBSD)
159
# define mach_type_known
161
+# if defined(LINUX) && defined(__aarch64__)
163
+# define mach_type_known
165
# if defined(LINUX) && defined(__arm__)
167
# define mach_type_known
169
/* running Amdahl UTS4 */
170
/* S390 ==> 390-like machine */
172
+ /* AARCH64 ==> ARM AArch64 */
173
/* ARM32 ==> Intel StrongARM */
174
/* IA64 ==> Intel IPF */
176
@@ -1833,6 +1845,32 @@
181
+# define CPP_WORDSZ 64
182
+# define MACH_TYPE "AARCH64"
183
+# define ALIGNMENT 8
185
+# define HBLKSIZE 4096
188
+# define OS_TYPE "LINUX"
189
+# define LINUX_STACKBOTTOM
190
+# define USE_GENERIC_PUSH_REGS
191
+# define DYNAMIC_LOADING
192
+ extern int __data_start[];
193
+# define DATASTART ((ptr_t)__data_start)
194
+ extern char _end[];
195
+# define DATAEND ((ptr_t)(&_end))
198
+ /* __data_start is usually defined in the target linker script. */
199
+ extern int __data_start[];
200
+# define DATASTART ((ptr_t)__data_start)
201
+ extern void *__stack_base__;
202
+# define STACKBOTTOM ((ptr_t)__stack_base__)
207
# define CPP_WORDSZ 32
208
# define MACH_TYPE "ARM32"
209
--- a/src/include/ChangeLog.linaro
210
+++ b/src/include/ChangeLog.linaro
212
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
214
+ GCC Linaro 4.8-2013.05 released.
216
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
218
+ * GCC Linaro 4.8-2013.04 released.
219
--- a/src/libiberty/ChangeLog.linaro
220
+++ b/src/libiberty/ChangeLog.linaro
222
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
224
+ GCC Linaro 4.8-2013.05 released.
226
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
228
+ * GCC Linaro 4.8-2013.04 released.
229
--- a/src/lto-plugin/ChangeLog.linaro
230
+++ b/src/lto-plugin/ChangeLog.linaro
232
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
234
+ GCC Linaro 4.8-2013.05 released.
236
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
238
+ * GCC Linaro 4.8-2013.04 released.
239
--- a/src/contrib/regression/ChangeLog.linaro
240
+++ b/src/contrib/regression/ChangeLog.linaro
242
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
244
+ GCC Linaro 4.8-2013.05 released.
246
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
248
+ * GCC Linaro 4.8-2013.04 released.
249
--- a/src/contrib/config-list.mk
250
+++ b/src/contrib/config-list.mk
252
# nohup nice make -j25 -l36 -f ../gcc/contrib/config-list.mk > make.out 2>&1 &
254
# v850e1-elf is rejected by config.sub
255
-LIST = alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
256
+LIST = aarch64-elf aarch64-linux-gnu \
257
+ alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
258
alpha64-dec-vms alpha-dec-vms am33_2.0-linux \
259
arm-wrs-vxworks arm-netbsdelf \
260
arm-linux-androideabi arm-uclinux_eabi arm-eabi \
261
--- a/src/contrib/ChangeLog.linaro
262
+++ b/src/contrib/ChangeLog.linaro
264
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
266
+ GCC Linaro 4.8-2013.05 released.
268
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
270
+ Backport from trunk r198443.
271
+ 2013-04-22 Sofiane Naci <sofiane.naci@arm.com>
273
+ * config-list.mk (LIST): Add aarch64-elf and aarch64-linux-gnu.
275
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
277
+ * GCC Linaro 4.8-2013.04 released.
278
--- a/src/contrib/reghunt/ChangeLog.linaro
279
+++ b/src/contrib/reghunt/ChangeLog.linaro
281
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
283
+ GCC Linaro 4.8-2013.05 released.
285
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
287
+ * GCC Linaro 4.8-2013.04 released.
288
--- a/src/libatomic/ChangeLog.linaro
289
+++ b/src/libatomic/ChangeLog.linaro
291
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
293
+ GCC Linaro 4.8-2013.05 released.
295
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
297
+ * GCC Linaro 4.8-2013.04 released.
298
--- a/src/config/ChangeLog.linaro
299
+++ b/src/config/ChangeLog.linaro
301
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
303
+ GCC Linaro 4.8-2013.05 released.
305
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
307
+ * GCC Linaro 4.8-2013.04 released.
308
--- a/src/libbacktrace/ChangeLog.linaro
309
+++ b/src/libbacktrace/ChangeLog.linaro
311
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
313
+ GCC Linaro 4.8-2013.05 released.
315
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
317
+ * GCC Linaro 4.8-2013.04 released.
318
--- a/src/libjava/libltdl/ChangeLog.linaro
319
+++ b/src/libjava/libltdl/ChangeLog.linaro
321
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
323
+ GCC Linaro 4.8-2013.05 released.
325
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
327
+ * GCC Linaro 4.8-2013.04 released.
328
--- a/src/libjava/ChangeLog.linaro
329
+++ b/src/libjava/ChangeLog.linaro
331
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
333
+ GCC Linaro 4.8-2013.05 released.
335
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
337
+ * GCC Linaro 4.8-2013.04 released.
338
--- a/src/libjava/classpath/ChangeLog.linaro
339
+++ b/src/libjava/classpath/ChangeLog.linaro
341
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
343
+ GCC Linaro 4.8-2013.05 released.
345
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
347
+ * GCC Linaro 4.8-2013.04 released.
348
--- a/src/gnattools/ChangeLog.linaro
349
+++ b/src/gnattools/ChangeLog.linaro
351
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
353
+ GCC Linaro 4.8-2013.05 released.
355
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
357
+ * GCC Linaro 4.8-2013.04 released.
358
--- a/src/maintainer-scripts/ChangeLog.linaro
359
+++ b/src/maintainer-scripts/ChangeLog.linaro
361
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
363
+ GCC Linaro 4.8-2013.05 released.
365
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
367
+ * GCC Linaro 4.8-2013.04 released.
368
--- a/src/libgcc/ChangeLog.linaro
369
+++ b/src/libgcc/ChangeLog.linaro
371
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
373
+ GCC Linaro 4.8-2013.05 released.
375
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
377
+ Backport from trunk r198090.
378
+ 2013-04-19 Yufeng Zhang <yufeng.zhang@arm.com>
380
+ * config/aarch64/sfp-machine.h (_FP_W_TYPE): Change to define
381
+ as 'unsigned long long' instead of 'unsigned long'.
382
+ (_FP_WS_TYPE): Change to define as 'signed long long' instead of
385
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
387
+ * GCC Linaro 4.8-2013.04 released.
388
--- a/src/libgcc/config/aarch64/sfp-machine.h
389
+++ b/src/libgcc/config/aarch64/sfp-machine.h
391
<http://www.gnu.org/licenses/>. */
393
#define _FP_W_TYPE_SIZE 64
394
-#define _FP_W_TYPE unsigned long
395
-#define _FP_WS_TYPE signed long
396
+#define _FP_W_TYPE unsigned long long
397
+#define _FP_WS_TYPE signed long long
398
#define _FP_I_TYPE int
400
typedef int TItype __attribute__ ((mode (TI)));
401
--- a/src/libgcc/config/libbid/ChangeLog.linaro
402
+++ b/src/libgcc/config/libbid/ChangeLog.linaro
404
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
406
+ GCC Linaro 4.8-2013.05 released.
408
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
410
+ * GCC Linaro 4.8-2013.04 released.
411
--- a/src/libdecnumber/ChangeLog.linaro
412
+++ b/src/libdecnumber/ChangeLog.linaro
414
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
416
+ GCC Linaro 4.8-2013.05 released.
418
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
420
+ * GCC Linaro 4.8-2013.04 released.
421
--- a/src/gcc/LINARO-VERSION
422
+++ b/src/gcc/LINARO-VERSION
425
--- a/src/gcc/hooks.c
426
+++ b/src/gcc/hooks.c
431
+/* Generic hook that takes (gimple_stmt_iterator *) and returns
434
+hook_bool_gsiptr_false (gimple_stmt_iterator *a ATTRIBUTE_UNUSED)
439
/* Used for the TARGET_ASM_CAN_OUTPUT_MI_THUNK hook. */
441
hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree a ATTRIBUTE_UNUSED,
442
--- a/src/gcc/hooks.h
443
+++ b/src/gcc/hooks.h
445
extern bool hook_bool_const_tree_false (const_tree);
446
extern bool hook_bool_tree_true (tree);
447
extern bool hook_bool_const_tree_true (const_tree);
448
+extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *);
449
extern bool hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree,
452
--- a/src/gcc/c-family/ChangeLog.linaro
453
+++ b/src/gcc/c-family/ChangeLog.linaro
455
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
457
+ GCC Linaro 4.8-2013.05 released.
459
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
461
+ * GCC Linaro 4.8-2013.04 released.
462
--- a/src/gcc/java/ChangeLog.linaro
463
+++ b/src/gcc/java/ChangeLog.linaro
465
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
467
+ GCC Linaro 4.8-2013.05 released.
469
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
471
+ * GCC Linaro 4.8-2013.04 released.
472
--- a/src/gcc/c/ChangeLog.linaro
473
+++ b/src/gcc/c/ChangeLog.linaro
475
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
477
+ GCC Linaro 4.8-2013.05 released.
479
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
481
+ * GCC Linaro 4.8-2013.04 released.
482
--- a/src/gcc/target.def
483
+++ b/src/gcc/target.def
484
@@ -1289,13 +1289,24 @@
486
tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL)
488
-/* Fold a target-specific builtin. */
489
+/* Fold a target-specific builtin to a tree valid for both GIMPLE
494
tree, (tree fndecl, int n_args, tree *argp, bool ignore),
495
hook_tree_tree_int_treep_bool_null)
497
+/* Fold a target-specific builtin to a valid GIMPLE tree. */
499
+(gimple_fold_builtin,
500
+ "Fold a call to a machine specific built-in function that was set up\n\
501
+by @samp{TARGET_INIT_BUILTINS}. @var{gsi} points to the gimple\n\
502
+statement holding the function call. Returns true if any change\n\
503
+was made to the GIMPLE stream.",
504
+ bool, (gimple_stmt_iterator *gsi),
505
+ hook_bool_gsiptr_false)
507
/* Target hook is used to compare the target attributes in two functions to
508
determine which function's features get higher priority. This is used
509
during function multi-versioning to figure out the order in which two
510
--- a/src/gcc/configure
511
+++ b/src/gcc/configure
512
@@ -1658,7 +1658,8 @@
513
use sysroot as the system root during the build
514
--with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
515
--with-specs=SPECS add SPECS to driver command-line processing
516
- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
517
+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
518
+ GCC `cat $srcdir/LINARO-VERSION`"
519
--with-bugurl=URL Direct users to URL to report a bug
520
--with-multilib-list select multilibs (SH and x86-64 only)
521
--with-gnu-ld assume the C compiler uses GNU ld default=no
522
@@ -7327,7 +7328,7 @@
523
*) PKGVERSION="($withval) " ;;
526
- PKGVERSION="(GCC) "
527
+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
531
--- a/src/gcc/objc/ChangeLog.linaro
532
+++ b/src/gcc/objc/ChangeLog.linaro
534
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
536
+ GCC Linaro 4.8-2013.05 released.
538
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
540
+ * GCC Linaro 4.8-2013.04 released.
541
--- a/src/gcc/ChangeLog.linaro
542
+++ b/src/gcc/ChangeLog.linaro
544
+2013-06-06 Zhenqiang Chen <zhenqiang.chen@linaro.org>
546
+ Backport from mainline (r199438, r199439)
547
+ 2013-05-30 Zhenqiang Chen <zhenqiang.chen@linaro.org>
549
+ * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added.
550
+ (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes.
551
+ (arm_emit_vfp_multi_reg_pop): Likewise.
552
+ (thumb2_emit_ldrd_pop): Likewise.
553
+ (arm_expand_epilogue): Add misc REG_CFA notes.
554
+ (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE.
556
+ 2013-05-30 Bernd Schmidt <bernds@codesourcery.com>
557
+ Zhenqiang Chen <zhenqiang.chen@linaro.org>
559
+ * config/arm/arm-protos.h: Add and update function protos.
560
+ * config/arm/arm.c (use_simple_return_p): New added.
561
+ (thumb2_expand_return): Check simple_return flag.
562
+ * config/arm/arm.md: Add simple_return and conditional simple_return.
563
+ * config/arm/iterators.md: Add iterator for return and simple_return.
564
+ * gcc.dg/shrink-wrap-alloca.c: New added.
565
+ * gcc.dg/shrink-wrap-pretend.c: New added.
566
+ * gcc.dg/shrink-wrap-sibcall.c: New added.
568
+2013-06-06 Kugan Vivekanandarajah <kuganv@linaro.org>
570
+ Backport from mainline r198879:
572
+ 2013-05-14 Chung-Lin Tang <cltang@codesourcery.com>
574
+ * config/arm/arm.h (EPILOGUE_USES): Only return true
575
+ for LR_REGNUM after epilogue_completed.
577
+2013-06-05 Christophe Lyon <christophe.lyon@linaro.org>
579
+ Backport from trunk r199652,199653,199656,199657,199658.
581
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
583
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Call
584
+ into function to generate MOVI instruction.
585
+ * config/aarch64/aarch64.c (aarch64_simd_container_mode):
587
+ (aarch64_preferred_simd_mode): Turn into wrapper.
588
+ (aarch64_output_scalar_simd_mov_immediate): New function.
589
+ * config/aarch64/aarch64-protos.h: Add prototype for above.
591
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
593
+ * config/aarch64/aarch64.c (simd_immediate_info): Remove
594
+ element_char member.
595
+ (sizetochar): Return signed char.
596
+ (aarch64_simd_valid_immediate): Remove elchar and other
597
+ unnecessary variables.
598
+ (aarch64_output_simd_mov_immediate): Take rtx instead of &rtx.
599
+ Calculate element_char as required.
600
+ * config/aarch64/aarch64-protos.h: Update and move prototype
601
+ for aarch64_output_simd_mov_immediate.
602
+ * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>):
605
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
607
+ * config/aarch64/aarch64.c (simd_immediate_info): Struct to hold
608
+ information completed by aarch64_simd_valid_immediate.
609
+ (aarch64_legitimate_constant_p): Update arguments.
610
+ (aarch64_simd_valid_immediate): Work with struct rather than many
612
+ (aarch64_simd_scalar_immediate_valid_for_move): Update arguments.
613
+ (aarch64_simd_make_constant): Update arguments.
614
+ (aarch64_output_simd_mov_immediate): Work with struct rather than
615
+ many pointers. Output immediate directly rather than as operand.
616
+ * config/aarch64/aarch64-protos.h (aarch64_simd_valid_immediate):
618
+ * config/aarch64/constraints.md (Dn): Update arguments.
620
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
622
+ * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): No
624
+ (aarch64_simd_immediate_valid_for_move): Remove.
625
+ (aarch64_simd_scalar_immediate_valid_for_move): Update call.
626
+ (aarch64_simd_make_constant): Update call.
627
+ (aarch64_output_simd_mov_immediate): Update call.
628
+ * config/aarch64/aarch64-protos.h (aarch64_simd_valid_immediate):
630
+ * config/aarch64/constraints.md (Dn): Update call.
632
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
634
+ * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Change
635
+ return type to bool for prototype.
636
+ (aarch64_legitimate_constant_p): Check for true instead of not -1.
637
+ (aarch64_simd_valid_immediate): Fix up each return to return a bool.
638
+ (aarch64_simd_immediate_valid_for_move): Update retval for bool.
640
+2013-06-04 Christophe Lyon <christophe.lyon@linaro.org>
642
+ Backport from trunk r199261.
643
+ 2013-05-23 Christian Bruel <christian.bruel@st.com>
646
+ * config/arm/arm.c (arm_dwarf_register_span): Do not use dbx number.
648
+2013-06-03 Christophe Lyon <christophe.lyon@linaro.org>
650
+ Backport from trunk
651
+ r198890,199254,199259,199260,199293,199407,199408,199454,199544,199545.
653
+ 2013-05-31 Marcus Shawcroft <marcus.shawcroft@arm.com>
655
+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
656
+ Remove un-necessary braces.
658
+ 2013-05-31 Marcus Shawcroft <marcus.shawcroft@arm.com>
660
+ * config/aarch64/aarch64.c (aarch64_classify_symbol):
661
+ Use SYMBOL_TINY_ABSOLUTE for AARCH64_CMODEL_TINY_PIC.
663
+ 2013-05-30 Ian Bolton <ian.bolton@arm.com>
665
+ * config/aarch64/aarch64.md (insv<mode>): New define_expand.
666
+ (*insv_reg<mode>): New define_insn.
668
+ 2012-05-29 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
669
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
671
+ * config/aarch64/aarch64-protos.h (aarch64_symbol_type): Define
672
+ SYMBOL_TINY_ABSOLUTE.
673
+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Handle
674
+ SYMBOL_TINY_ABSOLUTE.
675
+ (aarch64_expand_mov_immediate): Likewise.
676
+ (aarch64_classify_symbol): Likewise.
677
+ (aarch64_mov_operand_p): Remove ATTRIBUTE_UNUSED.
678
+ Permit SYMBOL_TINY_ABSOLUTE.
679
+ * config/aarch64/predicates.md (aarch64_mov_operand): Permit CONST.
681
+ 2013-05-29 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
682
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
684
+ * config/aarch64/aarch64.c (aarch64_classify_symbol): Remove comment.
685
+ Refactor if/switch. Replace gcc_assert with if.
687
+ 2013-05-24 Ian Bolton <ian.bolton@arm.com>
689
+ * config/aarch64/aarch64.c (aarch64_print_operand): Change the
690
+ X format specifier to only display bottom 16 bits.
691
+ * config/aarch64/aarch64.md (insv_imm<mode>): Allow any size of
692
+ immediate to match for operand 2, since it will be masked.
694
+ 2013-05-23 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
695
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
697
+ * config/aarch64/aarch64.md (*movdi_aarch64): Replace Usa with S.
698
+ * config/aarch64/constraints.md (Usa): Remove.
699
+ * doc/md.texi (AArch64 Usa): Remove.
701
+ 2013-05-23 Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com>
702
+ Marcus Shawcroft <marcus.shawcroft@arm.com>
704
+ * config/aarch64/aarch64-protos.h (aarch64_mov_operand_p): Define.
705
+ * config/aarch64/aarch64.c (aarch64_mov_operand_p): Define.
706
+ * config/aarch64/predicates.md (aarch64_const_address): Remove.
707
+ (aarch64_mov_operand): Use aarch64_mov_operand_p.
709
+ 2013-05-23 Vidya Praveen <vidyapraveen@arm.com>
711
+ * config/aarch64/aarch64-simd.md (clzv4si2): Support for CLZ
712
+ instruction (AdvSIMD).
713
+ * config/aarch64/aarch64-builtins.c
714
+ (aarch64_builtin_vectorized_function): Handler for BUILT_IN_CLZ.
715
+ * config/aarch64/aarch-simd-builtins.def: Entry for CLZ.
717
+ 2013-05-14 James Greenhalgh <james.greenhalgh@arm.com>
719
+ * config/aarch64/aarch64-simd.md
720
+ (aarch64_vcond_internal<mode>): Rename to...
721
+ (aarch64_vcond_internal<mode><mode>): ...This, for integer modes.
722
+ (aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): ...This for
723
+ float modes. Clarify all iterator modes.
724
+ (vcond<mode><mode>): Use new name for vcond expanders.
725
+ (vcond<v_cmp_result><mode>): Likewise.
726
+ (vcondu<mode><mode>: Likewise.
727
+ * config/aarch64/iterators.md (VDQF_COND): New.
729
+2013-05-29 Christophe Lyon <christophe.lyon@linaro.org>
731
+ Backport from trunk r198928,198973,199203.
732
+ 2013-05-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
736
+ * config/arm/arm.c (any_sibcall_uses_r3): Rename to ..
737
+ (any_sibcall_could_use_r3): this and handle indirect calls.
738
+ (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3.
740
+ 2013-05-16 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
743
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Add check
746
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
749
+ * config/arm/predicates.md (call_insn_operand): New predicate.
750
+ * config/arm/constraints.md ("Cs", "Ss"): New constraints.
751
+ * config/arm/arm.md (*call_insn, *call_value_insn): Match only
752
+ if insn is not a tail call.
753
+ (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through
755
+ * config/arm/arm.h (enum reg_class): New caller save register class.
756
+ (REG_CLASS_NAMES): Likewise.
757
+ (REG_CLASS_CONTENTS): Likewise.
758
+ * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling
761
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
763
+ Backport from trunk r198680.
764
+ 2013-05-07 Sofiane Naci <sofiane.naci@arm.com>
766
+ * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): call splitter.
767
+ (aarch64_simd_mov<mode>): New expander.
768
+ (aarch64_simd_mov_to_<mode>low): New instruction pattern.
769
+ (aarch64_simd_mov_to_<mode>high): Likewise.
770
+ (aarch64_simd_mov_from_<mode>low): Likewise.
771
+ (aarch64_simd_mov_from_<mode>high): Likewise.
772
+ (aarch64_dup_lane<mode>): Update.
773
+ (aarch64_dup_lanedi): New instruction pattern.
774
+ * config/aarch64/aarch64-protos.h (aarch64_split_simd_move): New prototype.
775
+ * config/aarch64/aarch64.c (aarch64_split_simd_move): New function.
777
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
779
+ Backport from trunk r198497-198500.
780
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
782
+ * config/aarch64/aarch64-builtins.c
783
+ (aarch64_gimple_fold_builtin.c): Fold more modes for reduc_splus_.
784
+ * config/aarch64/aarch64-simd-builtins.def
785
+ (reduc_splus_): Add new modes.
786
+ (reduc_uplus_): New.
787
+ * config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove.
788
+ (reduc_uplus_v4sf): Likewise.
789
+ (reduc_splus_v4sf): Likewise.
790
+ (aarch64_addv<mode>): Likewise.
791
+ (reduc_uplus_<mode>): Likewise.
792
+ (reduc_splus_<mode>): Likewise.
793
+ (aarch64_addvv2di): Likewise.
794
+ (reduc_uplus_v2di): Likewise.
795
+ (reduc_splus_v2di): Likewise.
796
+ (aarch64_addvv2si): Likewise.
797
+ (reduc_uplus_v2si): Likewise.
798
+ (reduc_splus_v2si): Likewise.
799
+ (reduc_<sur>plus_<mode>): New.
800
+ (reduc_<sur>plus_v2di): Likewise.
801
+ (reduc_<sur>plus_v2si): Likewise.
802
+ (reduc_<sur>plus_v4sf): Likewise.
803
+ (aarch64_addpv4sf): Likewise.
804
+ * config/aarch64/arm_neon.h
805
+ (vaddv<q>_<s,u,f><8, 16, 32, 64): Rewrite using builtins.
806
+ * config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV,
807
+ add UNSPEC_SADDV, UNSPEC_UADDV.
809
+ (sur): Add UNSPEC_SADDV, UNSPEC_UADDV.
811
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
813
+ * config/aarch64/arm_neon.h
814
+ (v<max,min><nm><q><v>_<sfu><8, 16, 32, 64>): Rewrite using builtins.
816
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
818
+ * config/aarch64/aarch64-builtins
819
+ (aarch64_gimple_fold_builtin): Fold reduc_<su><maxmin>_ builtins.
821
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
823
+ * config/aarch64/aarch64-simd-builtins.def
824
+ (reduc_smax_): New.
825
+ (reduc_smin_): Likewise.
826
+ (reduc_umax_): Likewise.
827
+ (reduc_umin_): Likewise.
828
+ (reduc_smax_nan_): Likewise.
829
+ (reduc_smin_nan_): Likewise.
832
+ (smax): Update for V2SF, V4SF and V2DF modes.
835
+ (smin_nan): Likewise.
836
+ * config/aarch64/aarch64-simd.md (<maxmin><mode>3): Rename to...
837
+ (<su><maxmin><mode>3): ...This, refactor.
838
+ (s<maxmin><mode>3): New.
839
+ (<maxmin_uns><mode>3): Likewise.
840
+ (reduc_<maxmin_uns>_<mode>): Refactor.
841
+ (reduc_<maxmin_uns>_v4sf): Likewise.
842
+ (reduc_<maxmin_uns>_v2si): Likewise.
843
+ (aarch64_<fmaxmin><mode>: Remove.
844
+ * config/aarch64/arm_neon.h (vmax<q>_f<32,64>): Rewrite to use
846
+ (vmin<q>_f<32,64>): Likewise.
847
+ * config/iterators.md (unspec): Add UNSPEC_FMAXNMV, UNSPEC_FMINNMV.
849
+ (su): Add mappings for smax, smin, umax, umin.
851
+ (FMAXMINV): Add UNSPEC_FMAXNMV, UNSPEC_FMINNMV.
852
+ (FMAXMIN): Rename as...
853
+ (FMAXMIN_UNS): ...This.
855
+ (fmaxminv): Likewise.
856
+ (fmaxmin): Likewise.
858
+ (maxmin_uns_op): Likewise.
860
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
862
+ Backport from trunk r199241.
863
+ 2013-05-23 James Greenhalgh <james.greenhalgh@arm.com>
865
+ * config/aarch64/aarch64-simd.md
866
+ (aarch64_cm<optab>di): Add clobber of CC_REGNUM to unsplit pattern.
868
+2013-05-23 Christophe Lyon <christophe.lyon@linaro.org>
870
+ Backport from trunk r198970.
871
+ 2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
873
+ * config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
874
+ * config/arm/arm.c (next_consecutive_mem): New function.
875
+ (gen_movmem_ldrd_strd): Likewise.
876
+ * config/arm/arm.md (movmemqi): Update condition and code.
877
+ (unaligned_loaddi, unaligned_storedi): New patterns.
879
+2013-05-19 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
881
+ * LINARO-VERSION: Bump version number.
883
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
885
+ GCC Linaro 4.8-2013.05 released.
887
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
889
+ Backport from trunk r198677.
890
+ 2013-05-07 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
892
+ * config/aarch64/aarch64.md
893
+ (cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): Restrict the
894
+ shift value between 0-4.
896
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
898
+ Backport from trunk r198574-198575.
899
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
901
+ * config/aarch64/aarch64-simd.md (simd_fabd): Correct the description.
903
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
905
+ * config/aarch64/aarch64-simd.md (*fabd_scalar<mode>3): Support
906
+ scalar form of FABD instruction.
908
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
910
+ Backport from trunk r198490-198496
911
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
913
+ * config/aarch64/arm_neon.h
914
+ (vac<ge, gt><sd>_f<32, 64>): Rename to...
915
+ (vca<ge, gt><sd>_f<32, 64>): ...this, reimpliment in C.
916
+ (vca<ge, gt, lt, le><q>_f<32, 64>): Reimpliment in C.
918
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
920
+ * config/aarch64/aarch64-simd.md (*aarch64_fac<optab><mode>): New.
921
+ * config/aarch64/iterators.md (FAC_COMPARISONS): New.
923
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
925
+ * config/aarch64/aarch64-simd.md
926
+ (vcond<mode>_internal): Handle special cases for constant masks.
927
+ (vcond<mode><mode>): Allow nonmemory_operands for outcome vectors.
928
+ (vcondu<mode><mode>): Likewise.
929
+ (vcond<v_cmp_result><mode>): New.
931
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
933
+ * config/aarch64/aarch64-builtins.c (BUILTIN_VALLDI): Define.
934
+ (aarch64_fold_builtin): Add folding for cm<eq,ge,gt,tst>.
935
+ * config/aarch64/aarch64-simd-builtins.def
936
+ (cmeq): Update to BUILTIN_VALLDI.
941
+ * config/aarch64/arm_neon.h
942
+ (vc<eq, lt, le, gt, ge, tst><z><qsd>_<fpsu><8,16,32,64>): Remap
943
+ to builtins or C as appropriate.
945
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
947
+ * config/aarch64/aarch64-simd-builtins.def (cmhs): Rename to...
949
+ (cmhi): Rename to...
951
+ * config/aarch64/aarch64-simd.md
952
+ (simd_mode): Add SF.
953
+ (aarch64_vcond_internal): Use new names for unsigned comparison insns.
954
+ (aarch64_cm<optab><mode>): Rewrite to not use UNSPECs.
955
+ * config/aarch64/aarch64.md (*cstore<mode>_neg): Rename to...
956
+ (cstore<mode>_neg): ...This.
957
+ * config/aarch64/iterators.md
959
+ (unspec): Remove UNSPEC_CM<EQ, LE, LT, GE, GT, HS, HI, TST>.
960
+ (COMPARISONS): New.
961
+ (UCOMPARISONS): Likewise.
962
+ (optab): Add missing comparisons.
968
+ (VCMP_S): Likewise.
969
+ (VCMP_U): Likewise.
970
+ (V_cmp_result): Add DF, SF modes.
971
+ (v_cmp_result): Likewise.
973
+ (vmtype): Likewise.
974
+ * config/aarch64/predicates.md (aarch64_reg_or_fp_zero): New.
976
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
978
+ Backport from trunk r198191.
979
+ 2013-04-23 Sofiane Naci <sofiane.naci@arm.com>
981
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add simd attribute.
983
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
985
+ Backport from trunk r197838.
986
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
988
+ * config/aarch64/aarch64.c (aarch64_select_cc_mode): Allow NEG
989
+ code in CC_NZ mode.
990
+ * config/aarch64/aarch64.md (*neg_<shift><mode>3_compare0): New
993
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
995
+ Backport from trunk r198019.
996
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
998
+ * config/aarch64/aarch64.md (*adds_mul_imm_<mode>): New pattern.
999
+ (*subs_mul_imm_<mode>): New pattern.
1001
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1003
+ Backport from trunk r198424-198425.
1004
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
1006
+ * config/aarch64/aarch64.md (movsi_aarch64): Support LDR/STR
1007
+ from/to S register.
1008
+ (movdi_aarch64): Support LDR/STR from/to D register.
1010
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
1012
+ * common/config/aarch64/aarch64-common.c: Enable REE pass at O2
1013
+ or higher by default.
1015
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1017
+ Backport from trunk r198412.
1018
+ 2013-04-29 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1020
+ * config/arm/arm.md (store_minmaxsi): Use only when
1021
+ optimize_insn_for_size_p.
1023
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1025
+ Backport from trunk 198394,198396-198400,198402-198404.
1026
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
1028
+ * config/aarch64/arm_neon.h
1029
+ (vcvt<sd>_f<32,64>_s<32,64>): Rewrite in C.
1030
+ (vcvt<q>_f<32,64>_s<32,64>): Rewrite using builtins.
1031
+ (vcvt_<high_>_f<32,64>_f<32,64>): Likewise.
1032
+ (vcvt<qsd>_<su><32,64>_f<32,64>): Likewise.
1033
+ (vcvta<qsd>_<su><32,64>_f<32,64>): Likewise.
1034
+ (vcvtm<qsd>_<su><32,64>_f<32,64>): Likewise.
1035
+ (vcvtn<qsd>_<su><32,64>_f<32,64>): Likewise.
1036
+ (vcvtp<qsd>_<su><32,64>_f<32,64>): Likewise.
1038
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
1040
+ * config/aarch64/aarch64-simd.md
1041
+ (<optab><VDQF:mode><fcvt_target>2): New, maps to fix, fixuns.
1042
+ (<fix_trunc_optab><VDQF:mode><fcvt_target>2): New, maps to
1043
+ fix_trunc, fixuns_trunc.
1044
+ (ftrunc<VDQF:mode>2): New.
1045
+ * config/aarch64/iterators.md (optab): Add fix, fixuns.
1046
+ (fix_trunc_optab): New.
1048
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
1050
+ * config/aarch64/aarch64-builtins.c
1051
+ (aarch64_builtin_vectorized_function): Vectorize over ifloorf,
1052
+ iceilf, lround, iroundf.
1054
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
1056
+ * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
1057
+ (float_truncate_hi_): Likewise.
1058
+ (float_extend_lo_): Likewise.
1059
+ (float_truncate_lo_): Likewise.
1060
+ * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New.
1061
+ (aarch64_float_extend_lo_v2df): Likewise.
1062
+ (vec_unpacks_hi_v4sf): Likewise.
1063
+ (aarch64_float_truncate_lo_v2sf): Likewise.
1064
+ (aarch64_float_truncate_hi_v4sf): Likewise.
1065
+ (vec_pack_trunc_v2df): Likewise.
1066
+ (vec_pack_trunc_df): Likewise.
1068
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
1070
+ * config/aarch64/aarch64-builtins.c
1071
+ (aarch64_fold_builtin): Fold float conversions.
1072
+ * config/aarch64/aarch64-simd-builtins.def
1073
+ (floatv2si, floatv4si, floatv2di): New.
1074
+ (floatunsv2si, floatunsv4si, floatunsv2di): Likewise.
1075
+ * config/aarch64/aarch64-simd.md
1076
+ (<optab><fcvt_target><VDQF:mode>2): New, expands to float and floatuns.
1077
+ * config/aarch64/iterators.md (FLOATUORS): New.
1078
+ (optab): Add float, floatuns.
1079
+ (su_optab): Likewise.
1081
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
1083
+ * config/aarch64/aarch64-builtins.c
1084
+ (aarch64_builtin_vectorized_function): Fold to standard pattern names.
1085
+ * config/aarch64/aarch64-simd-builtins.def (frintn): New.
1086
+ (frintz): Rename to...
1087
+ (btrunc): ...this.
1088
+ (frintp): Rename to...
1090
+ (frintm): Rename to...
1092
+ (frinti): Rename to...
1093
+ (nearbyint): ...this.
1094
+ (frintx): Rename to...
1096
+ (frinta): Rename to...
1098
+ * config/aarch64/aarch64-simd.md
1099
+ (aarch64_frint<frint_suffix><mode>): Delete.
1100
+ (<frint_pattern><mode>2): Convert to insn.
1101
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRINTN.
1102
+ * config/aarch64/iterators.md (FRINT): Add UNSPEC_FRINTN.
1103
+ (frint_pattern): Likewise.
1104
+ (frint_suffix): Likewise.
1106
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1108
+ Backport from trunk r198302-198306,198316.
1109
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
1111
+ * config/aarch64/aarch64-simd.md
1112
+ (aarch64_simd_bsl<mode>_internal): Rewrite RTL to not use UNSPEC_BSL.
1113
+ (aarch64_simd_bsl<mode>): Likewise.
1114
+ * config/aarch64/iterators.md (unspec): Remove UNSPEC_BSL.
1116
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
1118
+ * config/aarch64/aarch64-simd.md (neg<mode>2): Use VDQ iterator.
1120
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
1122
+ * config/aarch64/aarch64-builtins.c
1123
+ (aarch64_fold_builtin): New.
1124
+ * config/aarch64/aarch64-protos.h (aarch64_fold_builtin): New.
1125
+ * config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
1126
+ * config/aarch64/aarch64-simd-builtins.def (abs): New.
1127
+ * config/aarch64/arm_neon.h
1128
+ (vabs<q>_<f32, 64>): Implement using __builtin_aarch64_fabs.
1130
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
1131
+ Tejas Belagod <tejas.belagod@arm.com>
1133
+ * config/aarch64/aarch64-builtins.c
1134
+ (aarch64_gimple_fold_builtin): New.
1135
+ * config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
1136
+ * config/aarch64/aarch64-simd-builtins.def (addv): New.
1137
+ * config/aarch64/aarch64-simd.md (addpv4sf): New.
1138
+ (addvv4sf): Update.
1139
+ * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.
1141
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1143
+ * config/aarch64/aarch64.md
1144
+ (*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): New pattern.
1146
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1148
+ * config/aarch64/aarch64.md (*ngc<mode>): New pattern.
1149
+ (*ngcsi_uxtw): New pattern.
1151
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1153
+ Backport from trunk 198298.
1154
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1155
+ Julian Brown <julian@codesourcery.com>
1157
+ * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
1158
+ (TB_DREG): Add T_V4HF.
1159
+ (v4hf_UP): New macro.
1160
+ (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
1161
+ (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
1162
+ NEON_FLOAT_NARROW.
1163
+ Handle initialisation of V4HF. Adjust initialisation of reinterpret
1165
+ (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
1166
+ NEON_FLOAT_NARROW.
1167
+ (arm_vector_mode_supported_p): Handle V4HF.
1168
+ (arm_mangle_map): Handle V4HFmode.
1169
+ * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
1170
+ * config/arm/arm_neon_builtins.def: Add entries for
1171
+ vcvtv4hfv4sf, vcvtv4sfv4hf.
1172
+ * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
1173
+ (neon_vcvtv4hfv4sf): Likewise.
1174
+ * config/arm/neon-gen.ml: Handle half-precision floating point
1176
+ * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
1177
+ * config/arm/arm_neon.h: Regenerate.
1178
+ * config/arm/neon.ml (type elts): Add F16.
1179
+ (type vectype): Add T_float16x4, T_floatHF.
1180
+ (type vecmode): Add V4HF.
1181
+ (type features): Add Requires_FP_bit feature.
1182
+ (elt_width): Handle F16.
1183
+ (elt_class): Likewise.
1184
+ (elt_of_class_width): Likewise.
1185
+ (mode_of_elt): Refactor.
1186
+ (type_for_elt): Handle F16, fix error messages.
1187
+ (vectype_size): Handle T_float16x4.
1188
+ (vcvt_sh): New function.
1189
+ (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
1190
+ (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
1191
+ (string_of_mode): Handle V4HF.
1192
+ * doc/arm-neon-intrinsics.texi: Regenerate.
1194
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1196
+ Backport from trunk r198136-198137,198142,198176.
1197
+ 2013-04-23 Andreas Schwab <schwab@linux-m68k.org>
1199
+ * coretypes.h (gimple_stmt_iterator): Add struct to make
1200
+ compatible with C.
1202
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
1204
+ * coretypes.h (gimple_stmt_iterator_d): Forward declare.
1205
+ (gimple_stmt_iterator): New typedef.
1206
+ * gimple.h (gimple_stmt_iterator): Rename to...
1207
+ (gimple_stmt_iterator_d): ... This.
1208
+ * doc/tm.texi.in (TARGET_FOLD_BUILTIN): Detail restriction that
1209
+ trees be valid for GIMPLE and GENERIC.
1210
+ (TARGET_GIMPLE_FOLD_BUILTIN): New.
1211
+ * gimple-fold.c (gimple_fold_call): Call target hook
1212
+ gimple_fold_builtin.
1213
+ * hooks.c (hook_bool_gsiptr_false): New.
1214
+ * hooks.h (hook_bool_gsiptr_false): New.
1215
+ * target.def (fold_stmt): New.
1216
+ * doc/tm.texi: Regenerate.
1218
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
1220
+ * config/aarch64/aarch64-builtins.c
1222
+ (CF0, CF1, CF2, CF3, CF4, CF10): New.
1223
+ (VAR<1-12>): Add MAP parameter.
1224
+ (BUILTIN_*): Likewise.
1225
+ * config/aarch64/aarch64-simd-builtins.def: Set MAP parameter.
1226
+ * config/aarch64/aarch64-simd.md (aarch64_sshl_n<mode>): Remove.
1227
+ (aarch64_ushl_n<mode>): Likewise.
1228
+ (aarch64_sshr_n<mode>): Likewise.
1229
+ (aarch64_ushr_n<mode>): Likewise.
1230
+ (aarch64_<maxmin><mode>): Likewise.
1231
+ (aarch64_sqrt<mode>): Likewise.
1232
+ * config/aarch64/arm_neon.h (vshl<q>_n_*): Use new builtin names.
1233
+ (vshr<q>_n_*): Likewise.
1235
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
1237
+ * config/aarch64/aarch64-builtins.c
1238
+ (aarch64_simd_builtin_type_mode): Handle SF types.
1240
+ (BUILTIN_GPF): Define.
1241
+ (aarch64_init_simd_builtins): Handle SF types.
1242
+ * config/aarch64/aarch64-simd-builtins.def (frecpe): Add support.
1243
+ (frecps): Likewise.
1244
+ (frecpx): Likewise.
1245
+ * config/aarch64/aarch64-simd.md
1246
+ (simd_types): Update simd_frcp<esx> to simd_frecp<esx>.
1247
+ (aarch64_frecpe<mode>): New.
1248
+ (aarch64_frecps<mode>): Likewise.
1249
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP<ESX>.
1250
+ (v8type): Add frecp<esx>.
1251
+ (aarch64_frecp<FRECP:frecp_suffix><mode>): New.
1252
+ (aarch64_frecps<mode>): Likewise.
1253
+ * config/aarch64/iterators.md (FRECP): New.
1254
+ (frecp_suffix): Likewise.
1255
+ * config/aarch64/arm_neon.h
1256
+ (vrecp<esx><qsd>_<fd><32, 64>): Convert to using builtins.
1258
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1260
+ Backport from trunk r198030.
1261
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
1263
+ * config/arm/arm.md (movsicc_insn): Convert define_insn into
1264
+ define_insn_and_split.
1265
+ (and_scc,ior_scc,negscc): Likewise.
1266
+ (cmpsi2_addneg, subsi3_compare): Convert to named patterns.
1268
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1270
+ Backport from trunk r198020.
1271
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1273
+ * config/aarch64/aarch64.md (*adds_<optab><mode>_multp2):
1275
+ (*subs_<optab><mode>_multp2): New pattern.
1276
+ (*adds_<optab><ALLX:mode>_<GPI:mode>): New pattern.
1277
+ (*subs_<optab><ALLX:mode>_<GPI:mode>): New pattern.
1279
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1281
+ Backport from trunk r198004,198029.
1282
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
1284
+ * config/arm/arm.c (use_return_insn): Return 0 for targets that
1285
+ can benefit from using a sequence of LDRD instructions in epilogue
1286
+ instead of a single LDM instruction.
1288
+ 2013-04-16 Greta Yorsh <Greta.Yorsh at arm.com>
1290
+ * config/arm/arm.c (emit_multi_reg_push): New declaration
1291
+ for an existing function.
1292
+ (arm_emit_strd_push): New function.
1293
+ (arm_expand_prologue): Used here.
1294
+ (arm_emit_ldrd_pop): New function.
1295
+ (arm_expand_epilogue): Used here.
1296
+ (arm_get_frame_offsets): Update condition.
1297
+ (arm_emit_multi_reg_pop): Add a special case for load of a single
1298
+ register with writeback.
1300
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1302
+ Backport from trunk r197965.
1303
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1305
+ * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case.
1306
+ * config/arm/arm.md (*anddi3_insn): Change to insn_and_split.
1307
+ * config/arm/constraints.md (De): New constraint.
1308
+ * config/arm/neon.md (anddi3_neon): Delete.
1309
+ (neon_vand<mode>): Expand to standard anddi3 pattern.
1310
+ * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
1311
+ Move earlier in the file.
1312
+ (neon_inv_logic_op2): Likewise.
1313
+ (arm_anddi_operand_neon): New predicate.
1315
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1317
+ Backport from trunk r197925.
1318
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
1320
+ * config/arm/arm.md (mov_scc,mov_negscc,mov_notscc): Convert
1321
+ define_insn into define_insn_and_split and emit movsicc patterns.
1323
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1325
+ Backport from trunk r197807.
1326
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1328
+ * config/aarch64/aarch64.h (REVERSIBLE_CC_MODE): Define.
1330
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1332
+ Backport from trunk r197642.
1333
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1335
+ * config/arm/arm.md (minmax_arithsi_non_canon): New pattern.
1337
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1339
+ Backport from trunk r197530,197921.
1340
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
1342
+ * config/arm/arm.c (gen_operands_ldrd_strd): Initialize "base".
1344
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1346
+ * config/arm/constraints.md (q): New constraint.
1347
+ * config/arm/ldrdstrd.md: New file.
1348
+ * config/arm/arm.md (ldrdstrd.md) New include.
1349
+ (arm_movdi): Use "q" instead of "r" constraint
1350
+ for double-word memory access.
1351
+ (movdf_soft_insn): Likewise.
1352
+ * config/arm/vfp.md (movdi_vfp): Likewise.
1353
+ * config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md.
1354
+ * config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration.
1355
+ * config/arm/arm.c (gen_operands_ldrd_strd): New function.
1356
+ (mem_ok_for_ldrd_strd): Likewise.
1357
+ (output_move_double): Update assertion.
1359
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1361
+ Backport of trunk r197518-197522,197526-197528.
1362
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1364
+ * config/arm/arm.md (arm_smax_insn): Convert define_insn into
1365
+ define_insn_and_split.
1366
+ (arm_smin_insn,arm_umaxsi3,arm_uminsi3): Likewise.
1368
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1370
+ * config/arm/arm.md (arm_ashldi3_1bit): Convert define_insn into
1371
+ define_insn_and_split.
1372
+ (arm_ashrdi3_1bit,arm_lshrdi3_1bit): Likewise.
1373
+ (shiftsi3_compare): New pattern.
1374
+ (rrx): New pattern.
1375
+ * config/arm/unspecs.md (UNSPEC_RRX): New.
1377
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1379
+ * config/arm/arm.md (negdi_extendsidi): New pattern.
1380
+ (negdi_zero_extendsidi): Likewise.
1382
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1384
+ * config/arm/arm.md (andsi_iorsi3_notsi): Convert define_insn into
1385
+ define_insn_and_split.
1386
+ (arm_negdi2,arm_abssi2,arm_neg_abssi2): Likewise.
1387
+ (arm_cmpdi_insn,arm_cmpdi_unsigned): Likewise.
1389
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1391
+ * config/arm/arm.md (arm_subdi3): Convert define_insn into
1392
+ define_insn_and_split.
1393
+ (subdi_di_zesidi,subdi_di_sesidi): Likewise.
1394
+ (subdi_zesidi_di,subdi_sesidi_di,subdi_zesidi_zesidi): Likewise.
1396
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1398
+ * config/arm/arm.md (subsi3_carryin): New pattern.
1399
+ (subsi3_carryin_const): Likewise.
1400
+ (subsi3_carryin_compare,subsi3_carryin_compare_const): Likewise.
1401
+ (subsi3_carryin_shift,rsbsi3_carryin_shift): Likewise.
1403
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1405
+ * config/arm/arm.md (incscc,arm_incscc,decscc,arm_decscc): Delete.
1407
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
1409
+ * config/arm/arm.md (addsi3_carryin_<optab>): Set attribute predicable.
1410
+ (addsi3_carryin_alt2_<optab>,addsi3_carryin_shift_<optab>): Likewise.
1412
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1414
+ Backport of trunk r197517.
1415
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1417
+ * config/arm/arm.c (arm_expand_builtin): Change fcode
1418
+ type to unsigned int.
1420
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1422
+ Backport of trunk r197513.
1423
+ 2013-04-05 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1425
+ * doc/invoke.texi (ARM Options): Document cortex-a53 support.
1427
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1429
+ Backport of trunk r197489-197491.
1430
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1432
+ * config/arm/arm-protos.h (arm_builtin_vectorized_function):
1433
+ New function prototype.
1434
+ * config/arm/arm.c (TARGET_VECTORIZE_BUILTINS): Define.
1435
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
1436
+ (arm_builtin_vectorized_function): New function.
1438
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1440
+ * config/arm/arm_neon_builtins.def: New file.
1441
+ * config/arm/arm.c (neon_builtin_data): Move contents to
1442
+ arm_neon_builtins.def.
1443
+ (enum arm_builtins): Include neon builtin definitions.
1444
+ (ARM_BUILTIN_NEON_BASE): Move from enum to macro.
1445
+ * config/arm/t-arm (arm.o): Add dependency on
1446
+ arm_neon_builtins.def.
1448
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1450
+ Backport of trunk 196795-196797,196957
1451
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
1453
+ * config/aarch64/aarch64.md (*sub<mode>3_carryin): New pattern.
1454
+ (*subsi3_carryin_uxtw): Likewise.
1456
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
1458
+ * config/aarch64/aarch64.md (*ror<mode>3_insn): New pattern.
1459
+ (*rorsi3_insn_uxtw): Likewise.
1461
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
1463
+ * config/aarch64/aarch64.md (*extr<mode>5_insn): New pattern.
1464
+ (*extrsi5_insn_uxtw): Likewise.
1466
+2013-04-10 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1468
+ * LINARO-VERSION: Bump version number.
1470
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1472
+ * GCC Linaro 4.8-2013.04 released.
1474
+ * LINARO-VERSION: New file.
1475
+ * configure.ac: Add Linaro version string.
1476
+ * configure: Regenerate.
1478
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1480
+ Backport of trunk r197346.
1481
+ 2013-04-02 Ian Caulfield <ian.caulfield@arm.com>
1482
+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
1484
+ * config/arm/arm-arches.def (armv8-a): Default to cortex-a53.
1485
+ * config/arm/t-arm (MD_INCLUDES): Depend on cortex-a53.md.
1486
+ * config/arm/cortex-a53.md: New file.
1487
+ * config/arm/bpabi.h (BE8_LINK_SPEC): Handle cortex-a53.
1488
+ * config/arm/arm.md (generic_sched, generic_vfp): Handle cortex-a53.
1489
+ * config/arm/arm.c (arm_issue_rate): Likewise.
1490
+ * config/arm/arm-tune.md: Regenerate
1491
+ * config/arm/arm-tables.opt: Regenerate.
1492
+ * config/arm/arm-cores.def: Add cortex-a53.
1494
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1496
+ Backport of trunk r197342.
1497
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
1499
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add variants for
1500
+ scalar load/store operations using B/H registers.
1501
+ (*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Likewise.
1503
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1505
+ Backport of trunk r197341.
1506
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
1508
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add alternatives for
1510
+ * config/aarch64/aarch64.c
1511
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
1512
+ * config/aarch64/aarch64-protos.h
1513
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
1514
+ * config/aarch64/constraints.md (Dh, Dq): New.
1515
+ * config/aarch64/iterators.md (hq): New.
1517
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1519
+ Backport from trunk r197207.
1520
+ 2013-03-28 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1522
+ * config/aarch64/aarch64.md (*and<mode>3_compare0): New pattern.
1523
+ (*andsi3_compare0_uxtw): New pattern.
1524
+ (*and_<SHIFT:optab><mode>3_compare0): New pattern.
1525
+ (*and_<SHIFT:optab>si3_compare0_uxtw): New pattern.
1527
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1529
+ Backport from trunk r197153.
1530
+ 2013-03-27 Terry Guo <terry.guo@arm.com>
1532
+ * config/arm/arm-cores.def: Added core cortex-r7.
1533
+ * config/arm/arm-tune.md: Regenerated.
1534
+ * config/arm/arm-tables.opt: Regenerated.
1535
+ * doc/invoke.texi: Added entry for core cortex-r7.
1537
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1539
+ Backport from trunk r197052.
1540
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1542
+ * config/arm/arm.md (f_sels, f_seld): New types.
1543
+ (*cmov<mode>): New pattern.
1544
+ * config/arm/predicates.md (arm_vsel_comparison_operator): New
1547
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1549
+ Backport from trunk r197046.
1550
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1552
+ * config/arm/arm.c (arm_emit_load_exclusive): Add acq parameter.
1553
+ Emit load-acquire versions when acq is true.
1554
+ (arm_emit_store_exclusive): Add rel parameter.
1555
+ Emit store-release versions when rel is true.
1556
+ (arm_split_compare_and_swap): Use acquire-release instructions
1558
+ of barriers when appropriate.
1559
+ (arm_split_atomic_op): Likewise.
1560
+ * config/arm/arm.h (TARGET_HAVE_LDACQ): New macro.
1561
+ * config/arm/unspecs.md (VUNSPEC_LAX): New unspec.
1562
+ (VUNSPEC_SLX): Likewise.
1563
+ (VUNSPEC_LDA): Likewise.
1564
+ (VUNSPEC_STL): Likewise.
1565
+ * config/arm/sync.md (atomic_load<mode>): New pattern.
1566
+ (atomic_store<mode>): Likewise.
1567
+ (arm_load_acquire_exclusive<mode>): Likewise.
1568
+ (arm_load_acquire_exclusivesi): Likewise.
1569
+ (arm_load_acquire_exclusivedi): Likewise.
1570
+ (arm_store_release_exclusive<mode>): Likewise.
1572
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1574
+ Backport from trunk r196876.
1575
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
1577
+ * config/arm/arm-protos.h (tune_params): Add
1578
+ prefer_neon_for_64bits field.
1579
+ * config/arm/arm.c (prefer_neon_for_64bits): New variable.
1580
+ (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
1581
+ (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
1582
+ (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
1583
+ (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
1584
+ (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
1585
+ (arm_option_override): Handle -mneon-for-64bits new option.
1586
+ * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
1587
+ (prefer_neon_for_64bits): Declare new variable.
1588
+ * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
1589
+ avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
1591
+ (arch_enabled): Handle new arch types. Remove support for onlya8
1593
+ (one_cmpldi2): Use new arch names.
1594
+ * config/arm/arm.opt (mneon-for-64bits): Add option.
1595
+ * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
1596
+ (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
1597
+ neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
1599
+ * doc/invoke.texi (-mneon-for-64bits): Document.
1601
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1603
+ Backport from trunk r196858.
1604
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1606
+ * config/aarch64/aarch64-simd.md (simd_fabd): New Attribute.
1607
+ (abd<mode>_3): New pattern.
1608
+ (aba<mode>_3): New pattern.
1609
+ (fabd<mode>_3): New pattern.
1611
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1613
+ Backport from trunk r196856.
1614
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1616
+ * config/aarch64/aarch64-elf.h (REGISTER_PREFIX): Remove.
1617
+ * config/aarch64/aarch64.c (aarch64_print_operand): Remove all
1618
+ occurrence of REGISTER_PREFIX as its empty string.
1619
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
1620
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
1622
+/* { dg-do compile } */
1623
+/* { dg-require-effective-target arm_v8_neon_ok } */
1624
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1625
+/* { dg-add-options arm_v8_neon } */
1630
+foo (float *output, float *input)
1633
+ /* Vectorizable. */
1634
+ for (i = 0; i < N; i++)
1635
+ output[i] = __builtin_floorf (input[i]);
1638
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_floorf } } } */
1639
+/* { dg-final { cleanup-tree-dump "vect" } } */
1640
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
1641
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
1643
+/* Test the `vcvtf32_f16' ARM Neon intrinsic. */
1644
+/* This file was autogenerated by neon-testgen. */
1646
+/* { dg-do assemble } */
1647
+/* { dg-require-effective-target arm_neon_fp16_ok } */
1648
+/* { dg-options "-save-temps -O0" } */
1649
+/* { dg-add-options arm_neon_fp16 } */
1651
+#include "arm_neon.h"
1653
+void test_vcvtf32_f16 (void)
1655
+ float32x4_t out_float32x4_t;
1656
+ float16x4_t arg0_float16x4_t;
1658
+ out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
1661
+/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
1662
+/* { dg-final { cleanup-saved-temps } } */
1663
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
1664
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
1666
+/* Test the `vcvtf16_f32' ARM Neon intrinsic. */
1667
+/* This file was autogenerated by neon-testgen. */
1669
+/* { dg-do assemble } */
1670
+/* { dg-require-effective-target arm_neon_fp16_ok } */
1671
+/* { dg-options "-save-temps -O0" } */
1672
+/* { dg-add-options arm_neon_fp16 } */
1674
+#include "arm_neon.h"
1676
+void test_vcvtf16_f32 (void)
1678
+ float16x4_t out_float16x4_t;
1679
+ float32x4_t arg0_float32x4_t;
1681
+ out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
1684
+/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
1685
+/* { dg-final { cleanup-saved-temps } } */
1686
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
1687
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
1689
+/* { dg-do compile } */
1690
+/* { dg-options "-O1" } */
1693
+muld (unsigned long long X, unsigned long long Y)
1695
+ unsigned long long mask = 0xffffffffull;
1696
+ return (X & mask) * (Y & mask);
1699
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
1700
--- a/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
1701
+++ b/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
1703
+/* { dg-do compile } */
1704
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
1705
+/* { dg-options "-O2" } */
1706
+int foo(int a, int b, int* p, int *q)
1713
+/* { dg-final { scan-assembler "ldrd" } } */
1714
--- a/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
1715
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
1717
+/* { dg-do compile } */
1718
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1719
+/* { dg-options "-O2" } */
1720
+/* { dg-add-options arm_v8_vfp } */
1723
+foo (double x, double y)
1725
+ volatile int i = 0;
1726
+ return i > 0 ? x : y;
1729
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
1730
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
1731
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
1733
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1734
+/* { dg-do compile } */
1735
+/* { dg-options "-O2" } */
1736
+/* { dg-add-options arm_arch_v8a } */
1738
+#include "../aarch64/atomic-op-relaxed.x"
1740
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1741
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1742
+/* { dg-final { scan-assembler-not "dmb" } } */
1743
--- a/src/gcc/testsuite/gcc.target/arm/vselgesf.c
1744
+++ b/src/gcc/testsuite/gcc.target/arm/vselgesf.c
1746
+/* { dg-do compile } */
1747
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1748
+/* { dg-options "-O2" } */
1749
+/* { dg-add-options arm_v8_vfp } */
1752
+foo (float x, float y)
1754
+ volatile int i = 0;
1755
+ return i >= 0 ? x : y;
1758
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
1759
--- a/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
1760
+++ b/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
1762
+/* { dg-do compile } */
1763
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
1764
+/* { dg-options "-O2" } */
1765
+void foo(int a, int b, int* p)
1770
+/* { dg-final { scan-assembler "strd" } } */
1771
--- a/src/gcc/testsuite/gcc.target/arm/negdi-1.c
1772
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-1.c
1774
+/* { dg-do compile } */
1775
+/* { dg-require-effective-target arm32 } */
1776
+/* { dg-options "-O2" } */
1778
+signed long long extendsidi_negsi (signed int x)
1786
+ mov r1, r0, asr #31
1788
+/* { dg-final { scan-assembler-times "rsb" 1 { target { arm_nothumb } } } } */
1789
+/* { dg-final { scan-assembler-times "negs\\t" 1 { target { ! { arm_nothumb } } } } } */
1790
+/* { dg-final { scan-assembler-times "asr" 1 } } */
1791
--- a/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
1792
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
1794
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1795
+/* { dg-do compile } */
1796
+/* { dg-options "-O2" } */
1797
+/* { dg-add-options arm_arch_v8a } */
1799
+#include "../aarch64/atomic-comp-swap-release-acquire.x"
1801
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 4 } } */
1802
+/* { dg-final { scan-assembler-times "stlex" 4 } } */
1803
+/* { dg-final { scan-assembler-not "dmb" } } */
1804
--- a/src/gcc/testsuite/gcc.target/arm/pr19599.c
1805
+++ b/src/gcc/testsuite/gcc.target/arm/pr19599.c
1807
+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" } { "" } } */
1808
+/* { dg-options "-O2 -march=armv5te -marm" } */
1809
+/* { dg-final { scan-assembler "bx" } } */
1811
+int (*indirect_func)();
1813
+int indirect_call()
1815
+ return indirect_func();
1817
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
1818
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
1820
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1821
+/* { dg-do compile } */
1822
+/* { dg-options "-O2" } */
1823
+/* { dg-add-options arm_arch_v8a } */
1825
+#include "../aarch64/atomic-op-seq_cst.x"
1827
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1828
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1829
+/* { dg-final { scan-assembler-not "dmb" } } */
1830
--- a/src/gcc/testsuite/gcc.target/arm/vselgedf.c
1831
+++ b/src/gcc/testsuite/gcc.target/arm/vselgedf.c
1833
+/* { dg-do compile } */
1834
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1835
+/* { dg-options "-O2" } */
1836
+/* { dg-add-options arm_v8_vfp } */
1839
+foo (double x, double y)
1841
+ volatile int i = 0;
1842
+ return i >= 0 ? x : y;
1845
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
1846
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
1847
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
1849
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1850
+/* { dg-do compile } */
1851
+/* { dg-options "-O2" } */
1852
+/* { dg-add-options arm_arch_v8a } */
1854
+#include "../aarch64/atomic-op-consume.x"
1856
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1857
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1858
+/* { dg-final { scan-assembler-not "dmb" } } */
1859
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
1860
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
1862
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1863
+/* { dg-do compile } */
1864
+/* { dg-options "-O2" } */
1865
+/* { dg-add-options arm_arch_v8a } */
1867
+#include "../aarch64/atomic-op-char.x"
1869
+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1870
+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1871
+/* { dg-final { scan-assembler-not "dmb" } } */
1872
--- a/src/gcc/testsuite/gcc.target/arm/vselnesf.c
1873
+++ b/src/gcc/testsuite/gcc.target/arm/vselnesf.c
1875
+/* { dg-do compile } */
1876
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1877
+/* { dg-options "-O2" } */
1878
+/* { dg-add-options arm_v8_vfp } */
1881
+foo (float x, float y)
1883
+ volatile int i = 0;
1884
+ return i != 0 ? x : y;
1887
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
1888
--- a/src/gcc/testsuite/gcc.target/arm/negdi-2.c
1889
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-2.c
1891
+/* { dg-do compile } */
1892
+/* { dg-require-effective-target arm32 } */
1893
+/* { dg-options "-O2" } */
1895
+signed long long zero_extendsidi_negsi (unsigned int x)
1904
+/* { dg-final { scan-assembler-times "rsb\\tr0, r0, #0" 1 { target { arm_nothumb } } } } */
1905
+/* { dg-final { scan-assembler-times "negs\\tr0, r0" 1 { target { ! arm_nothumb } } } } */
1906
+/* { dg-final { scan-assembler-times "mov" 1 } } */
1907
--- a/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
1908
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
1910
+/* { dg-do compile } */
1911
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1912
+/* { dg-options "-O2" } */
1913
+/* { dg-add-options arm_v8_vfp } */
1916
+foo (float x, float y)
1918
+ return !__builtin_isunordered (x, y) ? x : y;
1921
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
1922
--- a/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
1923
+++ b/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
1925
+/* { dg-do compile } */
1926
+/* { dg-options "-O2" } */
1928
+#define MAX(a, b) (a > b ? a : b)
1930
+foo (int a, int b, int c)
1932
+ return c - MAX (a, b);
1935
+/* { dg-final { scan-assembler "rsbge" } } */
1936
+/* { dg-final { scan-assembler "rsblt" } } */
1937
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
1938
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
1940
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1941
+/* { dg-do compile } */
1942
+/* { dg-options "-O2" } */
1943
+/* { dg-add-options arm_arch_v8a } */
1945
+#include "../aarch64/atomic-op-release.x"
1947
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1948
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1949
+/* { dg-final { scan-assembler-not "dmb" } } */
1950
--- a/src/gcc/testsuite/gcc.target/arm/vselvssf.c
1951
+++ b/src/gcc/testsuite/gcc.target/arm/vselvssf.c
1953
+/* { dg-do compile } */
1954
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1955
+/* { dg-options "-O2" } */
1956
+/* { dg-add-options arm_v8_vfp } */
1959
+foo (float x, float y)
1961
+ return __builtin_isunordered (x, y) ? x : y;
1964
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
1965
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
1966
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
1968
+/* { dg-do compile } */
1969
+/* { dg-require-effective-target arm_v8_neon_ok } */
1970
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1971
+/* { dg-add-options arm_v8_neon } */
1976
+foo (float *output, float *input)
1979
+ /* Vectorizable. */
1980
+ for (i = 0; i < N; i++)
1981
+ output[i] = __builtin_roundf (input[i]);
1984
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_roundf } } } */
1985
+/* { dg-final { cleanup-tree-dump "vect" } } */
1986
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
1987
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
1989
+/* Check that Neon is *not* used by default to handle 64-bits scalar
1992
+/* { dg-do compile } */
1993
+/* { dg-require-effective-target arm_neon_ok } */
1994
+/* { dg-options "-O2" } */
1995
+/* { dg-add-options arm_neon } */
1997
+typedef long long i64;
1998
+typedef unsigned long long u64;
1999
+typedef unsigned int u32;
2002
+/* Unary operators */
2003
+#define UNARY_OP(name, op) \
2004
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
2006
+/* Binary operators */
2007
+#define BINARY_OP(name, op) \
2008
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
2010
+/* Unsigned shift */
2011
+#define SHIFT_U(name, op, amount) \
2012
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
2015
+#define SHIFT_S(name, op, amount) \
2016
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
2026
+SHIFT_U(right1, >>, 1)
2027
+SHIFT_U(right2, >>, 2)
2028
+SHIFT_U(right5, >>, 5)
2029
+SHIFT_U(rightn, >>, c)
2031
+SHIFT_S(right1, >>, 1)
2032
+SHIFT_S(right2, >>, 2)
2033
+SHIFT_S(right5, >>, 5)
2034
+SHIFT_S(rightn, >>, c)
2036
+/* { dg-final {scan-assembler-times "vmvn" 0} } */
2037
+/* { dg-final {scan-assembler-times "vadd" 0} } */
2038
+/* { dg-final {scan-assembler-times "vsub" 0} } */
2039
+/* { dg-final {scan-assembler-times "vand" 0} } */
2040
+/* { dg-final {scan-assembler-times "vorr" 0} } */
2041
+/* { dg-final {scan-assembler-times "veor" 0} } */
2042
+/* { dg-final {scan-assembler-times "vshr" 0} } */
2043
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
2044
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
2046
/* Expect a multi-word store for the main part of the copy, but subword
2047
loads/stores for the remainder. */
2049
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
2050
+/* { dg-final { scan-assembler-times "ldmia" 0 } } */
2051
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
2052
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
2053
+/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */
2054
/* { dg-final { scan-assembler-times "ldrh" 1 } } */
2055
/* { dg-final { scan-assembler-times "strh" 1 } } */
2056
/* { dg-final { scan-assembler-times "ldrb" 1 } } */
2057
--- a/src/gcc/testsuite/gcc.target/arm/negdi-3.c
2058
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-3.c
2060
+/* { dg-do compile } */
2061
+/* { dg-require-effective-target arm32 } */
2062
+/* { dg-options "-O2" } */
2064
+signed long long negdi_zero_extendsidi (unsigned int x)
2066
+ return -((signed long long) x);
2073
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
2074
+/* { dg-final { scan-assembler-times "sbc" 1 } } */
2075
+/* { dg-final { scan-assembler-times "mov" 0 } } */
2076
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
2077
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
2078
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
2080
+/* { dg-require-effective-target arm_arch_v8a_ok } */
2081
+/* { dg-do compile } */
2082
+/* { dg-options "-O2" } */
2083
+/* { dg-add-options arm_arch_v8a } */
2085
+#include "../aarch64/atomic-op-acq_rel.x"
2087
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2088
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2089
+/* { dg-final { scan-assembler-not "dmb" } } */
2090
--- a/src/gcc/testsuite/gcc.target/arm/vselltsf.c
2091
+++ b/src/gcc/testsuite/gcc.target/arm/vselltsf.c
2093
+/* { dg-do compile } */
2094
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2095
+/* { dg-options "-O2" } */
2096
+/* { dg-add-options arm_v8_vfp } */
2099
+foo (float x, float y)
2101
+ volatile int i = 0;
2102
+ return i < 0 ? x : y;
2105
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
2106
--- a/src/gcc/testsuite/gcc.target/arm/vselnedf.c
2107
+++ b/src/gcc/testsuite/gcc.target/arm/vselnedf.c
2109
+/* { dg-do compile } */
2110
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2111
+/* { dg-options "-O2" } */
2112
+/* { dg-add-options arm_v8_vfp } */
2115
+foo (double x, double y)
2117
+ volatile int i = 0;
2118
+ return i != 0 ? x : y;
2121
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
2122
--- a/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
2123
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
2125
+/* { dg-do compile } */
2126
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2127
+/* { dg-options "-O2" } */
2128
+/* { dg-add-options arm_v8_vfp } */
2131
+foo (double x, double y)
2133
+ return !__builtin_isunordered (x, y) ? x : y;
2136
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
2137
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
2138
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
2140
+/* { dg-do compile } */
2141
+/* { dg-require-effective-target arm_v8_neon_ok } */
2142
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
2143
+/* { dg-add-options arm_v8_neon } */
2148
+foo (float *output, float *input)
2151
+ /* Vectorizable. */
2152
+ for (i = 0; i < N; i++)
2153
+ output[i] = __builtin_truncf (input[i]);
2156
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_btruncf } } } */
2157
+/* { dg-final { cleanup-tree-dump "vect" } } */
2158
--- a/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
2159
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
2161
+/* { dg-do compile } */
2162
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2163
+/* { dg-options "-O2" } */
2164
+/* { dg-add-options arm_v8_vfp } */
2167
+foo (float x, float y)
2169
+ volatile int i = 0;
2170
+ return i == 0 ? x : y;
2173
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
2174
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
2175
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
2177
+/* Check that Neon is used to handle 64-bits scalar operations. */
2179
+/* { dg-do compile } */
2180
+/* { dg-require-effective-target arm_neon_ok } */
2181
+/* { dg-options "-O2 -mneon-for-64bits" } */
2182
+/* { dg-add-options arm_neon } */
2184
+typedef long long i64;
2185
+typedef unsigned long long u64;
2186
+typedef unsigned int u32;
2189
+/* Unary operators */
2190
+#define UNARY_OP(name, op) \
2191
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
2193
+/* Binary operators */
2194
+#define BINARY_OP(name, op) \
2195
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
2197
+/* Unsigned shift */
2198
+#define SHIFT_U(name, op, amount) \
2199
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
2202
+#define SHIFT_S(name, op, amount) \
2203
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
2213
+SHIFT_U(right1, >>, 1)
2214
+SHIFT_U(right2, >>, 2)
2215
+SHIFT_U(right5, >>, 5)
2216
+SHIFT_U(rightn, >>, c)
2218
+SHIFT_S(right1, >>, 1)
2219
+SHIFT_S(right2, >>, 2)
2220
+SHIFT_S(right5, >>, 5)
2221
+SHIFT_S(rightn, >>, c)
2223
+/* { dg-final {scan-assembler-times "vmvn" 1} } */
2224
+/* Two vadd: 1 in unary_not, 1 in binary_add */
2225
+/* { dg-final {scan-assembler-times "vadd" 2} } */
2226
+/* { dg-final {scan-assembler-times "vsub" 1} } */
2227
+/* { dg-final {scan-assembler-times "vand" 1} } */
2228
+/* { dg-final {scan-assembler-times "vorr" 1} } */
2229
+/* { dg-final {scan-assembler-times "veor" 1} } */
2230
+/* 6 vshr for right shifts by constant, and variable right shift uses
2231
+ vshl with a negative amount in register. */
2232
+/* { dg-final {scan-assembler-times "vshr" 6} } */
2233
+/* { dg-final {scan-assembler-times "vshl" 2} } */
2234
--- a/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
2235
+++ b/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
2237
+/* { dg-do compile } */
2238
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2239
+/* { dg-options "-O2" } */
2240
+/* { dg-add-options arm_v8_vfp } */
2243
+foo (double x, double y)
2245
+ return __builtin_isunordered (x, y) ? x : y;
2248
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
2249
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
2250
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
2252
/* Expect a multi-word load for the main part of the copy, but subword
2253
loads/stores for the remainder. */
2255
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
2256
-/* { dg-final { scan-assembler-times "ldrh" 1 } } */
2257
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
2258
+/* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */
2259
+/* { dg-final { scan-assembler-times "strd" 0 } } */
2260
+/* { dg-final { scan-assembler-times "stm" 0 } } */
2261
+/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
2262
/* { dg-final { scan-assembler-times "strh" 1 } } */
2263
-/* { dg-final { scan-assembler-times "ldrb" 1 } } */
2264
+/* { dg-final { scan-assembler-times "ldrb" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
2265
/* { dg-final { scan-assembler-times "strb" 1 } } */
2266
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
2267
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
2269
+/* { dg-do compile } */
2270
+/* { dg-options "-O1" } */
2272
+long long muld(long long X, long long Y)
2277
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
2278
--- a/src/gcc/testsuite/gcc.target/arm/negdi-4.c
2279
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-4.c
2281
+/* { dg-do compile } */
2282
+/* { dg-require-effective-target arm32 } */
2283
+/* { dg-options "-O2" } */
2285
+signed long long negdi_extendsidi (signed int x)
2287
+ return -((signed long long) x);
2292
+ mov r1, r0, asr #31
2294
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
2295
+/* { dg-final { scan-assembler-times "asr" 1 } } */
2296
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
2297
--- a/src/gcc/testsuite/gcc.target/arm/vselltdf.c
2298
+++ b/src/gcc/testsuite/gcc.target/arm/vselltdf.c
2300
+/* { dg-do compile } */
2301
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2302
+/* { dg-options "-O2" } */
2303
+/* { dg-add-options arm_v8_vfp } */
2306
+foo (double x, double y)
2308
+ volatile int i = 0;
2309
+ return i < 0 ? x : y;
2312
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
2313
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
2314
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
2317
/* We know both src and dest to be aligned: expect multiword loads/stores. */
2319
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
2320
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
2321
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
2322
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
2323
+/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */
2324
+/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */
2325
+/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */
2326
+/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */
2327
--- a/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
2328
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
2330
+/* { dg-do compile } */
2331
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2332
+/* { dg-options "-O2" } */
2333
+/* { dg-add-options arm_v8_vfp } */
2336
+foo (double x, double y)
2338
+ volatile int i = 0;
2339
+ return i == 0 ? x : y;
2342
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
2343
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
2344
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
2346
+/* { dg-require-effective-target arm_arch_v8a_ok } */
2347
+/* { dg-do compile } */
2348
+/* { dg-options "-O2" } */
2349
+/* { dg-add-options arm_arch_v8a } */
2351
+#include "../aarch64/atomic-op-acquire.x"
2353
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2354
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2355
+/* { dg-final { scan-assembler-not "dmb" } } */
2356
--- a/src/gcc/testsuite/gcc.target/arm/vsellesf.c
2357
+++ b/src/gcc/testsuite/gcc.target/arm/vsellesf.c
2359
+/* { dg-do compile } */
2360
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2361
+/* { dg-options "-O2" } */
2362
+/* { dg-add-options arm_v8_vfp } */
2365
+foo (float x, float y)
2367
+ volatile int i = 0;
2368
+ return i <= 0 ? x : y;
2371
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
2372
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
2373
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
2375
+/* { dg-require-effective-target arm_arch_v8a_ok } */
2376
+/* { dg-do compile } */
2377
+/* { dg-options "-O2" } */
2378
+/* { dg-add-options arm_arch_v8a } */
2380
+#include "../aarch64/atomic-op-int.x"
2382
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2383
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2384
+/* { dg-final { scan-assembler-not "dmb" } } */
2385
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
2386
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
2388
+/* { dg-require-effective-target arm_arch_v8a_ok } */
2389
+/* { dg-do compile } */
2390
+/* { dg-options "-O2" } */
2391
+/* { dg-add-options arm_arch_v8a } */
2393
+#include "../aarch64/atomic-op-short.x"
2395
+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2396
+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
2397
+/* { dg-final { scan-assembler-not "dmb" } } */
2398
--- a/src/gcc/testsuite/gcc.target/arm/pr40887.c
2399
+++ b/src/gcc/testsuite/gcc.target/arm/pr40887.c
2401
/* { dg-options "-O2 -march=armv5te" } */
2402
/* { dg-final { scan-assembler "blx" } } */
2404
-int (*indirect_func)();
2405
+int (*indirect_func)(int x);
2409
- return indirect_func();
2410
+ return indirect_func(20) + indirect_func (40);
2412
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
2413
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
2415
+/* { dg-do compile } */
2416
+/* { dg-require-effective-target arm_v8_neon_ok } */
2417
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
2418
+/* { dg-add-options arm_v8_neon } */
2423
+foo (float *output, float *input)
2426
+ /* Vectorizable. */
2427
+ for (i = 0; i < N; i++)
2428
+ output[i] = __builtin_ceilf (input[i]);
2431
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_ceilf } } } */
2432
+/* { dg-final { cleanup-tree-dump "vect" } } */
2433
--- a/src/gcc/testsuite/gcc.target/arm/vselledf.c
2434
+++ b/src/gcc/testsuite/gcc.target/arm/vselledf.c
2436
+/* { dg-do compile } */
2437
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2438
+/* { dg-options "-O2" } */
2439
+/* { dg-add-options arm_v8_vfp } */
2442
+foo (double x, double y)
2444
+ volatile int i = 0;
2445
+ return i <= 0 ? x : y;
2448
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
2449
--- a/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
2450
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
2452
+/* { dg-do compile } */
2453
+/* { dg-require-effective-target arm_v8_vfp_ok } */
2454
+/* { dg-options "-O2" } */
2455
+/* { dg-add-options arm_v8_vfp } */
2458
+foo (float x, float y)
2460
+ volatile int i = 0;
2461
+ return i > 0 ? x : y;
2464
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
2465
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
2466
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
2468
+/* { dg-do run } */
2469
+/* { dg-options "-O3 --save-temps" } */
2471
+#include <arm_neon.h>
2473
+#include <stdlib.h>
2476
+test_frecps_float32_t (void)
2479
+ float32_t value = 0.2;
2480
+ float32_t reciprocal = 5.0;
2481
+ float32_t step = vrecpes_f32 (value);
2482
+ /* 3 steps should give us within ~0.001 accuracy. */
2483
+ for (i = 0; i < 3; i++)
2484
+ step = step * vrecpss_f32 (step, value);
2486
+ return fabs (step - reciprocal) < 0.001;
2489
+/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
2490
+/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
2493
+test_frecps_float32x2_t (void)
2498
+ const float32_t value_pool[] = {0.2, 0.4};
2499
+ const float32_t reciprocal_pool[] = {5.0, 2.5};
2500
+ float32x2_t value = vld1_f32 (value_pool);
2501
+ float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
2503
+ float32x2_t step = vrecpe_f32 (value);
2504
+ /* 3 steps should give us within ~0.001 accuracy. */
2505
+ for (i = 0; i < 3; i++)
2506
+ step = step * vrecps_f32 (step, value);
2508
+ ret &= fabs (vget_lane_f32 (step, 0)
2509
+ - vget_lane_f32 (reciprocal, 0)) < 0.001;
2510
+ ret &= fabs (vget_lane_f32 (step, 1)
2511
+ - vget_lane_f32 (reciprocal, 1)) < 0.001;
2516
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
2517
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
2520
+test_frecps_float32x4_t (void)
2525
+ const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
2526
+ const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
2527
+ float32x4_t value = vld1q_f32 (value_pool);
2528
+ float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
2530
+ float32x4_t step = vrecpeq_f32 (value);
2531
+ /* 3 steps should give us within ~0.001 accuracy. */
2532
+ for (i = 0; i < 3; i++)
2533
+ step = step * vrecpsq_f32 (step, value);
2535
+ ret &= fabs (vgetq_lane_f32 (step, 0)
2536
+ - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
2537
+ ret &= fabs (vgetq_lane_f32 (step, 1)
2538
+ - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
2539
+ ret &= fabs (vgetq_lane_f32 (step, 2)
2540
+ - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
2541
+ ret &= fabs (vgetq_lane_f32 (step, 3)
2542
+ - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
2547
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
2548
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
2551
+test_frecps_float64_t (void)
2554
+ float64_t value = 0.2;
2555
+ float64_t reciprocal = 5.0;
2556
+ float64_t step = vrecped_f64 (value);
2557
+ /* 3 steps should give us within ~0.001 accuracy. */
2558
+ for (i = 0; i < 3; i++)
2559
+ step = step * vrecpsd_f64 (step, value);
2561
+ return fabs (step - reciprocal) < 0.001;
2564
+/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
2565
+/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
2568
+test_frecps_float64x2_t (void)
2573
+ const float64_t value_pool[] = {0.2, 0.4};
2574
+ const float64_t reciprocal_pool[] = {5.0, 2.5};
2575
+ float64x2_t value = vld1q_f64 (value_pool);
2576
+ float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
2578
+ float64x2_t step = vrecpeq_f64 (value);
2579
+ /* 3 steps should give us within ~0.001 accuracy. */
2580
+ for (i = 0; i < 3; i++)
2581
+ step = step * vrecpsq_f64 (step, value);
2583
+ ret &= fabs (vgetq_lane_f64 (step, 0)
2584
+ - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
2585
+ ret &= fabs (vgetq_lane_f64 (step, 1)
2586
+ - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
2591
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
2592
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
2595
+main (int argc, char **argv)
2597
+ if (!test_frecps_float32_t ())
2599
+ if (!test_frecps_float32x2_t ())
2601
+ if (!test_frecps_float32x4_t ())
2603
+ if (!test_frecps_float64_t ())
2605
+ if (!test_frecps_float64x2_t ())
2611
+/* { dg-final { cleanup-saved-temps } } */
2612
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c
2613
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c
2615
+/* { dg-do run } */
2616
+/* { dg-options "-O3 --save-temps" } */
2618
+#include <arm_neon.h>
2620
+extern void abort (void);
2621
+extern float fabsf (float);
2622
+extern double fabs (double);
2624
+#define NUM_TESTS 8
2626
+float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
2627
+float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f};
2628
+double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
2629
+double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5};
2631
+#define TEST(TEST, CMP, SUFFIX, WIDTH, F) \
2633
+test_fca##TEST##SUFFIX##_float##WIDTH##_t (void) \
2637
+ uint##WIDTH##_t output[NUM_TESTS]; \
2639
+ for (i = 0; i < NUM_TESTS; i++) \
2641
+ float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \
2642
+ float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \
2643
+ /* Inhibit optimization of our linear test loop. */ \
2644
+ asm volatile ("" : : : "memory"); \
2645
+ output[i] = f1 CMP f2 ? -1 : 0; \
2648
+ for (i = 0; i < NUM_TESTS; i++) \
2650
+ output[i] = vca##TEST##SUFFIX##_f##WIDTH (input_##SUFFIX##1[i], \
2651
+ input_##SUFFIX##2[i]) \
2653
+ /* Inhibit autovectorization of our scalar test loop. */ \
2654
+ asm volatile ("" : : : "memory"); \
2657
+ for (i = 0; i < NUM_TESTS; i++) \
2658
+ ret |= output[i]; \
2663
+TEST (ge, >=, s, 32, f)
2664
+/* { dg-final { scan-assembler "facge\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
2665
+TEST (ge, >=, d, 64, )
2666
+/* { dg-final { scan-assembler "facge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
2667
+TEST (gt, >, s, 32, f)
2668
+/* { dg-final { scan-assembler "facgt\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
2669
+TEST (gt, >, d, 64, )
2670
+/* { dg-final { scan-assembler "facgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
2673
+main (int argc, char **argv)
2675
+ if (test_fcages_float32_t ())
2677
+ if (test_fcaged_float64_t ())
2679
+ if (test_fcagts_float32_t ())
2681
+ if (test_fcagtd_float64_t ())
2686
+/* { dg-final { cleanup-saved-temps } } */
2687
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
2688
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
2693
+atomic_fetch_add_ACQ_REL (int a)
2695
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
2699
+atomic_fetch_sub_ACQ_REL (int a)
2701
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
2705
+atomic_fetch_and_ACQ_REL (int a)
2707
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
2711
+atomic_fetch_nand_ACQ_REL (int a)
2713
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
2717
+atomic_fetch_xor_ACQ_REL (int a)
2719
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
2723
+atomic_fetch_or_ACQ_REL (int a)
2725
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
2727
--- a/src/gcc/testsuite/gcc.target/aarch64/extr.c
2728
+++ b/src/gcc/testsuite/gcc.target/aarch64/extr.c
2730
+/* { dg-options "-O2 --save-temps" } */
2731
+/* { dg-do run } */
2733
+extern void abort (void);
2736
+test_si (int a, int b)
2738
+ /* { dg-final { scan-assembler "extr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, 27\n" } } */
2739
+ return (a << 5) | ((unsigned int) b >> 27);
2743
+test_di (long long a, long long b)
2745
+ /* { dg-final { scan-assembler "extr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, 45\n" } } */
2746
+ return (a << 19) | ((unsigned long long) b >> 45);
2754
+ v = test_si (0x00000004, 0x30000000);
2755
+ if (v != 0x00000086)
2757
+ w = test_di (0x0001040040040004ll, 0x0070050066666666ll);
2758
+ if (w != 0x2002002000200380ll)
2763
+/* { dg-final { cleanup-saved-temps } } */
2764
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
2765
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
2767
/* { dg-final { scan-assembler "uminv" } } */
2768
/* { dg-final { scan-assembler "smaxv" } } */
2769
/* { dg-final { scan-assembler "sminv" } } */
2770
+/* { dg-final { scan-assembler "sabd" } } */
2771
+/* { dg-final { scan-assembler "saba" } } */
2772
/* { dg-final { scan-assembler-times "addv" 2} } */
2773
/* { dg-final { scan-assembler-times "addp" 2} } */
2774
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
2775
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
2777
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
2779
#define FTYPE double
2784
#include "vect-fcm.x"
2786
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
2787
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
2788
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2789
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
2790
/* { dg-final { cleanup-tree-dump "vect" } } */
2791
--- a/src/gcc/testsuite/gcc.target/aarch64/adds3.c
2792
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds3.c
2794
+/* { dg-do run } */
2795
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2797
+extern void abort (void);
2798
+typedef long long s64;
2801
+adds_ext (s64 a, int b, int c)
2812
+adds_shift_ext (s64 a, int b, int c)
2814
+ s64 d = (a + ((s64)b << 3));
2827
+ x = adds_ext (0x13000002ll, 41, 15);
2828
+ if (x != 318767203)
2831
+ x = adds_ext (0x50505050ll, 29, 4);
2832
+ if (x != 1347440782)
2835
+ x = adds_ext (0x12121212121ll, 2, 14);
2836
+ if (x != 555819315)
2839
+ x = adds_shift_ext (0x123456789ll, 4, 12);
2840
+ if (x != 591751097)
2843
+ x = adds_shift_ext (0x02020202ll, 9, 8);
2844
+ if (x != 33686107)
2847
+ x = adds_shift_ext (0x987987987987ll, 23, 41);
2848
+ if (x != -2020050305)
2854
+/* { dg-final { scan-assembler-times "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
2855
--- a/src/gcc/testsuite/gcc.target/aarch64/subs2.c
2856
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs2.c
2858
+/* { dg-do run } */
2859
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2861
+extern void abort (void);
2864
+subs_si_test1 (int a, int b, int c)
2868
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
2869
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
2877
+subs_si_test2 (int a, int b, int c)
2879
+ int d = a - 0xfff;
2881
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
2882
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
2890
+subs_si_test3 (int a, int b, int c)
2892
+ int d = a - (b << 3);
2894
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
2895
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
2902
+typedef long long s64;
2905
+subs_di_test1 (s64 a, s64 b, s64 c)
2909
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
2910
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
2918
+subs_di_test2 (s64 a, s64 b, s64 c)
2920
+ s64 d = a - 0x1000ll;
2922
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
2923
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
2931
+subs_di_test3 (s64 a, s64 b, s64 c)
2933
+ s64 d = a - (b << 3);
2935
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
2936
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
2948
+ x = subs_si_test1 (29, 4, 5);
2952
+ x = subs_si_test1 (5, 2, 20);
2956
+ x = subs_si_test2 (29, 4, 5);
2960
+ x = subs_si_test2 (1024, 2, 20);
2964
+ x = subs_si_test3 (35, 4, 5);
2968
+ x = subs_si_test3 (5, 2, 20);
2972
+ y = subs_di_test1 (0x130000029ll,
2976
+ if (y != 0x63505052e)
2979
+ y = subs_di_test1 (0x5000500050005ll,
2980
+ 0x2111211121112ll,
2981
+ 0x0000000002020ll);
2982
+ if (y != 0x5000500052025)
2985
+ y = subs_di_test2 (0x130000029ll,
2988
+ if (y != 0x95504f532)
2991
+ y = subs_di_test2 (0x540004100ll,
2994
+ if (y != 0x1065053309)
2997
+ y = subs_di_test3 (0x130000029ll,
3000
+ if (y != 0x63505052e)
3003
+ y = subs_di_test3 (0x130002900ll,
3006
+ if (y != 0x635052e05)
3012
+/* { dg-final { cleanup-saved-temps } } */
3013
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c
3014
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c
3016
+/* { dg-do run } */
3017
+/* { dg-options "-O3 --save-temps -ffast-math" } */
3019
+#include <arm_neon.h>
3021
+extern void abort (void);
3023
+#define NUM_TESTS 16
3024
+#define DELTA 0.000001
3026
+int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
3027
+ -4, 34, 110, -110, 6, 4, 75, -34};
3028
+int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
3029
+ -4, 34, 110, -110, 6, 4, 75, -34};
3030
+int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
3031
+ -4, 34, 110, -110, 6, 4, 75, -34};
3033
+uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
3034
+ 4, 34, 110, 110, 6, 4, 75, 34};
3035
+uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
3036
+ 4, 34, 110, 110, 6, 4, 75, 34};
3037
+uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
3038
+ 4, 34, 110, 110, 6, 4, 75, 34};
3040
+#define EQUAL(a, b) (a == b)
3042
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \
3044
+test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \
3047
+ int moves = (NUM_TESTS - LANES) + 1; \
3048
+ TYPE##_t out_l[NUM_TESTS]; \
3049
+ TYPE##_t out_v[NUM_TESTS]; \
3051
+ /* Calculate linearly. */ \
3052
+ for (i = 0; i < moves; i++) \
3054
+ out_l[i] = input_##TYPE[i]; \
3055
+ for (j = 0; j < LANES; j++) \
3056
+ out_l[i] = input_##TYPE[i + j] CMP_OP out_l[i] ? \
3057
+ input_##TYPE[i + j] : out_l[i]; \
3060
+ /* Calculate using vector reduction intrinsics. */ \
3061
+ for (i = 0; i < moves; i++) \
3063
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
3064
+ out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \
3068
+ for (i = 0; i < moves; i++) \
3070
+ if (!EQUAL (out_v[i], out_l[i])) \
3076
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64) \
3077
+TEST (max, >, STYPE, , TYPE, W32) \
3078
+TEST (max, >, STYPE, q, TYPE, W64) \
3079
+TEST (min, <, STYPE, , TYPE, W32) \
3080
+TEST (min, <, STYPE, q, TYPE, W64)
3082
+BUILD_VARIANTS (int8, s8, 8, 16)
3083
+/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
3084
+/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
3085
+/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
3086
+/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
3087
+BUILD_VARIANTS (uint8, u8, 8, 16)
3088
+/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
3089
+/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
3090
+/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
3091
+/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
3092
+BUILD_VARIANTS (int16, s16, 4, 8)
3093
+/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
3094
+/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
3095
+/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
3096
+/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
3097
+BUILD_VARIANTS (uint16, u16, 4, 8)
3098
+/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
3099
+/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
3100
+/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
3101
+/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
3102
+BUILD_VARIANTS (int32, s32, 2, 4)
3103
+/* { dg-final { scan-assembler "smaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3104
+/* { dg-final { scan-assembler "sminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3105
+/* { dg-final { scan-assembler "smaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
3106
+/* { dg-final { scan-assembler "sminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
3107
+BUILD_VARIANTS (uint32, u32, 2, 4)
3108
+/* { dg-final { scan-assembler "umaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3109
+/* { dg-final { scan-assembler "uminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3110
+/* { dg-final { scan-assembler "umaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
3111
+/* { dg-final { scan-assembler "uminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
3114
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \
3116
+ if (!test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t ()) \
3121
+main (int argc, char **argv)
3123
+ BUILD_VARIANTS (int8, s8, 8, 16)
3124
+ BUILD_VARIANTS (uint8, u8, 8, 16)
3125
+ BUILD_VARIANTS (int16, s16, 4, 8)
3126
+ BUILD_VARIANTS (uint16, u16, 4, 8)
3127
+ BUILD_VARIANTS (int32, s32, 2, 4)
3128
+ BUILD_VARIANTS (uint32, u32, 2, 4)
3132
+/* { dg-final { cleanup-saved-temps } } */
3133
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
3134
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
3136
+/* { dg-do run } */
3137
+/* { dg-options "-O3 --save-temps" } */
3139
+#include <arm_neon.h>
3141
+#include <stdlib.h>
3144
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
3145
+float32_t rec_f[] =
3146
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
3148
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
3149
+float32_t rec_d[] =
3150
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
3153
+test_frecpx_float32_t (void)
3157
+ for (i = 0; i < 8; i++)
3158
+ ret &= fabs (vrecpxs_f32 (in_f[i]) - rec_f[i]) < 0.001;
3163
+/* { dg-final { scan-assembler "frecpx\\ts\[0-9\]+, s\[0-9\]+" } } */
3166
+test_frecpx_float64_t (void)
3170
+ for (i = 0; i < 8; i++)
3171
+ ret &= fabs (vrecpxd_f64 (in_d[i]) - rec_d[i]) < 0.001;
3176
+/* { dg-final { scan-assembler "frecpx\\td\[0-9\]+, d\[0-9\]+" } } */
3179
+main (int argc, char **argv)
3181
+ if (!test_frecpx_float32_t ())
3183
+ if (!test_frecpx_float64_t ())
3189
+/* { dg-final { cleanup-saved-temps } } */
3190
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c
3191
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c
3193
+/* { dg-do run } */
3194
+/* { dg-options "-O3 --save-temps" } */
3196
+#include <arm_neon.h>
3198
+extern void abort (void);
3199
+extern float fabsf (float);
3200
+extern double fabs (double);
3202
+#define NUM_TESTS 8
3204
+float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
3205
+float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f};
3206
+double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
3207
+double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5};
3209
+#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \
3211
+test_vca##T##_float##WIDTH##x##LANES##_t (void) \
3215
+ uint##WIDTH##_t output[NUM_TESTS]; \
3217
+ for (i = 0; i < NUM_TESTS; i++) \
3219
+ float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \
3220
+ float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \
3221
+ /* Inhibit optimization of our linear test loop. */ \
3222
+ asm volatile ("" : : : "memory"); \
3223
+ output[i] = f1 CMP f2 ? -1 : 0; \
3226
+ for (i = 0; i < NUM_TESTS; i += LANES) \
3228
+ float##WIDTH##x##LANES##_t in1 = \
3229
+ vld1##Q##_f##WIDTH (input_##SUFFIX##1 + i); \
3230
+ float##WIDTH##x##LANES##_t in2 = \
3231
+ vld1##Q##_f##WIDTH (input_##SUFFIX##2 + i); \
3232
+ uint##WIDTH##x##LANES##_t expected_out = \
3233
+ vld1##Q##_u##WIDTH (output + i); \
3234
+ uint##WIDTH##x##LANES##_t out = \
3235
+ veor##Q##_u##WIDTH (vca##T##Q##_f##WIDTH (in1, in2), \
3237
+ vst1##Q##_u##WIDTH (output + i, out); \
3240
+ for (i = 0; i < NUM_TESTS; i++) \
3241
+ ret |= output[i]; \
3246
+#define BUILD_VARIANTS(T, CMP) \
3247
+TEST (T, CMP, s, 32, 2, , f) \
3248
+TEST (T, CMP, s, 32, 4, q, f) \
3249
+TEST (T, CMP, d, 64, 2, q, )
3251
+BUILD_VARIANTS (ge, >=)
3252
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3253
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3254
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3256
+BUILD_VARIANTS (gt, >)
3257
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3258
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3259
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3261
+/* No need for another scan-assembler as these tests
3262
+ also generate facge, facgt instructions. */
3263
+BUILD_VARIANTS (le, <=)
3264
+BUILD_VARIANTS (lt, <)
3267
+#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \
3268
+if (test_vca##T##_float##WIDTH##x##LANES##_t ()) \
3272
+main (int argc, char **argv)
3274
+BUILD_VARIANTS (ge, >=)
3275
+BUILD_VARIANTS (gt, >)
3276
+BUILD_VARIANTS (le, <=)
3277
+BUILD_VARIANTS (lt, <)
3281
+/* { dg-final { cleanup-saved-temps } } */
3282
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
3283
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
3285
+/* { dg-do run } */
3286
+/* { dg-options "-O3 --save-temps" } */
3288
+#include <arm_neon.h>
3290
+extern void abort (void);
3291
+extern float fabsf (float);
3292
+extern double fabs (double);
3294
+extern double trunc (double);
3295
+extern double round (double);
3296
+extern double nearbyint (double);
3297
+extern double floor (double);
3298
+extern double ceil (double);
3299
+extern double rint (double);
3301
+extern float truncf (float);
3302
+extern float roundf (float);
3303
+extern float nearbyintf (float);
3304
+extern float floorf (float);
3305
+extern float ceilf (float);
3306
+extern float rintf (float);
3308
+#define NUM_TESTS 8
3309
+#define DELTA 0.000001
3311
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
3312
+ 200.0f, -800.0f, -13.0f, -0.5f};
3313
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
3314
+ 200.0, -800.0, -13.0, -0.5};
3316
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
3318
+test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \
3322
+ int nlanes = LANES; \
3323
+ float##WIDTH##_t expected_out[NUM_TESTS]; \
3324
+ float##WIDTH##_t actual_out[NUM_TESTS]; \
3326
+ for (i = 0; i < NUM_TESTS; i++) \
3328
+ expected_out[i] = C_FN##F (input_f##WIDTH[i]); \
3329
+ /* Don't vectorize this. */ \
3330
+ asm volatile ("" : : : "memory"); \
3333
+ /* Prevent the compiler from noticing these two loops do the same \
3334
+ thing and optimizing away the comparison. */ \
3335
+ asm volatile ("" : : : "memory"); \
3337
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
3339
+ float##WIDTH##x##LANES##_t out = \
3340
+ vrnd##SUFFIX##Q##_f##WIDTH \
3341
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
3342
+ vst1##Q##_f##WIDTH (actual_out + i, out); \
3345
+ for (i = 0; i < NUM_TESTS; i++) \
3346
+ ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \
3352
+#define BUILD_VARIANTS(SUFFIX, C_FN) \
3353
+TEST (SUFFIX, , 32, 2, C_FN, f) \
3354
+TEST (SUFFIX, q, 32, 4, C_FN, f) \
3355
+TEST (SUFFIX, q, 64, 2, C_FN, ) \
3357
+BUILD_VARIANTS ( , trunc)
3358
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3359
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3360
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3361
+BUILD_VARIANTS (a, round)
3362
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3363
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3364
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3365
+BUILD_VARIANTS (i, nearbyint)
3366
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3367
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3368
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3369
+BUILD_VARIANTS (m, floor)
3370
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3371
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3372
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3373
+BUILD_VARIANTS (p, ceil)
3374
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3375
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3376
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3377
+BUILD_VARIANTS (x, rint)
3378
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3379
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3380
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3383
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
3385
+ if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \
3390
+main (int argc, char **argv)
3392
+ BUILD_VARIANTS ( , trunc)
3393
+ BUILD_VARIANTS (a, round)
3394
+ BUILD_VARIANTS (i, nearbyint)
3395
+ BUILD_VARIANTS (m, floor)
3396
+ BUILD_VARIANTS (p, ceil)
3397
+ BUILD_VARIANTS (x, rint)
3401
+/* { dg-final { cleanup-saved-temps } } */
3402
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
3403
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
3405
/* { dg-do compile } */
3406
/* { dg-options "-O2" } */
3409
+#include "atomic-op-relaxed.x"
3412
-atomic_fetch_add_RELAXED (int a)
3414
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3418
-atomic_fetch_sub_RELAXED (int a)
3420
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3424
-atomic_fetch_and_RELAXED (int a)
3426
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3430
-atomic_fetch_nand_RELAXED (int a)
3432
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3436
-atomic_fetch_xor_RELAXED (int a)
3438
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3442
-atomic_fetch_or_RELAXED (int a)
3444
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3447
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3448
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3449
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
3450
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
3452
2.0, -4.0, 8.0, -16.0,
3453
-2.125, 4.25, -8.5, 17.0};
3455
+/* Float comparisons, float results. */
3458
foo (FTYPE *in1, FTYPE *in2, FTYPE *output)
3461
output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
3464
+/* Float comparisons, int results. */
3467
+foo_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
3470
+ /* Vectorizable. */
3471
+ for (i = 0; i < N; i++)
3472
+ output[i] = (in1[i] OP in2[i]) ? 2 : 4;
3476
+bar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
3479
+ /* Vectorizable. */
3480
+ for (i = 0; i < N; i++)
3481
+ output[i] = (in1[i] INV_OP in2[i]) ? 4 : 2;
3485
+foobar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
3488
+ /* Vectorizable. */
3489
+ for (i = 0; i < N; i++)
3490
+ output[i] = (in1[i] OP 0.0) ? 4 : 2;
3494
+foobarbar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
3497
+ /* Vectorizable. */
3498
+ for (i = 0; i < N; i++)
3499
+ output[i] = (in1[i] INV_OP 0.0) ? 4 : 2;
3503
main (int argc, char **argv)
3511
foo (input1, input2, out1);
3512
bar (input1, input2, out2);
3514
for (i = 0; i < N; i++)
3515
if (out1[i] == out2[i])
3518
+ foo_int (input1, input2, outi1);
3519
+ bar_int (input1, input2, outi2);
3520
+ for (i = 0; i < N; i++)
3521
+ if (outi1[i] != outi2[i])
3523
+ foobar_int (input1, input2, outi1);
3524
+ foobarbar_int (input1, input2, outi2);
3525
+ for (i = 0; i < N; i++)
3526
+ if (outi1[i] == outi2[i])
3531
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
3532
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
3535
+/* { dg-do compile } */
3536
+/* { dg-options "-O3" } */
3538
+#include "arm_neon.h"
3540
+#include "vaddv-intrinsic.x"
3542
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
3543
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
3544
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
3545
--- a/src/gcc/testsuite/gcc.target/aarch64/movi_1.c
3546
+++ b/src/gcc/testsuite/gcc.target/aarch64/movi_1.c
3548
+/* { dg-do compile } */
3549
+/* { dg-options "-O2" } */
3554
+ /* { dg-final { scan-assembler "movi\tv\[0-9\]+\.4h, 0x4, lsl 8" } } */
3555
+ /* { dg-final { scan-assembler-not "movi\tv\[0-9\]+\.4h, 0x400" } } */
3556
+ /* { dg-final { scan-assembler-not "movi\tv\[0-9\]+\.4h, 1024" } } */
3557
+ register short x asm ("h8") = 1024;
3558
+ asm volatile ("" : : "w" (x));
3561
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
3562
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
3567
+atomic_fetch_add_RELAXED (int a)
3569
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3573
+atomic_fetch_sub_RELAXED (int a)
3575
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3579
+atomic_fetch_and_RELAXED (int a)
3581
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3585
+atomic_fetch_nand_RELAXED (int a)
3587
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3591
+atomic_fetch_xor_RELAXED (int a)
3593
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3597
+atomic_fetch_or_RELAXED (int a)
3599
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3601
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.c
3602
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.c
3604
int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
3605
unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
3606
unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
3607
+ int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3608
+ int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3609
int reduce_smax_value = 0;
3610
int reduce_smin_value = -15;
3611
unsigned int reduce_umax_value = 15;
3618
TESTV (reduce_smax, s);
3619
TESTV (reduce_smin, s);
3620
TESTV (reduce_umax, u);
3621
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c
3622
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c
3624
+/* { dg-do compile } */
3625
+/* { dg-options "-g -mgeneral-regs-only" } */
3628
+foo (const char *c, ...)
3631
+ buf[256 - 1] = '\0';
3633
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
3634
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
3636
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
3638
#define FTYPE double
3643
#include "vect-fcm.x"
3645
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
3646
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
3647
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3648
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
3649
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
3650
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
3651
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
3653
/* { dg-do compile } */
3654
/* { dg-options "-O2" } */
3657
+#include "atomic-op-acquire.x"
3660
-atomic_fetch_add_ACQUIRE (int a)
3662
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
3666
-atomic_fetch_sub_ACQUIRE (int a)
3668
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
3672
-atomic_fetch_and_ACQUIRE (int a)
3674
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
3678
-atomic_fetch_nand_ACQUIRE (int a)
3680
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
3684
-atomic_fetch_xor_ACQUIRE (int a)
3686
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
3690
-atomic_fetch_or_ACQUIRE (int a)
3692
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
3695
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3696
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3697
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
3698
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
3700
/* { dg-do compile } */
3701
/* { dg-options "-O2" } */
3706
+#include "atomic-comp-swap-release-acquire.x"
3709
-atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
3711
- return __atomic_compare_exchange (&v, &a, &b,
3712
- STRONG, __ATOMIC_RELEASE,
3713
- __ATOMIC_ACQUIRE);
3717
-atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
3719
- return __atomic_compare_exchange (&v, &a, &b,
3720
- WEAK, __ATOMIC_RELEASE,
3721
- __ATOMIC_ACQUIRE);
3725
-atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
3727
- return __atomic_compare_exchange_n (&v, &a, b,
3728
- STRONG, __ATOMIC_RELEASE,
3729
- __ATOMIC_ACQUIRE);
3733
-atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
3735
- return __atomic_compare_exchange_n (&v, &a, b,
3736
- WEAK, __ATOMIC_RELEASE,
3737
- __ATOMIC_ACQUIRE);
3740
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
3741
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
3742
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.x
3743
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x
3744
@@ -138,3 +138,17 @@
3749
+void sabd (pRINT a, pRINT b, pRINT c)
3752
+ for (i = 0; i < 16; i++)
3753
+ c[i] = abs (a[i] - b[i]);
3756
+void saba (pRINT a, pRINT b, pRINT c)
3759
+ for (i = 0; i < 16; i++)
3760
+ c[i] += abs (a[i] - b[i]);
3762
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c
3763
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c
3765
+/* { dg-do run } */
3766
+/* { dg-options "-O3 -save-temps -fno-inline" } */
3768
+extern void abort ();
3771
+count_lz_v4si (unsigned *__restrict a, int *__restrict b)
3775
+ for (i = 0; i < 4; i++)
3776
+ b[i] = __builtin_clz (a[i]);
3779
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */
3784
+ unsigned int x[4] = { 0x0, 0xFFFF, 0x1FFFF, 0xFFFFFFFF };
3785
+ int r[4] = { 32, 16, 15, 0 };
3788
+ count_lz_v4si (x, d);
3790
+ for (i = 0; i < 4; i++)
3799
+/* { dg-final { cleanup-saved-temps } } */
3800
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
3801
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
3803
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
3810
#include "vect-fcm.x"
3812
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
3813
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
3814
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
3815
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
3816
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
3817
--- a/src/gcc/testsuite/gcc.target/aarch64/subs3.c
3818
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs3.c
3820
+/* { dg-do run } */
3821
+/* { dg-options "-O2 --save-temps -fno-inline" } */
3823
+extern void abort (void);
3824
+typedef long long s64;
3827
+subs_ext (s64 a, int b, int c)
3838
+subs_shift_ext (s64 a, int b, int c)
3840
+ s64 d = (a - ((s64)b << 3));
3853
+ x = subs_ext (0x13000002ll, 41, 15);
3854
+ if (x != 318767121)
3857
+ x = subs_ext (0x50505050ll, 29, 4);
3858
+ if (x != 1347440724)
3861
+ x = subs_ext (0x12121212121ll, 2, 14);
3862
+ if (x != 555819311)
3865
+ x = subs_shift_ext (0x123456789ll, 4, 12);
3866
+ if (x != 591751033)
3869
+ x = subs_shift_ext (0x02020202ll, 9, 8);
3870
+ if (x != 33685963)
3873
+ x = subs_shift_ext (0x987987987987ll, 23, 41);
3874
+ if (x != -2020050673)
3880
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
3881
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
3882
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
3887
+atomic_fetch_add_ACQUIRE (int a)
3889
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
3893
+atomic_fetch_sub_ACQUIRE (int a)
3895
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
3899
+atomic_fetch_and_ACQUIRE (int a)
3901
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
3905
+atomic_fetch_nand_ACQUIRE (int a)
3907
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
3911
+atomic_fetch_xor_ACQUIRE (int a)
3913
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
3917
+atomic_fetch_or_ACQUIRE (int a)
3919
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
3921
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
3922
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
3925
+/* { dg-do run } */
3926
+/* { dg-options "-O3" } */
3928
+#include "arm_neon.h"
3930
+extern void abort (void);
3932
+#include "vaddv-intrinsic.x"
3937
+ const float32_t pool_v2sf[] = {4.0f, 9.0f};
3938
+ const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
3939
+ const float64_t pool_v2df[] = {4.0, 9.0};
3941
+ if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
3944
+ if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
3947
+ if (test_vaddv_v2df (pool_v2df) != 13.0)
3952
--- a/src/gcc/testsuite/gcc.target/aarch64/sbc.c
3953
+++ b/src/gcc/testsuite/gcc.target/aarch64/sbc.c
3955
+/* { dg-do run } */
3956
+/* { dg-options "-O2 --save-temps" } */
3958
+extern void abort (void);
3960
+typedef unsigned int u32int;
3961
+typedef unsigned long long u64int;
3964
+test_si (u32int w1, u32int w2, u32int w3, u32int w4)
3967
+ /* { dg-final { scan-assembler "sbc\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+\n" } } */
3968
+ w0 = w1 - w2 - (w3 < w4);
3973
+test_di (u64int x1, u64int x2, u64int x3, u64int x4)
3976
+ /* { dg-final { scan-assembler "sbc\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+\n" } } */
3977
+ x0 = x1 - x2 - (x3 < x4);
3986
+ x = test_si (7, 8, 12, 15);
3989
+ y = test_di (0x987654321ll, 0x123456789ll, 0x345345345ll, 0x123123123ll);
3990
+ if (y != 0x8641fdb98ll)
3995
+/* { dg-final { cleanup-saved-temps } } */
3996
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
3997
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
4005
+atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
4007
+ return __atomic_compare_exchange (&v, &a, &b,
4008
+ STRONG, __ATOMIC_RELEASE,
4009
+ __ATOMIC_ACQUIRE);
4013
+atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
4015
+ return __atomic_compare_exchange (&v, &a, &b,
4016
+ WEAK, __ATOMIC_RELEASE,
4017
+ __ATOMIC_ACQUIRE);
4021
+atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
4023
+ return __atomic_compare_exchange_n (&v, &a, b,
4024
+ STRONG, __ATOMIC_RELEASE,
4025
+ __ATOMIC_ACQUIRE);
4029
+atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
4031
+ return __atomic_compare_exchange_n (&v, &a, b,
4032
+ WEAK, __ATOMIC_RELEASE,
4033
+ __ATOMIC_ACQUIRE);
4035
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
4036
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
4038
/* { dg-do compile } */
4039
/* { dg-options "-O2" } */
4041
-#include "../../../config/aarch64/arm_neon.h"
4042
+#include <arm_neon.h>
4044
+/* Used to force a variable to a SIMD register. */
4045
+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
4048
+ : /* No clobbers */);
4050
/* { dg-final { scan-assembler-times "\\tadd\\tx\[0-9\]+" 2 } } */
4055
test_vceqd_s64 (int64x1_t a, int64x1_t b)
4057
- return vceqd_s64 (a, b);
4061
+ res = vceqd_s64 (a, b);
4066
/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
4069
test_vceqzd_s64 (int64x1_t a)
4071
- return vceqzd_s64 (a);
4074
+ res = vceqzd_s64 (a);
4079
/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
4082
test_vcged_s64 (int64x1_t a, int64x1_t b)
4084
- return vcged_s64 (a, b);
4088
+ res = vcged_s64 (a, b);
4094
test_vcled_s64 (int64x1_t a, int64x1_t b)
4096
- return vcled_s64 (a, b);
4100
+ res = vcled_s64 (a, b);
4105
-/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
4106
+/* Idiom recognition will cause this testcase not to generate
4107
+ the expected cmge instruction, so do not check for it. */
4110
test_vcgezd_s64 (int64x1_t a)
4112
- return vcgezd_s64 (a);
4115
+ res = vcgezd_s64 (a);
4120
/* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
4123
test_vcged_u64 (uint64x1_t a, uint64x1_t b)
4125
- return vcged_u64 (a, b);
4129
+ res = vcged_u64 (a, b);
4134
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
4135
@@ -77,13 +112,23 @@
4137
test_vcgtd_s64 (int64x1_t a, int64x1_t b)
4139
- return vcgtd_s64 (a, b);
4143
+ res = vcgtd_s64 (a, b);
4149
test_vcltd_s64 (int64x1_t a, int64x1_t b)
4151
- return vcltd_s64 (a, b);
4155
+ res = vcltd_s64 (a, b);
4160
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
4163
test_vcgtzd_s64 (int64x1_t a)
4165
- return vcgtzd_s64 (a);
4168
+ res = vcgtzd_s64 (a);
4173
/* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
4176
test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
4178
- return vcgtd_u64 (a, b);
4182
+ res = vcgtd_u64 (a, b);
4187
/* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
4188
@@ -107,15 +161,24 @@
4190
test_vclezd_s64 (int64x1_t a)
4192
- return vclezd_s64 (a);
4195
+ res = vclezd_s64 (a);
4200
-/* { dg-final { scan-assembler-times "\\tcmlt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
4201
+/* Idiom recognition will cause this testcase not to generate
4202
+ the expected cmlt instruction, so do not check for it. */
4205
test_vcltzd_s64 (int64x1_t a)
4207
- return vcltzd_s64 (a);
4210
+ res = vcltzd_s64 (a);
4215
/* { dg-final { scan-assembler-times "\\tdup\\tb\[0-9\]+, v\[0-9\]+\.b" 2 } } */
4216
@@ -160,18 +223,18 @@
4217
return vdups_lane_u32 (a, 2);
4220
-/* { dg-final { scan-assembler-times "\\tdup\\td\[0-9\]+, v\[0-9\]+\.d" 2 } } */
4221
+/* { dg-final { scan-assembler-times "\\tumov\\tx\[0-9\]+, v\[0-9\]+\.d" 2 } } */
4224
test_vdupd_lane_s64 (int64x2_t a)
4226
- return vdupd_lane_s64 (a, 2);
4227
+ return vdupd_lane_s64 (a, 1);
4231
test_vdupd_lane_u64 (uint64x2_t a)
4233
- return vdupd_lane_u64 (a, 2);
4234
+ return vdupd_lane_u64 (a, 1);
4237
/* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
4238
@@ -179,13 +242,23 @@
4240
test_vtst_s64 (int64x1_t a, int64x1_t b)
4242
- return vtstd_s64 (a, b);
4246
+ res = vtstd_s64 (a, b);
4252
test_vtst_u64 (uint64x1_t a, uint64x1_t b)
4254
- return vtstd_u64 (a, b);
4258
+ res = vtstd_s64 (a, b);
4263
/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
4264
@@ -722,8 +795,11 @@
4265
return vrshld_u64 (a, b);
4268
-/* { dg-final { scan-assembler-times "\\tasr\\tx\[0-9\]+" 1 } } */
4269
+/* Other intrinsics can generate an asr instruction (vcltzd, vcgezd),
4270
+ so we cannot check scan-assembler-times. */
4272
+/* { dg-final { scan-assembler "\\tasr\\tx\[0-9\]+" } } */
4275
test_vshrd_n_s64 (int64x1_t a)
4277
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
4278
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
4280
/* { dg-do compile } */
4281
/* { dg-options "-O2" } */
4284
+#include "atomic-op-int.x"
4287
-atomic_fetch_add_RELAXED (int a)
4289
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
4293
-atomic_fetch_sub_RELAXED (int a)
4295
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
4299
-atomic_fetch_and_RELAXED (int a)
4301
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
4305
-atomic_fetch_nand_RELAXED (int a)
4307
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
4311
-atomic_fetch_xor_RELAXED (int a)
4313
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
4317
-atomic_fetch_or_RELAXED (int a)
4319
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
4322
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4323
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4324
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
4325
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
4327
/* { dg-do compile } */
4328
/* { dg-options "-O2" } */
4331
+#include "atomic-op-seq_cst.x"
4334
-atomic_fetch_add_SEQ_CST (int a)
4336
- return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
4340
-atomic_fetch_sub_SEQ_CST (int a)
4342
- return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
4346
-atomic_fetch_and_SEQ_CST (int a)
4348
- return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
4352
-atomic_fetch_nand_SEQ_CST (int a)
4354
- return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
4358
-atomic_fetch_xor_SEQ_CST (int a)
4360
- return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
4364
-atomic_fetch_or_SEQ_CST (int a)
4366
- return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
4369
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4370
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4371
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
4372
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
4376
+test_vaddv_v2sf (const float32_t *pool)
4380
+ val = vld1_f32 (pool);
4381
+ return vaddv_f32 (val);
4385
+test_vaddv_v4sf (const float32_t *pool)
4389
+ val = vld1q_f32 (pool);
4390
+ return vaddvq_f32 (val);
4394
+test_vaddv_v2df (const float64_t *pool)
4398
+ val = vld1q_f64 (pool);
4399
+ return vaddvq_f64 (val);
4401
--- a/src/gcc/testsuite/gcc.target/aarch64/negs.c
4402
+++ b/src/gcc/testsuite/gcc.target/aarch64/negs.c
4404
+/* { dg-do run } */
4405
+/* { dg-options "-O2 --save-temps" } */
4407
+extern void abort (void);
4411
+negs_si_test1 (int a, int b, int c)
4415
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+" } } */
4424
+negs_si_test3 (int a, int b, int c)
4426
+ int d = -(b) << 3;
4428
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4436
+typedef long long s64;
4440
+negs_di_test1 (s64 a, s64 b, s64 c)
4444
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+" } } */
4453
+negs_di_test3 (s64 a, s64 b, s64 c)
4455
+ s64 d = -(b) << 3;
4457
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4470
+ x = negs_si_test1 (2, 12, 5);
4474
+ x = negs_si_test1 (1, 2, 32);
4478
+ x = negs_si_test3 (13, 14, 5);
4482
+ x = negs_si_test3 (15, 21, 2);
4486
+ y = negs_di_test1 (0x20202020ll,
4489
+ if (y != 0x62636263ll)
4492
+ y = negs_di_test1 (0x1010101010101ll,
4493
+ 0x123456789abcdll,
4494
+ 0x5555555555555ll);
4495
+ if (y != 0x6565656565656ll)
4498
+ y = negs_di_test3 (0x62523781ll,
4501
+ if (y != 0xfffffffd553d4edbll)
4504
+ y = negs_di_test3 (0x763526268ll,
4507
+ if (y != 0xfffffffb1b1b1b1bll)
4512
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
4513
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
4515
/* { dg-do compile } */
4516
/* { dg-options "-O2" } */
4519
+#include "atomic-op-consume.x"
4522
-atomic_fetch_add_CONSUME (int a)
4524
- return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
4528
-atomic_fetch_sub_CONSUME (int a)
4530
- return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
4534
-atomic_fetch_and_CONSUME (int a)
4536
- return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
4540
-atomic_fetch_nand_CONSUME (int a)
4542
- return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
4546
-atomic_fetch_xor_CONSUME (int a)
4548
- return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
4552
-atomic_fetch_or_CONSUME (int a)
4554
- return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
4557
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4558
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4559
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
4560
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
4562
+/* { dg-do run } */
4563
+/* { dg-options "-O3 --save-temps -ffast-math" } */
4565
+#include <arm_neon.h>
4567
+extern void abort (void);
4568
+extern float fabsf (float);
4569
+extern double fabs (double);
4571
+#define NUM_TESTS 16
4572
+#define DELTA 0.000001
4574
+int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
4575
+ -4, 34, 110, -110, 6, 4, 75, -34};
4576
+int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
4577
+ -4, 34, 110, -110, 6, 4, 75, -34};
4578
+int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
4579
+ -4, 34, 110, -110, 6, 4, 75, -34};
4580
+int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76,
4581
+ -4, 34, 110, -110, 6, 4, 75, -34};
4583
+uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
4584
+ 4, 34, 110, 110, 6, 4, 75, 34};
4585
+uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
4586
+ 4, 34, 110, 110, 6, 4, 75, 34};
4587
+uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
4588
+ 4, 34, 110, 110, 6, 4, 75, 34};
4590
+uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76,
4591
+ 4, 34, 110, 110, 6, 4, 75, 34};
4593
+float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
4594
+ 200.0f, -800.0f, -13.0f, -0.5f,
4595
+ 7.9f, -870.0f, 10.4f, 310.11f,
4596
+ 0.0f, -865.0f, -2213.0f, -1.5f};
4598
+double input_float64[] = {0.1, -0.1, 0.4, 10.3,
4599
+ 200.0, -800.0, -13.0, -0.5,
4600
+ 7.9, -870.0, 10.4, 310.11,
4601
+ 0.0, -865.0, -2213.0, -1.5};
4603
+#define EQUALF(a, b) (fabsf (a - b) < DELTA)
4604
+#define EQUALD(a, b) (fabs (a - b) < DELTA)
4605
+#define EQUALL(a, b) (a == b)
4607
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \
4609
+test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void) \
4612
+ int moves = (NUM_TESTS - LANES) + 1; \
4613
+ TYPE##_t out_l[NUM_TESTS]; \
4614
+ TYPE##_t out_v[NUM_TESTS]; \
4616
+ /* Calculate linearly. */ \
4617
+ for (i = 0; i < moves; i++) \
4619
+ out_l[i] = input_##TYPE[i]; \
4620
+ for (j = 1; j < LANES; j++) \
4621
+ out_l[i] += input_##TYPE[i + j]; \
4624
+ /* Calculate using vector reduction intrinsics. */ \
4625
+ for (i = 0; i < moves; i++) \
4627
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
4628
+ out_v[i] = vaddv##Q##_##SUFFIX (t1); \
4632
+ for (i = 0; i < moves; i++) \
4634
+ if (!EQUAL##FLOAT (out_v[i], out_l[i])) \
4640
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \
4641
+TEST (STYPE, , TYPE, W32, F) \
4642
+TEST (STYPE, q, TYPE, W64, F) \
4644
+BUILD_VARIANTS (int8, s8, 8, 16, L)
4645
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
4646
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
4647
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
4648
+BUILD_VARIANTS (int16, s16, 4, 8, L)
4649
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
4650
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
4651
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
4652
+BUILD_VARIANTS (int32, s32, 2, 4, L)
4653
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
4654
+/* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4655
+/* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
4656
+TEST (s64, q, int64, 2, D)
4657
+TEST (u64, q, uint64, 2, D)
4658
+/* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
4660
+BUILD_VARIANTS (float32, f32, 2, 4, F)
4661
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
4662
+/* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4663
+TEST (f64, q, float64, 2, D)
4664
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
4667
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \
4669
+ if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ()) \
4674
+main (int argc, char **argv)
4676
+BUILD_VARIANTS (int8, s8, 8, 16, L)
4677
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
4678
+BUILD_VARIANTS (int16, s16, 4, 8, L)
4679
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
4680
+BUILD_VARIANTS (int32, s32, 2, 4, L)
4681
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
4683
+BUILD_VARIANTS (float32, f32, 2, 4, F)
4684
+TEST (f64, q, float64, 2, D)
4689
+/* { dg-final { cleanup-saved-temps } } */
4690
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
4691
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
4693
/* { dg-do compile } */
4694
/* { dg-options "-O2" } */
4697
+#include "atomic-op-char.x"
4700
-atomic_fetch_add_RELAXED (char a)
4702
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
4706
-atomic_fetch_sub_RELAXED (char a)
4708
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
4712
-atomic_fetch_and_RELAXED (char a)
4714
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
4718
-atomic_fetch_nand_RELAXED (char a)
4720
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
4724
-atomic_fetch_xor_RELAXED (char a)
4726
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
4730
-atomic_fetch_or_RELAXED (char a)
4732
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
4735
/* { dg-final { scan-assembler-times "ldxrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4736
/* { dg-final { scan-assembler-times "stxrb\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4737
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
4738
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
4743
+atomic_fetch_add_RELAXED (int a)
4745
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
4749
+atomic_fetch_sub_RELAXED (int a)
4751
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
4755
+atomic_fetch_and_RELAXED (int a)
4757
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
4761
+atomic_fetch_nand_RELAXED (int a)
4763
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
4767
+atomic_fetch_xor_RELAXED (int a)
4769
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
4773
+atomic_fetch_or_RELAXED (int a)
4775
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
4777
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
4778
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
4783
+atomic_fetch_add_SEQ_CST (int a)
4785
+ return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
4789
+atomic_fetch_sub_SEQ_CST (int a)
4791
+ return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
4795
+atomic_fetch_and_SEQ_CST (int a)
4797
+ return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
4801
+atomic_fetch_nand_SEQ_CST (int a)
4803
+ return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
4807
+atomic_fetch_xor_SEQ_CST (int a)
4809
+ return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
4813
+atomic_fetch_or_SEQ_CST (int a)
4815
+ return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
4817
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
4818
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
4823
+atomic_fetch_add_CONSUME (int a)
4825
+ return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
4829
+atomic_fetch_sub_CONSUME (int a)
4831
+ return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
4835
+atomic_fetch_and_CONSUME (int a)
4837
+ return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
4841
+atomic_fetch_nand_CONSUME (int a)
4843
+ return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
4847
+atomic_fetch_xor_CONSUME (int a)
4849
+ return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
4853
+atomic_fetch_or_CONSUME (int a)
4855
+ return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
4857
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
4858
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
4860
/* { dg-do compile } */
4861
/* { dg-options "-O2" } */
4864
+#include "atomic-op-short.x"
4867
-atomic_fetch_add_RELAXED (short a)
4869
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
4873
-atomic_fetch_sub_RELAXED (short a)
4875
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
4879
-atomic_fetch_and_RELAXED (short a)
4881
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
4885
-atomic_fetch_nand_RELAXED (short a)
4887
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
4891
-atomic_fetch_xor_RELAXED (short a)
4893
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
4897
-atomic_fetch_or_RELAXED (short a)
4899
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
4902
/* { dg-final { scan-assembler-times "ldxrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4903
/* { dg-final { scan-assembler-times "stxrh\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4904
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
4905
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
4910
+atomic_fetch_add_RELAXED (char a)
4912
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
4916
+atomic_fetch_sub_RELAXED (char a)
4918
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
4922
+atomic_fetch_and_RELAXED (char a)
4924
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
4928
+atomic_fetch_nand_RELAXED (char a)
4930
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
4934
+atomic_fetch_xor_RELAXED (char a)
4936
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
4940
+atomic_fetch_or_RELAXED (char a)
4942
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
4944
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
4945
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
4947
/* { dg-final { scan-assembler "fdiv\\tv" } } */
4948
/* { dg-final { scan-assembler "fneg\\tv" } } */
4949
/* { dg-final { scan-assembler "fabs\\tv" } } */
4950
+/* { dg-final { scan-assembler "fabd\\tv" } } */
4951
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
4952
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
4954
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
4961
#include "vect-fcm.x"
4963
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
4964
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
4965
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
4966
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
4967
/* { dg-final { cleanup-tree-dump "vect" } } */
4968
--- a/src/gcc/testsuite/gcc.target/aarch64/adds1.c
4969
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds1.c
4971
+/* { dg-do run } */
4972
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4974
+extern void abort (void);
4977
+adds_si_test1 (int a, int b, int c)
4981
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4989
+adds_si_test2 (int a, int b, int c)
4993
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, 255" } } */
5001
+adds_si_test3 (int a, int b, int c)
5003
+ int d = a + (b << 3);
5005
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
5012
+typedef long long s64;
5015
+adds_di_test1 (s64 a, s64 b, s64 c)
5019
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
5027
+adds_di_test2 (s64 a, s64 b, s64 c)
5031
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, 255" } } */
5039
+adds_di_test3 (s64 a, s64 b, s64 c)
5041
+ s64 d = a + (b << 3);
5043
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
5055
+ x = adds_si_test1 (29, 4, 5);
5059
+ x = adds_si_test1 (5, 2, 20);
5063
+ x = adds_si_test2 (29, 4, 5);
5067
+ x = adds_si_test2 (1024, 2, 20);
5071
+ x = adds_si_test3 (35, 4, 5);
5075
+ x = adds_si_test3 (5, 2, 20);
5079
+ y = adds_di_test1 (0x130000029ll,
5083
+ if (y != 0xc75050536)
5086
+ y = adds_di_test1 (0x5000500050005ll,
5087
+ 0x2111211121112ll,
5088
+ 0x0000000002020ll);
5089
+ if (y != 0x9222922294249)
5092
+ y = adds_di_test2 (0x130000029ll,
5095
+ if (y != 0x955050631)
5098
+ y = adds_di_test2 (0x130002900ll,
5101
+ if (y != 0x955052f08)
5104
+ y = adds_di_test3 (0x130000029ll,
5107
+ if (y != 0x9b9050576)
5110
+ y = adds_di_test3 (0x130002900ll,
5113
+ if (y != 0xafd052e4d)
5119
+/* { dg-final { cleanup-saved-temps } } */
5120
--- a/src/gcc/testsuite/gcc.target/aarch64/insv_1.c
5121
+++ b/src/gcc/testsuite/gcc.target/aarch64/insv_1.c
5123
+/* { dg-do run } */
5124
+/* { dg-options "-O2 --save-temps -fno-inline" } */
5126
+extern void abort (void);
5128
+typedef struct bitfield
5130
+ unsigned short eight: 8;
5131
+ unsigned short four: 4;
5132
+ unsigned short five: 5;
5133
+ unsigned short seven: 7;
5134
+ unsigned int sixteen: 16;
5140
+ /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 0, 8" } } */
5148
+ /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 16, 5" } } */
5156
+ /* { dg-final { scan-assembler "movk\tx\[0-9\]+, 0x1d6b, lsl 32" } } */
5164
+ /* { dg-final { scan-assembler "orr\tx\[0-9\]+, x\[0-9\]+, 2031616" } } */
5172
+ /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, -2031617" } } */
5179
+main (int argc, char** argv)
5181
+ static bitfield a;
5182
+ bitfield b = bfi1 (a);
5183
+ bitfield c = bfi2 (b);
5184
+ bitfield d = movk (c);
5192
+ if (d.sixteen != 7531)
5196
+ if (d.five != 0x1f)
5206
+/* { dg-final { cleanup-saved-temps } } */
5207
--- a/src/gcc/testsuite/gcc.target/aarch64/ror.c
5208
+++ b/src/gcc/testsuite/gcc.target/aarch64/ror.c
5210
+/* { dg-options "-O2 --save-temps" } */
5211
+/* { dg-do run } */
5213
+extern void abort (void);
5218
+ /* { dg-final { scan-assembler "ror\tw\[0-9\]+, w\[0-9\]+, 27\n" } } */
5219
+ return (a << 5) | ((unsigned int) a >> 27);
5223
+test_di (long long a)
5225
+ /* { dg-final { scan-assembler "ror\tx\[0-9\]+, x\[0-9\]+, 45\n" } } */
5226
+ return (a << 19) | ((unsigned long long) a >> 45);
5234
+ v = test_si (0x0203050);
5235
+ if (v != 0x4060a00)
5237
+ w = test_di (0x0000020506010304ll);
5238
+ if (w != 0x1028300818200000ll)
5243
+/* { dg-final { cleanup-saved-temps } } */
5244
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
5245
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
5247
/* { dg-do compile } */
5248
/* { dg-options "-O2" } */
5251
+#include "atomic-op-release.x"
5254
-atomic_fetch_add_RELEASE (int a)
5256
- return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
5260
-atomic_fetch_sub_RELEASE (int a)
5262
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
5266
-atomic_fetch_and_RELEASE (int a)
5268
- return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
5272
-atomic_fetch_nand_RELEASE (int a)
5274
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
5278
-atomic_fetch_xor_RELEASE (int a)
5280
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
5284
-atomic_fetch_or_RELEASE (int a)
5286
- return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
5289
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5290
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5291
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vfmaxv.c
5292
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vfmaxv.c
5294
+/* { dg-do run } */
5295
+/* { dg-options "-O3 --save-temps -ffast-math" } */
5297
+#include <arm_neon.h>
5299
+extern void abort (void);
5301
+extern float fabsf (float);
5302
+extern double fabs (double);
5303
+extern int isnan (double);
5304
+extern float fmaxf (float, float);
5305
+extern float fminf (float, float);
5306
+extern double fmax (double, double);
5307
+extern double fmin (double, double);
5309
+#define NUM_TESTS 16
5310
+#define DELTA 0.000001
5311
+#define NAN (0.0 / 0.0)
5313
+float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
5314
+ 200.0f, -800.0f, -13.0f, -0.5f,
5315
+ NAN, -870.0f, 10.4f, 310.11f,
5316
+ 0.0f, -865.0f, -2213.0f, -1.5f};
5318
+double input_float64[] = {0.1, -0.1, 0.4, 10.3,
5319
+ 200.0, -800.0, -13.0, -0.5,
5320
+ NAN, -870.0, 10.4, 310.11,
5321
+ 0.0, -865.0, -2213.0, -1.5};
5323
+#define EQUALF(a, b) (fabsf (a - b) < DELTA)
5324
+#define EQUALD(a, b) (fabs (a - b) < DELTA)
5326
+/* Floating point 'unordered' variants. */
5329
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
5331
+test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \
5334
+ int moves = (NUM_TESTS - LANES) + 1; \
5335
+ TYPE##_t out_l[NUM_TESTS]; \
5336
+ TYPE##_t out_v[NUM_TESTS]; \
5338
+ /* Calculate linearly. */ \
5339
+ for (i = 0; i < moves; i++) \
5341
+ out_l[i] = input_##TYPE[i]; \
5342
+ for (j = 0; j < LANES; j++) \
5344
+ if (isnan (out_l[i])) \
5346
+ if (isnan (input_##TYPE[i + j]) \
5347
+ || input_##TYPE[i + j] CMP_OP out_l[i]) \
5348
+ out_l[i] = input_##TYPE[i + j]; \
5352
+ /* Calculate using vector reduction intrinsics. */ \
5353
+ for (i = 0; i < moves; i++) \
5355
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
5356
+ out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \
5360
+ for (i = 0; i < moves; i++) \
5362
+ if (!EQUAL##FLOAT (out_v[i], out_l[i]) \
5363
+ && !(isnan (out_v[i]) && isnan (out_l[i]))) \
5369
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \
5370
+TEST (max, >, STYPE, , TYPE, W32, F) \
5371
+TEST (max, >, STYPE, q, TYPE, W64, F) \
5372
+TEST (min, <, STYPE, , TYPE, W32, F) \
5373
+TEST (min, <, STYPE, q, TYPE, W64, F)
5375
+BUILD_VARIANTS (float32, f32, 2, 4, F)
5376
+/* { dg-final { scan-assembler "fmaxp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
5377
+/* { dg-final { scan-assembler "fminp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
5378
+/* { dg-final { scan-assembler "fmaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5379
+/* { dg-final { scan-assembler "fminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5380
+TEST (max, >, f64, q, float64, 2, D)
5381
+/* { dg-final { scan-assembler "fmaxp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
5382
+TEST (min, <, f64, q, float64, 2, D)
5383
+/* { dg-final { scan-assembler "fminp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
5385
+/* Floating point 'nm' variants. */
5388
+#define TEST(MAXMIN, F, SUFFIX, Q, TYPE, LANES, FLOAT) \
5390
+test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t (void) \
5393
+ int moves = (NUM_TESTS - LANES) + 1; \
5394
+ TYPE##_t out_l[NUM_TESTS]; \
5395
+ TYPE##_t out_v[NUM_TESTS]; \
5397
+ /* Calculate linearly. */ \
5398
+ for (i = 0; i < moves; i++) \
5400
+ out_l[i] = input_##TYPE[i]; \
5401
+ for (j = 0; j < LANES; j++) \
5402
+ out_l[i] = f##MAXMIN##F (input_##TYPE[i + j], out_l[i]); \
5405
+ /* Calculate using vector reduction intrinsics. */ \
5406
+ for (i = 0; i < moves; i++) \
5408
+ TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
5409
+ out_v[i] = v##MAXMIN##nmv##Q##_##SUFFIX (t1); \
5413
+ for (i = 0; i < moves; i++) \
5415
+ if (!EQUAL##FLOAT (out_v[i], out_l[i])) \
5421
+TEST (max, f, f32, , float32, 2, D)
5422
+/* { dg-final { scan-assembler "fmaxnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
5423
+TEST (min, f, f32, , float32, 2, D)
5424
+/* { dg-final { scan-assembler "fminnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
5425
+TEST (max, f, f32, q, float32, 4, D)
5426
+/* { dg-final { scan-assembler "fmaxnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5427
+TEST (min, f, f32, q, float32, 4, D)
5428
+/* { dg-final { scan-assembler "fminnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
5429
+TEST (max, , f64, q, float64, 2, D)
5430
+/* { dg-final { scan-assembler "fmaxnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
5431
+TEST (min, , f64, q, float64, 2, D)
5432
+/* { dg-final { scan-assembler "fminnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
5435
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
5437
+ if (!test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t ()) \
5442
+main (int argc, char **argv)
5444
+ BUILD_VARIANTS (float32, f32, 2, 4, F)
5445
+ TEST (max, >, f64, q, float64, 2, D)
5446
+ TEST (min, <, f64, q, float64, 2, D)
5449
+#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
5451
+ if (!test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t ()) \
5455
+ BUILD_VARIANTS (float32, f32, 2, 4, F)
5456
+ TEST (max, >, f64, q, float64, 2, D)
5457
+ TEST (min, <, f64, q, float64, 2, D)
5462
+/* { dg-final { cleanup-saved-temps } } */
5463
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
5464
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
5469
+atomic_fetch_add_RELAXED (short a)
5471
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
5475
+atomic_fetch_sub_RELAXED (short a)
5477
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
5481
+atomic_fetch_and_RELAXED (short a)
5483
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
5487
+atomic_fetch_nand_RELAXED (short a)
5489
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
5493
+atomic_fetch_xor_RELAXED (short a)
5495
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
5499
+atomic_fetch_or_RELAXED (short a)
5501
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
5503
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
5504
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
5506
+/* { dg-do run } */
5507
+/* { dg-options "-O3 --save-temps -ffast-math" } */
5509
+#include <arm_neon.h>
5511
+extern void abort (void);
5512
+extern double fabs (double);
5514
+#define NUM_TESTS 8
5515
+#define DELTA 0.000001
5517
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
5518
+ 200.0f, -800.0f, -13.0f, -0.5f};
5519
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
5520
+ 200.0, -800.0, -13.0, -0.5};
5522
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
5524
+test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t (void) \
5528
+ int nlanes = LANES; \
5529
+ U##int##WIDTH##_t expected_out[NUM_TESTS]; \
5530
+ U##int##WIDTH##_t actual_out[NUM_TESTS]; \
5532
+ for (i = 0; i < NUM_TESTS; i++) \
5535
+ = vcvt##SUFFIX##D##_##S##WIDTH##_f##WIDTH (input_f##WIDTH[i]); \
5536
+ /* Don't vectorize this. */ \
5537
+ asm volatile ("" : : : "memory"); \
5540
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
5542
+ U##int##WIDTH##x##LANES##_t out = \
5543
+ vcvt##SUFFIX##Q##_##S##WIDTH##_f##WIDTH \
5544
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
5545
+ vst1##Q##_##S##WIDTH (actual_out + i, out); \
5548
+ for (i = 0; i < NUM_TESTS; i++) \
5549
+ ret &= fabs (expected_out[i] - actual_out[i]) < DELTA; \
5555
+#define BUILD_VARIANTS(SUFFIX) \
5556
+TEST (SUFFIX, , 32, 2, s, ,s) \
5557
+TEST (SUFFIX, q, 32, 4, s, ,s) \
5558
+TEST (SUFFIX, q, 64, 2, s, ,d) \
5559
+TEST (SUFFIX, , 32, 2, u,u,s) \
5560
+TEST (SUFFIX, q, 32, 4, u,u,s) \
5561
+TEST (SUFFIX, q, 64, 2, u,u,d) \
5564
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */
5565
+/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */
5566
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5567
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5568
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5569
+/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */
5570
+/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */
5571
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5572
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5573
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5575
+/* { dg-final { scan-assembler "fcvtas\\tw\[0-9\]+, s\[0-9\]+" } } */
5576
+/* { dg-final { scan-assembler "fcvtas\\tx\[0-9\]+, d\[0-9\]+" } } */
5577
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5578
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5579
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5580
+/* { dg-final { scan-assembler "fcvtau\\tw\[0-9\]+, s\[0-9\]+" } } */
5581
+/* { dg-final { scan-assembler "fcvtau\\tx\[0-9\]+, d\[0-9\]+" } } */
5582
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5583
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5584
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5586
+/* { dg-final { scan-assembler "fcvtms\\tw\[0-9\]+, s\[0-9\]+" } } */
5587
+/* { dg-final { scan-assembler "fcvtms\\tx\[0-9\]+, d\[0-9\]+" } } */
5588
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5589
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5590
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5591
+/* { dg-final { scan-assembler "fcvtmu\\tw\[0-9\]+, s\[0-9\]+" } } */
5592
+/* { dg-final { scan-assembler "fcvtmu\\tx\[0-9\]+, d\[0-9\]+" } } */
5593
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5594
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5595
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5597
+/* { dg-final { scan-assembler "fcvtns\\tw\[0-9\]+, s\[0-9\]+" } } */
5598
+/* { dg-final { scan-assembler "fcvtns\\tx\[0-9\]+, d\[0-9\]+" } } */
5599
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5600
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5601
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5602
+/* { dg-final { scan-assembler "fcvtnu\\tw\[0-9\]+, s\[0-9\]+" } } */
5603
+/* { dg-final { scan-assembler "fcvtnu\\tx\[0-9\]+, d\[0-9\]+" } } */
5604
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5605
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5606
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5608
+/* { dg-final { scan-assembler "fcvtps\\tw\[0-9\]+, s\[0-9\]+" } } */
5609
+/* { dg-final { scan-assembler "fcvtps\\tx\[0-9\]+, d\[0-9\]+" } } */
5610
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5611
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5612
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5613
+/* { dg-final { scan-assembler "fcvtpu\\tw\[0-9\]+, s\[0-9\]+" } } */
5614
+/* { dg-final { scan-assembler "fcvtpu\\tx\[0-9\]+, d\[0-9\]+" } } */
5615
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
5616
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
5617
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
5620
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
5622
+ if (!test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t ()) \
5627
+main (int argc, char **argv)
5629
+ BUILD_VARIANTS ( )
5630
+ BUILD_VARIANTS (a)
5631
+ BUILD_VARIANTS (m)
5632
+ BUILD_VARIANTS (n)
5633
+ BUILD_VARIANTS (p)
5637
+/* { dg-final { cleanup-saved-temps } } */
5638
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
5639
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
5644
+atomic_fetch_add_RELEASE (int a)
5646
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
5650
+atomic_fetch_sub_RELEASE (int a)
5652
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
5656
+atomic_fetch_and_RELEASE (int a)
5658
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
5662
+atomic_fetch_nand_RELEASE (int a)
5664
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
5668
+atomic_fetch_xor_RELEASE (int a)
5670
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
5674
+atomic_fetch_or_RELEASE (int a)
5676
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
5678
--- a/src/gcc/testsuite/gcc.target/aarch64/fabd.c
5679
+++ b/src/gcc/testsuite/gcc.target/aarch64/fabd.c
5681
+/* { dg-do run } */
5682
+/* { dg-options "-O1 -fno-inline --save-temps" } */
5684
+extern double fabs (double);
5685
+extern float fabsf (float);
5686
+extern void abort ();
5687
+extern void exit (int);
5690
+fabd_d (double x, double y, double d)
5692
+ if ((fabs (x - y) - d) > 0.00001)
5696
+/* { dg-final { scan-assembler "fabd\td\[0-9\]+" } } */
5699
+fabd_f (float x, float y, float d)
5701
+ if ((fabsf (x - y) - d) > 0.00001)
5705
+/* { dg-final { scan-assembler "fabd\ts\[0-9\]+" } } */
5710
+ fabd_d (10.0, 5.0, 5.0);
5711
+ fabd_d (5.0, 10.0, 5.0);
5712
+ fabd_f (10.0, 5.0, 5.0);
5713
+ fabd_f (5.0, 10.0, 5.0);
5718
+/* { dg-final { cleanup-saved-temps } } */
5719
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
5720
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
5721
@@ -117,6 +117,16 @@
5722
9.0, 10.0, 11.0, 12.0,
5723
13.0, 14.0, 15.0, 16.0 };
5725
+ F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
5726
+ 1.0f, 1.0f, 1.0f, 1.0f,
5727
+ 1.0f, 1.0f, 1.0f, 1.0f,
5728
+ 1.0f, 1.0f, 1.0f, 1.0f };
5730
+ F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
5731
+ 1.0, 1.0, 1.0, 1.0,
5732
+ 1.0, 1.0, 1.0, 1.0,
5733
+ 1.0, 1.0, 1.0, 1.0 };
5735
/* Setup input vectors. */
5736
for (i=1; i<=16; i++)
5746
--- a/src/gcc/testsuite/gcc.target/aarch64/ngc.c
5747
+++ b/src/gcc/testsuite/gcc.target/aarch64/ngc.c
5749
+/* { dg-do run } */
5750
+/* { dg-options "-O2 --save-temps -fno-inline" } */
5752
+extern void abort (void);
5753
+typedef unsigned int u32;
5756
+ngc_si (u32 a, u32 b, u32 c, u32 d)
5762
+typedef unsigned long long u64;
5765
+ngc_si_tst (u64 a, u32 b, u32 c, u32 d)
5772
+ngc_di (u64 a, u64 b, u64 c, u64 d)
5784
+ x = ngc_si (29, 4, 5, 4);
5788
+ x = ngc_si (1024, 2, 20, 13);
5792
+ y = ngc_si_tst (0x130000029ll, 32, 50, 12);
5793
+ if (y != 0xffffffe0)
5796
+ y = ngc_si_tst (0x5000500050005ll, 21, 2, 14);
5797
+ if (y != 0xffffffea)
5800
+ y = ngc_di (0x130000029ll, 0x320000004ll, 0x505050505ll, 0x123123123ll);
5801
+ if (y != 0xfffffffcdffffffc)
5804
+ y = ngc_di (0x5000500050005ll,
5805
+ 0x2111211121112ll, 0x0000000002020ll, 0x1414575046477ll);
5806
+ if (y != 0xfffdeeedeeedeeed)
5812
+/* { dg-final { scan-assembler-times "ngc\tw\[0-9\]+, w\[0-9\]+" 2 } } */
5813
+/* { dg-final { scan-assembler-times "ngc\tx\[0-9\]+, x\[0-9\]+" 1 } } */
5814
+/* { dg-final { cleanup-saved-temps } } */
5815
--- a/src/gcc/testsuite/gcc.target/aarch64/cmp.c
5816
+++ b/src/gcc/testsuite/gcc.target/aarch64/cmp.c
5818
+/* { dg-do compile } */
5819
+/* { dg-options "-O2" } */
5822
+cmp_si_test1 (int a, int b, int c)
5831
+cmp_si_test2 (int a, int b, int c)
5839
+typedef long long s64;
5842
+cmp_di_test1 (s64 a, s64 b, s64 c)
5851
+cmp_di_test2 (s64 a, s64 b, s64 c)
5860
+cmp_di_test3 (int a, s64 b, s64 c)
5869
+cmp_di_test4 (int a, s64 b, s64 c)
5871
+ if (((s64)a << 3) > b)
5877
+/* { dg-final { scan-assembler-times "cmp\tw\[0-9\]+, w\[0-9\]+" 2 } } */
5878
+/* { dg-final { scan-assembler-times "cmp\tx\[0-9\]+, x\[0-9\]+" 4 } } */
5879
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
5880
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
5882
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
5889
#include "vect-fcm.x"
5891
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
5892
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
5893
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
5894
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
5895
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
5896
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
5897
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
5899
extern float fabsf (float);
5900
extern double fabs (double);
5902
+#define DEF3a(fname, type, op) \
5903
+ void fname##_##type (pR##type a, \
5908
+ for (i = 0; i < 16; i++) \
5909
+ a[i] = op (b[i] - c[i]); \
5912
#define DEF3(fname, type, op) \
5913
void fname##_##type (pR##type a, \
5918
- for (i=0; i<16; i++) \
5919
+ for (i = 0; i < 16; i++) \
5920
a[i] = b[i] op c[i]; \
5927
- for (i=0; i<16; i++) \
5928
+ for (i = 0; i < 16; i++) \
5933
+#define DEFN3a(fname, op) \
5934
+ DEF3a (fname, F32, op) \
5935
+ DEF3a (fname, F64, op)
5937
#define DEFN3(fname, op) \
5938
DEF3 (fname, F32, op) \
5939
DEF3 (fname, F64, op)
5942
DEF2 (abs, F32, fabsf)
5943
DEF2 (abs, F64, fabs)
5944
+DEF3a (fabd, F32, fabsf)
5945
+DEF3a (fabd, F64, fabs)
5946
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
5947
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
5949
/* { dg-do compile } */
5950
/* { dg-options "-O2" } */
5953
+#include "atomic-op-acq_rel.x"
5956
-atomic_fetch_add_ACQ_REL (int a)
5958
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
5962
-atomic_fetch_sub_ACQ_REL (int a)
5964
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
5968
-atomic_fetch_and_ACQ_REL (int a)
5970
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
5974
-atomic_fetch_nand_ACQ_REL (int a)
5976
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
5980
-atomic_fetch_xor_ACQ_REL (int a)
5982
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
5986
-atomic_fetch_or_ACQ_REL (int a)
5988
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
5991
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5992
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
5993
--- a/src/gcc/testsuite/gcc.target/aarch64/subs1.c
5994
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs1.c
5996
+/* { dg-do run } */
5997
+/* { dg-options "-O2 --save-temps -fno-inline" } */
5999
+extern void abort (void);
6002
+subs_si_test1 (int a, int b, int c)
6006
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
6014
+subs_si_test2 (int a, int b, int c)
6018
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, #255" } } */
6026
+subs_si_test3 (int a, int b, int c)
6028
+ int d = a - (b << 3);
6030
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
6037
+typedef long long s64;
6040
+subs_di_test1 (s64 a, s64 b, s64 c)
6044
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
6052
+subs_di_test2 (s64 a, s64 b, s64 c)
6056
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, #255" } } */
6064
+subs_di_test3 (s64 a, s64 b, s64 c)
6066
+ s64 d = a - (b << 3);
6068
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
6080
+ x = subs_si_test1 (29, 4, 5);
6084
+ x = subs_si_test1 (5, 2, 20);
6088
+ x = subs_si_test2 (29, 4, 5);
6092
+ x = subs_si_test2 (1024, 2, 20);
6096
+ x = subs_si_test3 (35, 4, 5);
6100
+ x = subs_si_test3 (5, 2, 20);
6104
+ y = subs_di_test1 (0x130000029ll,
6108
+ if (y != 0x45000002d)
6111
+ y = subs_di_test1 (0x5000500050005ll,
6112
+ 0x2111211121112ll,
6113
+ 0x0000000002020ll);
6114
+ if (y != 0x7111711171117)
6117
+ y = subs_di_test2 (0x130000029ll,
6120
+ if (y != 0x955050433)
6123
+ y = subs_di_test2 (0x130002900ll,
6126
+ if (y != 0x955052d0a)
6129
+ y = subs_di_test3 (0x130000029ll,
6132
+ if (y != 0x3790504f6)
6135
+ y = subs_di_test3 (0x130002900ll,
6138
+ if (y != 0x27d052dcd)
6144
+/* { dg-final { cleanup-saved-temps } } */
6145
--- a/src/gcc/testsuite/gcc.target/aarch64/adds2.c
6146
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds2.c
6148
+/* { dg-do run } */
6149
+/* { dg-options "-O2 --save-temps -fno-inline" } */
6151
+extern void abort (void);
6154
+adds_si_test1 (int a, int b, int c)
6158
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
6159
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
6167
+adds_si_test2 (int a, int b, int c)
6169
+ int d = a + 0xfff;
6171
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
6172
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
6180
+adds_si_test3 (int a, int b, int c)
6182
+ int d = a + (b << 3);
6184
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
6185
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
6192
+typedef long long s64;
6195
+adds_di_test1 (s64 a, s64 b, s64 c)
6199
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
6200
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
6208
+adds_di_test2 (s64 a, s64 b, s64 c)
6210
+ s64 d = a + 0x1000ll;
6212
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
6213
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
6221
+adds_di_test3 (s64 a, s64 b, s64 c)
6223
+ s64 d = a + (b << 3);
6225
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
6226
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
6238
+ x = adds_si_test1 (29, 4, 5);
6242
+ x = adds_si_test1 (5, 2, 20);
6246
+ x = adds_si_test2 (29, 4, 5);
6250
+ x = adds_si_test2 (1024, 2, 20);
6254
+ x = adds_si_test3 (35, 4, 5);
6258
+ x = adds_si_test3 (5, 2, 20);
6262
+ y = adds_di_test1 (0x130000029ll,
6266
+ if (y != 0xc75050536)
6269
+ y = adds_di_test1 (0x5000500050005ll,
6270
+ 0x2111211121112ll,
6271
+ 0x0000000002020ll);
6272
+ if (y != 0x9222922294249)
6275
+ y = adds_di_test2 (0x130000029ll,
6278
+ if (y != 0x955051532)
6281
+ y = adds_di_test2 (0x540004100ll,
6284
+ if (y != 0x1065055309)
6287
+ y = adds_di_test3 (0x130000029ll,
6290
+ if (y != 0x9b9050576)
6293
+ y = adds_di_test3 (0x130002900ll,
6296
+ if (y != 0xafd052e4d)
6302
+/* { dg-final { cleanup-saved-temps } } */
6303
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
6304
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
6306
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
6308
#define FTYPE double
6313
#include "vect-fcm.x"
6315
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
6316
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
6317
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
6318
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
6319
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
6320
--- a/src/gcc/testsuite/lib/target-supports.exp
6321
+++ b/src/gcc/testsuite/lib/target-supports.exp
6322
@@ -2012,6 +2012,7 @@
6323
|| ([istarget powerpc*-*-*]
6324
&& ![istarget powerpc-*-linux*paired*])
6325
|| [istarget x86_64-*-*]
6326
+ || [istarget aarch64*-*-*]
6327
|| ([istarget arm*-*-*]
6328
&& [check_effective_target_arm_neon_ok])} {
6329
set et_vect_uintfloat_cvt_saved 1
6330
@@ -2147,22 +2148,6 @@
6334
-# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
6335
-# -mfloat-abi=softfp
6336
-proc check_effective_target_arm_v8_neon_ok {} {
6337
- if { [check_effective_target_arm32] } {
6338
- return [check_no_compiler_messages arm_v8_neon_ok object {
6341
- __asm__ volatile ("vrintn.f32 q0, q0");
6344
- } "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"]
6350
# Return 1 if this is an ARM target supporting -mfpu=vfp
6351
# -mfloat-abi=hard. Some multilibs may be incompatible with these
6353
@@ -2226,7 +2211,8 @@
6354
if { ! [check_effective_target_arm_v8_neon_ok] } {
6357
- return "$flags -march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=softfp"
6358
+ global et_arm_v8_neon_flags
6359
+ return "$flags $et_arm_v8_neon_flags -march=armv8-a"
6362
# Add the options needed for NEON. We need either -mfloat-abi=softfp
6363
@@ -2270,6 +2256,79 @@
6364
check_effective_target_arm_neon_ok_nocache]
6367
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
6368
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
6369
+# incompatible with these options. Also set et_arm_neon_flags to the
6370
+# best options to add.
6372
+proc check_effective_target_arm_neon_fp16_ok_nocache { } {
6373
+ global et_arm_neon_fp16_flags
6374
+ set et_arm_neon_fp16_flags ""
6375
+ if { [check_effective_target_arm32] } {
6376
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
6377
+ "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
6378
+ if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
6379
+ #include "arm_neon.h"
6381
+ foo (float32x4_t arg)
6383
+ return vcvt_f16_f32 (arg);
6386
+ set et_arm_neon_fp16_flags $flags
6395
+proc check_effective_target_arm_neon_fp16_ok { } {
6396
+ return [check_cached_effective_target arm_neon_fp16_ok \
6397
+ check_effective_target_arm_neon_fp16_ok_nocache]
6400
+proc add_options_for_arm_neon_fp16 { flags } {
6401
+ if { ! [check_effective_target_arm_neon_fp16_ok] } {
6404
+ global et_arm_neon_fp16_flags
6405
+ return "$flags $et_arm_neon_fp16_flags"
6408
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
6409
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
6410
+# incompatible with these options. Also set et_arm_v8_neon_flags to the
6411
+# best options to add.
6413
+proc check_effective_target_arm_v8_neon_ok_nocache { } {
6414
+ global et_arm_v8_neon_flags
6415
+ set et_arm_v8_neon_flags ""
6416
+ if { [check_effective_target_arm32] } {
6417
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
6418
+ if { [check_no_compiler_messages_nocache arm_v8_neon_ok object {
6419
+ #include "arm_neon.h"
6423
+ __asm__ volatile ("vrintn.f32 q0, q0");
6426
+ set et_arm_v8_neon_flags $flags
6435
+proc check_effective_target_arm_v8_neon_ok { } {
6436
+ return [check_cached_effective_target arm_v8_neon_ok \
6437
+ check_effective_target_arm_v8_neon_ok_nocache]
6440
# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
6441
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
6442
# incompatible with these options. Also set et_arm_neonv2_flags to the
6443
@@ -2509,6 +2568,24 @@
6444
} [add_options_for_arm_neonv2 ""]]
6447
+# Return 1 if the target supports executing ARMv8 NEON instructions, 0
6450
+proc check_effective_target_arm_v8_neon_hw { } {
6451
+ return [check_runtime arm_v8_neon_hw_available {
6452
+ #include "arm_neon.h"
6457
+ asm ("vrinta.f32 %P0, %P1"
6462
+ } [add_options_for_arm_v8_neon ""]]
6465
# Return 1 if this is a ARM target with NEON enabled.
6467
proc check_effective_target_arm_neon { } {
6468
--- a/src/gcc/testsuite/ChangeLog.linaro
6469
+++ b/src/gcc/testsuite/ChangeLog.linaro
6471
+2013-06-05 Christophe Lyon <christophe.lyon@linaro.org>
6473
+ Backport from trunk r199658.
6474
+ 2013-06-04 Ian Bolton <ian.bolton@arm.com>
6476
+ * gcc.target/aarch64/movi_1.c: New test.
6478
+2013-06-04 Christophe Lyon <christophe.lyon@linaro.org>
6480
+ Backport from trunk r199261.
6481
+ 2013-05-23 Christian Bruel <christian.bruel@st.com>
6484
+ * gcc.dg/debug/pr57351.c: New test
6486
+2013-06-03 Christophe Lyon <christophe.lyon@linaro.org>
6487
+ Backport from trunk r198890,199254,199294,199454.
6489
+ 2013-05-30 Ian Bolton <ian.bolton@arm.com>
6491
+ * gcc.target/aarch64/insv_1.c: New test.
6493
+ 2013-05-24 Ian Bolton <ian.bolton@arm.com>
6495
+ * gcc.target/aarch64/scalar_intrinsics.c
6496
+ (force_simd): Use a valid instruction.
6497
+ (test_vdupd_lane_s64): Pass a valid lane argument.
6498
+ (test_vdupd_lane_u64): Likewise.
6500
+ 2013-05-23 Vidya Praveen <vidyapraveen@arm.com>
6502
+ * gcc.target/aarch64/vect-clz.c: New file.
6504
+ 2013-05-14 James Greenhalgh <james.greenhalgh@arm.com>
6506
+ * gcc.target/aarch64/vect-fcm.x: Add cases testing
6507
+ FLOAT cmp FLOAT ? INT : INT.
6508
+ * gcc.target/aarch64/vect-fcm-eq-d.c: Define IMODE.
6509
+ * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
6510
+ * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
6511
+ * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
6512
+ * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
6513
+ * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
6515
+2013-05-29 Christophe Lyon <christophe.lyon@linaro.org>
6517
+ Backport from trunk r198928.
6518
+ 2013-05-15 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
6521
+ * gcc.target/arm/pr40887.c: Adjust testcase.
6522
+ * gcc.target/arm/pr19599.c: New test.
6524
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
6526
+ Backport from trunk r198680.
6527
+ 2013-05-07 Sofiane Naci <sofiane.naci@arm.com>
6529
+ * gcc.target/aarch64/scalar_intrinsics.c: Update.
6531
+2013-05-28 Christophe Lyon <christophe.lyon@linaro.org>
6533
+ Backport from trunk r198499-198500.
6534
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
6535
+ * gcc.target/aarch64/vect-vaddv.c: New.
6537
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
6539
+ * gcc.target/aarch64/vect-vmaxv.c: New.
6540
+ * gcc.target/aarch64/vect-vfmaxv.c: Likewise.
6542
+2013-05-23 Christophe Lyon <christophe.lyon@linaro.org>
6544
+ Backport from trunk r198970.
6545
+ 2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
6547
+ * gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output.
6548
+ * gcc.target/arm/unaligned-memcpy-3.c: Likewise.
6549
+ * gcc.target/arm/unaligned-memcpy-4.c: Likewise.
6551
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6553
+ GCC Linaro 4.8-2013.05 released.
6555
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6557
+ Backport from trunk r198574-198575.
6558
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
6560
+ * gcc.target/aarch64/fabd.c: New file.
6562
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6564
+ Backport from trunk r198490-198496.
6565
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
6567
+ * gcc.target/aarch64/scalar-vca.c: New.
6568
+ * gcc.target/aarch64/vect-vca.c: Likewise.
6570
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
6572
+ * gcc.target/aarch64/scalar_intrinsics.c (force_simd): New.
6573
+ (test_vceqd_s64): Force arguments to SIMD registers.
6574
+ (test_vceqzd_s64): Likewise.
6575
+ (test_vcged_s64): Likewise.
6576
+ (test_vcled_s64): Likewise.
6577
+ (test_vcgezd_s64): Likewise.
6578
+ (test_vcged_u64): Likewise.
6579
+ (test_vcgtd_s64): Likewise.
6580
+ (test_vcltd_s64): Likewise.
6581
+ (test_vcgtzd_s64): Likewise.
6582
+ (test_vcgtd_u64): Likewise.
6583
+ (test_vclezd_s64): Likewise.
6584
+ (test_vcltzd_s64): Likewise.
6585
+ (test_vtst_s64): Likewise.
6586
+ (test_vtst_u64): Likewise.
6588
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6590
+ Backport from trunk r198191.
6591
+ 2013-04-23 Sofiane Naci <sofiane.naci@arm.com>
6593
+ * gcc.target/aarch64/scalar-mov.c: New testcase.
6595
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6597
+ Backport from trunk r197838.
6598
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
6600
+ * gcc.target/aarch64/negs.c: New.
6602
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6604
+ Backport from trunk r198019.
6605
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
6607
+ * gcc.target/aarch64/adds1.c: New.
6608
+ * gcc.target/aarch64/adds2.c: New.
6609
+ * gcc.target/aarch64/subs1.c: New.
6610
+ * gcc.target/aarch64/subs2.c: New.
6612
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6614
+ Backport from trunk r198394,198396-198400,198402-198404,198406.
6615
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
6617
+ * lib/target-supports.exp (vect_uintfloat_cvt): Enable for AArch64.
6619
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
6621
+ * gcc.target/aarch64/vect-vcvt.c: New.
6623
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
6625
+ * gcc.target/aarch64/vect-vrnd.c: New.
6627
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6629
+ Backport from trunk r198302-198306,198316.
6630
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
6631
+ Tejas Belagod <tejas.belagod@arm.com>
6633
+ * gcc.target/aarch64/vaddv-intrinsic.c: New.
6634
+ * gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
6635
+ * gcc.target/aarch64/vaddv-intrinsic.x: Likewise.
6637
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
6639
+ * gcc.target/aarch64/cmp.c: New.
6641
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
6643
+ * gcc.target/aarch64/ngc.c: New.
6645
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6647
+ Backport from trunk r198298.
6648
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
6650
+ * lib/target-supports.exp
6651
+ (check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
6652
+ (check_effective_target_arm_neon_fp16_ok): Likewise.
6653
+ (add_options_for_arm_neon_fp16): Likewise.
6654
+ * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
6655
+ * gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
6657
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6659
+ Backport from trunk r198136-198137,198142,198176
6660
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
6662
+ * gcc.target/aarch64/vrecps.c: New.
6663
+ * gcc.target/aarch64/vrecpx.c: Likewise.
6665
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6667
+ Backport from trunk r198020.
6668
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
6670
+ * gcc.target/aarch64/adds3.c: New.
6671
+ * gcc.target/aarch64/subs3.c: New.
6673
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6675
+ Backport from trunk r197965.
6676
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
6678
+ * gcc.target/arm/anddi3-opt.c: New test.
6679
+ * gcc.target/arm/anddi3-opt2.c: Likewise.
6681
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6683
+ Backport from trunk r197642.
6684
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
6686
+ * gcc.target/arm/minmax_minus.c: New test.
6688
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6690
+ Backport from trunk r197530,197921.
6691
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
6693
+ * gcc.target/arm/peep-ldrd-1.c: New test.
6694
+ * gcc.target/arm/peep-strd-1.c: Likewise.
6696
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6698
+ Backport from trunk r197523.
6699
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
6701
+ * lib/target-supports.exp (add_options_for_arm_v8_neon):
6702
+ Add -march=armv8-a when we use v8 NEON.
6703
+ (check_effective_target_vect_call_btruncf): Remove arm-*-*-*.
6704
+ (check_effective_target_vect_call_ceilf): Likewise.
6705
+ (check_effective_target_vect_call_floorf): Likewise.
6706
+ (check_effective_target_vect_call_roundf): Likewise.
6707
+ (check_vect_support_and_set_flags): Remove check for arm_v8_neon.
6708
+ * gcc.target/arm/vect-rounding-btruncf.c: New testcase.
6709
+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise.
6710
+ * gcc.target/arm/vect-rounding-floorf.c: Likewise.
6711
+ * gcc.target/arm/vect-rounding-roundf.c: Likewise.
6713
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6715
+ Backport from trunk r197518-197522,197516-197528.
6716
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
6718
+ * gcc.target/arm/negdi-1.c: New test.
6719
+ * gcc.target/arm/negdi-2.c: Likewise.
6720
+ * gcc.target/arm/negdi-3.c: Likewise.
6721
+ * gcc.target/arm/negdi-4.c: Likewise.
6723
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6725
+ Backport from trunk r197489-197491.
6726
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
6728
+ * lib/target-supports.exp (check_effective_target_arm_v8_neon_hw):
6730
+ (check_effective_target_arm_v8_neon_ok_nocache):
6732
+ (check_effective_target_arm_v8_neon_ok): Change to use
6733
+ check_effective_target_arm_v8_neon_ok_nocache.
6734
+ (add_options_for_arm_v8_neon): Use et_arm_v8_neon_flags to set ARMv8
6736
+ (check_effective_target_vect_call_btruncf):
6737
+ Enable for arm and ARMv8 NEON.
6738
+ (check_effective_target_vect_call_ceilf): Likewise.
6739
+ (check_effective_target_vect_call_floorf): Likewise.
6740
+ (check_effective_target_vect_call_roundf): Likewise.
6741
+ (check_vect_support_and_set_flags): Handle ARMv8 NEON effective
6744
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6746
+ Backport from trunk r196795-196797,196957.
6747
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
6749
+ * gcc.target/aarch64/sbc.c: New test.
6751
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
6753
+ * gcc.target/aarch64/ror.c: New test.
6755
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
6757
+ * gcc.target/aarch64/extr.c: New test.
6759
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6761
+ * GCC Linaro 4.8-2013.04 released.
6763
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6765
+ Backport from trunk r197052.
6766
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
6768
+ * gcc.target/arm/vseleqdf.c: New test.
6769
+ * gcc.target/arm/vseleqsf.c: Likewise.
6770
+ * gcc.target/arm/vselgedf.c: Likewise.
6771
+ * gcc.target/arm/vselgesf.c: Likewise.
6772
+ * gcc.target/arm/vselgtdf.c: Likewise.
6773
+ * gcc.target/arm/vselgtsf.c: Likewise.
6774
+ * gcc.target/arm/vselledf.c: Likewise.
6775
+ * gcc.target/arm/vsellesf.c: Likewise.
6776
+ * gcc.target/arm/vselltdf.c: Likewise.
6777
+ * gcc.target/arm/vselltsf.c: Likewise.
6778
+ * gcc.target/arm/vselnedf.c: Likewise.
6779
+ * gcc.target/arm/vselnesf.c: Likewise.
6780
+ * gcc.target/arm/vselvcdf.c: Likewise.
6781
+ * gcc.target/arm/vselvcsf.c: Likewise.
6782
+ * gcc.target/arm/vselvsdf.c: Likewise.
6783
+ * gcc.target/arm/vselvssf.c: Likewise.
6785
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6787
+ Backport from trunk r197051.
6788
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
6790
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.c: Move test
6792
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.x: ... to here.
6793
+ * gcc.target/aarch64/atomic-op-acq_rel.c: Move test body from here...
6794
+ * gcc.target/aarch64/atomic-op-acq_rel.x: ... to here.
6795
+ * gcc.target/aarch64/atomic-op-acquire.c: Move test body from here...
6796
+ * gcc.target/aarch64/atomic-op-acquire.x: ... to here.
6797
+ * gcc.target/aarch64/atomic-op-char.c: Move test body from here...
6798
+ * gcc.target/aarch64/atomic-op-char.x: ... to here.
6799
+ * gcc.target/aarch64/atomic-op-consume.c: Move test body from here...
6800
+ * gcc.target/aarch64/atomic-op-consume.x: ... to here.
6801
+ * gcc.target/aarch64/atomic-op-int.c: Move test body from here...
6802
+ * gcc.target/aarch64/atomic-op-int.x: ... to here.
6803
+ * gcc.target/aarch64/atomic-op-relaxed.c: Move test body from here...
6804
+ * gcc.target/aarch64/atomic-op-relaxed.x: ... to here.
6805
+ * gcc.target/aarch64/atomic-op-release.c: Move test body from here...
6806
+ * gcc.target/aarch64/atomic-op-release.x: ... to here.
6807
+ * gcc.target/aarch64/atomic-op-seq_cst.c: Move test body from here...
6808
+ * gcc.target/aarch64/atomic-op-seq_cst.x: ... to here.
6809
+ * gcc.target/aarch64/atomic-op-short.c: Move test body from here...
6810
+ * gcc.target/aarch64/atomic-op-short.x: ... to here.
6811
+ * gcc.target/arm/atomic-comp-swap-release-acquire.c: New test.
6812
+ * gcc.target/arm/atomic-op-acq_rel.c: Likewise.
6813
+ * gcc.target/arm/atomic-op-acquire.c: Likewise.
6814
+ * gcc.target/arm/atomic-op-char.c: Likewise.
6815
+ * gcc.target/arm/atomic-op-consume.c: Likewise.
6816
+ * gcc.target/arm/atomic-op-int.c: Likewise.
6817
+ * gcc.target/arm/atomic-op-relaxed.c: Likewise.
6818
+ * gcc.target/arm/atomic-op-release.c: Likewise.
6819
+ * gcc.target/arm/atomic-op-seq_cst.c: Likewise.
6820
+ * gcc.target/arm/atomic-op-short.c: Likewise.
6822
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6824
+ Backport from trunk r196876.
6825
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
6827
+ * gcc.target/arm/neon-for-64bits-1.c: New tests.
6828
+ * gcc.target/arm/neon-for-64bits-2.c: Likewise.
6830
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6832
+ Backport from trunk r196858.
6833
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
6835
+ * gcc.target/aarch64/vect.c: Test and result vector added
6836
+ for sabd and saba instructions.
6837
+ * gcc.target/aarch64/vect-compile.c: Check for sabd and saba
6838
+ instructions in assembly.
6839
+ * gcc.target/aarch64/vect.x: Add sabd and saba test functions.
6840
+ * gcc.target/aarch64/vect-fp.c: Test and result vector added
6841
+ for fabd instruction.
6842
+ * gcc.target/aarch64/vect-fp-compile.c: Check for fabd
6843
+ instruction in assembly.
6844
+ * gcc.target/aarch64/vect-fp.x: Add fabd test function.
6845
--- a/src/gcc/testsuite/gcc.dg/shrink-wrap-alloca.c
6846
+++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-alloca.c
6848
+/* { dg-do compile } */
6849
+/* { dg-options "-O2 -g" } */
6851
+extern int * alloca (int);
6861
--- a/src/gcc/testsuite/gcc.dg/shrink-wrap-pretend.c
6862
+++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-pretend.c
6864
+/* { dg-do compile } */
6865
+/* { dg-options "-O2 -g" } */
6867
+#include <stdlib.h>
6869
+#include <stdarg.h>
6871
+#define DEBUG_BUFFER_SIZE 80
6872
+int unifi_debug = 5;
6875
+unifi_trace (void* ospriv, int level, const char *fmt, ...)
6877
+ static char s[DEBUG_BUFFER_SIZE];
6884
+ if (unifi_debug >= level)
6886
+ va_start (args, fmt);
6887
+ len = vsnprintf (&(s)[0], (DEBUG_BUFFER_SIZE), fmt, args);
6890
+ if (len >= DEBUG_BUFFER_SIZE)
6892
+ (s)[DEBUG_BUFFER_SIZE - 2] = '\n';
6893
+ (s)[DEBUG_BUFFER_SIZE - 1] = 0;
6900
--- a/src/gcc/testsuite/gcc.dg/debug/pr57351.c
6901
+++ b/src/gcc/testsuite/gcc.dg/debug/pr57351.c
6903
+/* { dg-do compile } */
6904
+/* { dg-require-effective-target arm_neon } */
6905
+/* { dg-options "-std=c99 -Os -g -march=armv7-a" } */
6906
+/* { dg-add-options arm_neon } */
6908
+typedef unsigned int size_t;
6909
+typedef int ptrdiff_t;
6910
+typedef signed char int8_t ;
6911
+typedef signed long long int64_t;
6912
+typedef int8_t GFC_INTEGER_1;
6913
+typedef GFC_INTEGER_1 GFC_LOGICAL_1;
6914
+typedef int64_t GFC_INTEGER_8;
6915
+typedef GFC_INTEGER_8 GFC_LOGICAL_8;
6916
+typedef ptrdiff_t index_type;
6917
+typedef struct descriptor_dimension
6919
+ index_type lower_bound;
6920
+ index_type _ubound;
6922
+descriptor_dimension;
6923
+typedef struct { GFC_LOGICAL_1 *base_addr; size_t offset; index_type dtype; descriptor_dimension dim[7];} gfc_array_l1;
6924
+typedef struct { GFC_LOGICAL_8 *base_addr; size_t offset; index_type dtype; descriptor_dimension dim[7];} gfc_array_l8;
6926
+all_l8 (gfc_array_l8 * const restrict retarray,
6927
+ gfc_array_l1 * const restrict array,
6928
+ const index_type * const restrict pdim)
6930
+ GFC_LOGICAL_8 * restrict dest;
6935
+ dim = (*pdim) - 1;
6936
+ len = ((array)->dim[dim]._ubound + 1 - (array)->dim[dim].lower_bound);
6937
+ for (n = 0; n < dim; n++)
6939
+ const GFC_LOGICAL_1 * restrict src;
6940
+ GFC_LOGICAL_8 result;
6944
+ for (n = 0; n < len; n++, src += delta)
6957
--- a/src/gcc/testsuite/gcc.dg/shrink-wrap-sibcall.c
6958
+++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-sibcall.c
6960
+/* { dg-do compile } */
6961
+/* { dg-options "-O2 -g" } */
6963
+unsigned char a, b, d, f, g;
6970
+ if (c == 0) return test ();
6974
+ int e = (a & 0x0f) - (g & 0x0f);
6976
+ if (!a) b |= 0x80;
6978
+ f = g/5 + a*3879 + b *2985;
6982
+ f = g + a*39879 + b *25;
6986
--- a/src/gcc/objcp/ChangeLog.linaro
6987
+++ b/src/gcc/objcp/ChangeLog.linaro
6989
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6991
+ GCC Linaro 4.8-2013.05 released.
6993
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
6995
+ * GCC Linaro 4.8-2013.04 released.
6996
--- a/src/gcc/cp/ChangeLog.linaro
6997
+++ b/src/gcc/cp/ChangeLog.linaro
6999
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7001
+ GCC Linaro 4.8-2013.05 released.
7003
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7005
+ * GCC Linaro 4.8-2013.04 released.
7006
--- a/src/gcc/go/ChangeLog.linaro
7007
+++ b/src/gcc/go/ChangeLog.linaro
7009
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7011
+ GCC Linaro 4.8-2013.05 released.
7013
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7015
+ * GCC Linaro 4.8-2013.04 released.
7016
--- a/src/gcc/ada/ChangeLog.linaro
7017
+++ b/src/gcc/ada/ChangeLog.linaro
7019
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7021
+ GCC Linaro 4.8-2013.05 released.
7023
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7025
+ * GCC Linaro 4.8-2013.04 released.
7026
--- a/src/gcc/common/config/aarch64/aarch64-common.c
7027
+++ b/src/gcc/common/config/aarch64/aarch64-common.c
7030
/* Enable section anchors by default at -O1 or higher. */
7031
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
7032
+ /* Enable redundant extension instructions removal at -O2 and higher. */
7033
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
7034
{ OPT_LEVELS_NONE, 0, NULL, 0 }
7037
--- a/src/gcc/fortran/ChangeLog.linaro
7038
+++ b/src/gcc/fortran/ChangeLog.linaro
7040
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7042
+ GCC Linaro 4.8-2013.05 released.
7044
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7046
+ * GCC Linaro 4.8-2013.04 released.
7047
--- a/src/gcc/configure.ac
7048
+++ b/src/gcc/configure.ac
7051
AC_SUBST(CONFIGURE_SPECS)
7053
-ACX_PKGVERSION([GCC])
7054
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
7055
ACX_BUGURL([http://gcc.gnu.org/bugs.html])
7057
# Sanity check enable_languages in case someone does not run the toplevel
7058
--- a/src/gcc/coretypes.h
7059
+++ b/src/gcc/coretypes.h
7061
typedef union gimple_statement_d *gimple;
7062
typedef const union gimple_statement_d *const_gimple;
7063
typedef gimple gimple_seq;
7064
+struct gimple_stmt_iterator_d;
7065
+typedef struct gimple_stmt_iterator_d gimple_stmt_iterator;
7067
typedef union section section;
7069
--- a/src/gcc/gimple-fold.c
7070
+++ b/src/gcc/gimple-fold.c
7071
@@ -1143,6 +1143,8 @@
7072
gimplify_and_update_call_from_tree (gsi, result);
7075
+ else if (DECL_BUILT_IN_CLASS (callee) == BUILT_IN_MD)
7076
+ changed |= targetm.gimple_fold_builtin (gsi);
7080
--- a/src/gcc/lto/ChangeLog.linaro
7081
+++ b/src/gcc/lto/ChangeLog.linaro
7083
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7085
+ GCC Linaro 4.8-2013.05 released.
7087
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7089
+ * GCC Linaro 4.8-2013.04 released.
7090
--- a/src/gcc/po/ChangeLog.linaro
7091
+++ b/src/gcc/po/ChangeLog.linaro
7093
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7095
+ GCC Linaro 4.8-2013.05 released.
7097
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
7099
+ * GCC Linaro 4.8-2013.04 released.
7100
--- a/src/gcc/gimple.h
7101
+++ b/src/gcc/gimple.h
7104
/* Iterator object for GIMPLE statement sequences. */
7107
+struct gimple_stmt_iterator_d
7109
/* Sequence node holding the current statement. */
7110
gimple_seq_node ptr;
7112
block/sequence is removed. */
7115
-} gimple_stmt_iterator;
7119
/* Data structure definitions for GIMPLE tuples. NOTE: word markers
7120
are for 64 bit hosts. */
7122
--- a/src/gcc/config/aarch64/aarch64-simd.md
7123
+++ b/src/gcc/config/aarch64/aarch64-simd.md
7126
; Main data types used by the insntructions
7128
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
7129
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
7130
(const_string "unknown"))
7134
; simd_dup duplicate element.
7135
; simd_dupgp duplicate general purpose register.
7136
; simd_ext bitwise extract from pair.
7137
+; simd_fabd floating point absolute difference.
7138
; simd_fadd floating point add/sub.
7139
; simd_fcmp floating point compare.
7140
; simd_fcvti floating point convert to integer.
7142
; simd_fmul floating point multiply.
7143
; simd_fmul_elt floating point multiply (by element).
7144
; simd_fnegabs floating point neg/abs.
7145
-; simd_frcpe floating point reciprocal estimate.
7146
-; simd_frcps floating point reciprocal step.
7147
-; simd_frecx floating point reciprocal exponent.
7148
+; simd_frecpe floating point reciprocal estimate.
7149
+; simd_frecps floating point reciprocal step.
7150
+; simd_frecpx floating point reciprocal exponent.
7151
; simd_frint floating point round to integer.
7152
; simd_fsqrt floating point square root.
7153
; simd_icvtf integer convert to floating point.
7176
(eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs")
7177
(eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane")
7178
(eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane")
7179
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
7180
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
7181
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
7182
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
7183
(eq_attr "simd_type" "none") (const_string "none")
7185
(const_string "unknown")))
7189
(define_insn "aarch64_dup_lane<mode>"
7190
- [(set (match_operand:SDQ_I 0 "register_operand" "=w")
7191
+ [(set (match_operand:ALLX 0 "register_operand" "=w")
7193
(match_operand:<VCON> 1 "register_operand" "w")
7194
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
7195
@@ -367,6 +369,19 @@
7196
(set_attr "simd_mode" "<MODE>")]
7199
+(define_insn "aarch64_dup_lanedi"
7200
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
7202
+ (match_operand:V2DI 1 "register_operand" "w,w")
7203
+ (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
7206
+ dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]
7207
+ umov\t%0, %1.d[%2]"
7208
+ [(set_attr "simd_type" "simd_dup")
7209
+ (set_attr "simd_mode" "DI")]
7212
(define_insn "aarch64_simd_dup<mode>"
7213
[(set (match_operand:VDQF 0 "register_operand" "=w")
7214
(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
7216
case 4: return "ins\t%0.d[0], %1";
7217
case 5: return "mov\t%0, %1";
7219
- return aarch64_output_simd_mov_immediate (&operands[1],
7220
+ return aarch64_output_simd_mov_immediate (operands[1],
7222
default: gcc_unreachable ();
7224
@@ -417,13 +432,13 @@
7225
case 0: return "ld1\t{%0.<Vtype>}, %1";
7226
case 1: return "st1\t{%1.<Vtype>}, %0";
7227
case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
7228
- case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
7229
- case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
7230
+ case 3: return "#";
7231
+ case 4: return "#";
7234
- return aarch64_output_simd_mov_immediate (&operands[1],
7236
- default: gcc_unreachable ();
7237
+ return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
7239
+ gcc_unreachable ();
7242
[(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
7243
@@ -452,6 +467,105 @@
7244
aarch64_simd_disambiguate_copy (operands, dest, src, 2);
7248
+ [(set (match_operand:VQ 0 "register_operand" "")
7249
+ (match_operand:VQ 1 "register_operand" ""))]
7250
+ "TARGET_SIMD && reload_completed
7251
+ && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
7252
+ || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
7255
+ aarch64_split_simd_move (operands[0], operands[1]);
7259
+(define_expand "aarch64_simd_mov<mode>"
7260
+ [(set (match_operand:VQ 0)
7261
+ (match_operand:VQ 1))]
7264
+ rtx dst = operands[0];
7265
+ rtx src = operands[1];
7267
+ if (GP_REGNUM_P (REGNO (src)))
7269
+ rtx low_part = gen_lowpart (<VHALF>mode, src);
7270
+ rtx high_part = gen_highpart (<VHALF>mode, src);
7273
+ (gen_aarch64_simd_mov_to_<mode>low (dst, low_part));
7275
+ (gen_aarch64_simd_mov_to_<mode>high (dst, high_part));
7280
+ rtx low_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
7281
+ rtx high_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
7282
+ rtx low_part = gen_lowpart (<VHALF>mode, dst);
7283
+ rtx high_part = gen_highpart (<VHALF>mode, dst);
7286
+ (gen_aarch64_simd_mov_from_<mode>low (low_part, src, low_half));
7288
+ (gen_aarch64_simd_mov_from_<mode>high (high_part, src, high_half));
7294
+(define_insn "aarch64_simd_mov_to_<mode>low"
7295
+ [(set (zero_extract:VQ
7296
+ (match_operand:VQ 0 "register_operand" "+w")
7297
+ (const_int 64) (const_int 0))
7299
+ (match_operand:<VHALF> 1 "register_operand" "r")
7300
+ (vec_duplicate:<VHALF> (const_int 0))))]
7301
+ "TARGET_SIMD && reload_completed"
7302
+ "ins\t%0.d[0], %1"
7303
+ [(set_attr "simd_type" "simd_move")
7304
+ (set_attr "simd_mode" "<MODE>")
7305
+ (set_attr "length" "4")
7308
+(define_insn "aarch64_simd_mov_to_<mode>high"
7309
+ [(set (zero_extract:VQ
7310
+ (match_operand:VQ 0 "register_operand" "+w")
7311
+ (const_int 64) (const_int 64))
7313
+ (match_operand:<VHALF> 1 "register_operand" "r")
7314
+ (vec_duplicate:<VHALF> (const_int 0))))]
7315
+ "TARGET_SIMD && reload_completed"
7316
+ "ins\t%0.d[1], %1"
7317
+ [(set_attr "simd_type" "simd_move")
7318
+ (set_attr "simd_mode" "<MODE>")
7319
+ (set_attr "length" "4")
7322
+(define_insn "aarch64_simd_mov_from_<mode>low"
7323
+ [(set (match_operand:<VHALF> 0 "register_operand" "=r")
7324
+ (vec_select:<VHALF>
7325
+ (match_operand:VQ 1 "register_operand" "w")
7326
+ (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
7327
+ "TARGET_SIMD && reload_completed"
7328
+ "umov\t%0, %1.d[0]"
7329
+ [(set_attr "simd_type" "simd_move")
7330
+ (set_attr "simd_mode" "<MODE>")
7331
+ (set_attr "length" "4")
7334
+(define_insn "aarch64_simd_mov_from_<mode>high"
7335
+ [(set (match_operand:<VHALF> 0 "register_operand" "=r")
7336
+ (vec_select:<VHALF>
7337
+ (match_operand:VQ 1 "register_operand" "w")
7338
+ (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
7339
+ "TARGET_SIMD && reload_completed"
7340
+ "umov\t%0, %1.d[1]"
7341
+ [(set_attr "simd_type" "simd_move")
7342
+ (set_attr "simd_mode" "<MODE>")
7343
+ (set_attr "length" "4")
7346
(define_insn "orn<mode>3"
7347
[(set (match_operand:VDQ 0 "register_operand" "=w")
7348
(ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
7352
(define_insn "neg<mode>2"
7353
- [(set (match_operand:VDQM 0 "register_operand" "=w")
7354
- (neg:VDQM (match_operand:VDQM 1 "register_operand" "w")))]
7355
+ [(set (match_operand:VDQ 0 "register_operand" "=w")
7356
+ (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
7358
"neg\t%0.<Vtype>, %1.<Vtype>"
7359
[(set_attr "simd_type" "simd_negabs")
7360
@@ -520,6 +634,51 @@
7361
(set_attr "simd_mode" "<MODE>")]
7364
+(define_insn "abd<mode>_3"
7365
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
7366
+ (abs:VDQ_BHSI (minus:VDQ_BHSI
7367
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
7368
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
7370
+ "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7371
+ [(set_attr "simd_type" "simd_abd")
7372
+ (set_attr "simd_mode" "<MODE>")]
7375
+(define_insn "aba<mode>_3"
7376
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
7377
+ (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
7378
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
7379
+ (match_operand:VDQ_BHSI 2 "register_operand" "w")))
7380
+ (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
7382
+ "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7383
+ [(set_attr "simd_type" "simd_abd")
7384
+ (set_attr "simd_mode" "<MODE>")]
7387
+(define_insn "fabd<mode>_3"
7388
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
7389
+ (abs:VDQF (minus:VDQF
7390
+ (match_operand:VDQF 1 "register_operand" "w")
7391
+ (match_operand:VDQF 2 "register_operand" "w"))))]
7393
+ "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7394
+ [(set_attr "simd_type" "simd_fabd")
7395
+ (set_attr "simd_mode" "<MODE>")]
7398
+(define_insn "*fabd_scalar<mode>3"
7399
+ [(set (match_operand:GPF 0 "register_operand" "=w")
7400
+ (abs:GPF (minus:GPF
7401
+ (match_operand:GPF 1 "register_operand" "w")
7402
+ (match_operand:GPF 2 "register_operand" "w"))))]
7404
+ "fabd\t%<s>0, %<s>1, %<s>2"
7405
+ [(set_attr "simd_type" "simd_fabd")
7406
+ (set_attr "mode" "<MODE>")]
7409
(define_insn "and<mode>3"
7410
[(set (match_operand:VDQ 0 "register_operand" "=w")
7411
(and:VDQ (match_operand:VDQ 1 "register_operand" "w")
7412
@@ -904,12 +1063,12 @@
7415
;; Max/Min operations.
7416
-(define_insn "<maxmin><mode>3"
7417
+(define_insn "<su><maxmin><mode>3"
7418
[(set (match_operand:VQ_S 0 "register_operand" "=w")
7419
(MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w")
7420
(match_operand:VQ_S 2 "register_operand" "w")))]
7422
- "<maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7423
+ "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7424
[(set_attr "simd_type" "simd_minmax")
7425
(set_attr "simd_mode" "<MODE>")]
7427
@@ -1196,7 +1355,9 @@
7428
(set_attr "simd_mode" "<MODE>")]
7431
-(define_insn "aarch64_frint<frint_suffix><mode>"
7432
+;; Vector versions of the floating-point frint patterns.
7433
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
7434
+(define_insn "<frint_pattern><mode>2"
7435
[(set (match_operand:VDQF 0 "register_operand" "=w")
7436
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
7438
@@ -1206,16 +1367,9 @@
7439
(set_attr "simd_mode" "<MODE>")]
7442
-;; Vector versions of the floating-point frint patterns.
7443
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
7444
-(define_expand "<frint_pattern><mode>2"
7445
- [(set (match_operand:VDQF 0 "register_operand")
7446
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
7451
-(define_insn "aarch64_fcvt<frint_suffix><su><mode>"
7452
+;; Vector versions of the fcvt standard patterns.
7453
+;; Expands to lbtrunc, lround, lceil, lfloor
7454
+(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
7455
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
7456
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
7457
[(match_operand:VDQF 1 "register_operand" "w")]
7458
@@ -1226,16 +1380,141 @@
7459
(set_attr "simd_mode" "<MODE>")]
7462
-;; Vector versions of the fcvt standard patterns.
7463
-;; Expands to lbtrunc, lround, lceil, lfloor
7464
-(define_expand "l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2"
7465
+(define_expand "<optab><VDQF:mode><fcvt_target>2"
7466
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
7467
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
7468
[(match_operand:VDQF 1 "register_operand")]
7474
+(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
7475
+ [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
7476
+ (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
7477
+ [(match_operand:VDQF 1 "register_operand")]
7482
+(define_expand "ftrunc<VDQF:mode>2"
7483
+ [(set (match_operand:VDQF 0 "register_operand")
7484
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
7489
+(define_insn "<optab><fcvt_target><VDQF:mode>2"
7490
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
7492
+ (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
7494
+ "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
7495
+ [(set_attr "simd_type" "simd_icvtf")
7496
+ (set_attr "simd_mode" "<MODE>")]
7499
+;; Conversions between vectors of floats and doubles.
7500
+;; Contains a mix of patterns to match standard pattern names
7501
+;; and those for intrinsics.
7503
+;; Float widening operations.
7505
+(define_insn "vec_unpacks_lo_v4sf"
7506
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
7507
+ (float_extend:V2DF
7509
+ (match_operand:V4SF 1 "register_operand" "w")
7510
+ (parallel [(const_int 0) (const_int 1)])
7513
+ "fcvtl\\t%0.2d, %1.2s"
7514
+ [(set_attr "simd_type" "simd_fcvtl")
7515
+ (set_attr "simd_mode" "V2DF")]
7518
+(define_insn "aarch64_float_extend_lo_v2df"
7519
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
7520
+ (float_extend:V2DF
7521
+ (match_operand:V2SF 1 "register_operand" "w")))]
7523
+ "fcvtl\\t%0.2d, %1.2s"
7524
+ [(set_attr "simd_type" "simd_fcvtl")
7525
+ (set_attr "simd_mode" "V2DF")]
7528
+(define_insn "vec_unpacks_hi_v4sf"
7529
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
7530
+ (float_extend:V2DF
7532
+ (match_operand:V4SF 1 "register_operand" "w")
7533
+ (parallel [(const_int 2) (const_int 3)])
7536
+ "fcvtl2\\t%0.2d, %1.4s"
7537
+ [(set_attr "simd_type" "simd_fcvtl")
7538
+ (set_attr "simd_mode" "V2DF")]
7541
+;; Float narrowing operations.
7543
+(define_insn "aarch64_float_truncate_lo_v2sf"
7544
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
7545
+ (float_truncate:V2SF
7546
+ (match_operand:V2DF 1 "register_operand" "w")))]
7548
+ "fcvtn\\t%0.2s, %1.2d"
7549
+ [(set_attr "simd_type" "simd_fcvtl")
7550
+ (set_attr "simd_mode" "V2SF")]
7553
+(define_insn "aarch64_float_truncate_hi_v4sf"
7554
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
7556
+ (match_operand:V2SF 1 "register_operand" "0")
7557
+ (float_truncate:V2SF
7558
+ (match_operand:V2DF 2 "register_operand" "w"))))]
7560
+ "fcvtn2\\t%0.4s, %2.2d"
7561
+ [(set_attr "simd_type" "simd_fcvtl")
7562
+ (set_attr "simd_mode" "V4SF")]
7565
+(define_expand "vec_pack_trunc_v2df"
7566
+ [(set (match_operand:V4SF 0 "register_operand")
7568
+ (float_truncate:V2SF
7569
+ (match_operand:V2DF 1 "register_operand"))
7570
+ (float_truncate:V2SF
7571
+ (match_operand:V2DF 2 "register_operand"))
7575
+ rtx tmp = gen_reg_rtx (V2SFmode);
7576
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[1]));
7577
+ emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
7578
+ tmp, operands[2]));
7583
+(define_expand "vec_pack_trunc_df"
7584
+ [(set (match_operand:V2SF 0 "register_operand")
7586
+ (float_truncate:SF
7587
+ (match_operand:DF 1 "register_operand"))
7588
+ (float_truncate:SF
7589
+ (match_operand:DF 2 "register_operand"))
7593
+ rtx tmp = gen_reg_rtx (V2SFmode);
7594
+ emit_insn (gen_move_lo_quad_v2df (tmp, operands[1]));
7595
+ emit_insn (gen_move_hi_quad_v2df (tmp, operands[2]));
7596
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
7601
(define_insn "aarch64_vmls<mode>"
7602
[(set (match_operand:VDQF 0 "register_operand" "=w")
7603
(minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
7604
@@ -1261,51 +1540,70 @@
7605
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
7608
-(define_insn "smax<mode>3"
7609
+(define_insn "<su><maxmin><mode>3"
7610
[(set (match_operand:VDQF 0 "register_operand" "=w")
7611
- (smax:VDQF (match_operand:VDQF 1 "register_operand" "w")
7612
+ (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
7613
(match_operand:VDQF 2 "register_operand" "w")))]
7615
- "fmaxnm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7616
+ "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7617
[(set_attr "simd_type" "simd_fminmax")
7618
(set_attr "simd_mode" "<MODE>")]
7621
-(define_insn "smin<mode>3"
7622
+(define_insn "<maxmin_uns><mode>3"
7623
[(set (match_operand:VDQF 0 "register_operand" "=w")
7624
- (smin:VDQF (match_operand:VDQF 1 "register_operand" "w")
7625
- (match_operand:VDQF 2 "register_operand" "w")))]
7626
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
7627
+ (match_operand:VDQF 2 "register_operand" "w")]
7630
- "fminnm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7631
+ "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7632
[(set_attr "simd_type" "simd_fminmax")
7633
(set_attr "simd_mode" "<MODE>")]
7636
-;; FP 'across lanes' max and min ops.
7637
+;; 'across lanes' add.
7639
-(define_insn "reduc_s<fmaxminv>_v4sf"
7640
- [(set (match_operand:V4SF 0 "register_operand" "=w")
7641
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
7643
+(define_insn "reduc_<sur>plus_<mode>"
7644
+ [(set (match_operand:VDQV 0 "register_operand" "=w")
7645
+ (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
7648
- "f<fmaxminv>nmv\\t%s0, %1.4s";
7649
- [(set_attr "simd_type" "simd_fminmaxv")
7650
- (set_attr "simd_mode" "V4SF")]
7651
+ "addv\\t%<Vetype>0, %1.<Vtype>"
7652
+ [(set_attr "simd_type" "simd_addv")
7653
+ (set_attr "simd_mode" "<MODE>")]
7656
-(define_insn "reduc_s<fmaxminv>_<mode>"
7657
+(define_insn "reduc_<sur>plus_v2di"
7658
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
7659
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
7662
+ "addp\\t%d0, %1.2d"
7663
+ [(set_attr "simd_type" "simd_addv")
7664
+ (set_attr "simd_mode" "V2DI")]
7667
+(define_insn "reduc_<sur>plus_v2si"
7668
+ [(set (match_operand:V2SI 0 "register_operand" "=w")
7669
+ (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
7672
+ "addp\\t%0.2s, %1.2s, %1.2s"
7673
+ [(set_attr "simd_type" "simd_addv")
7674
+ (set_attr "simd_mode" "V2SI")]
7677
+(define_insn "reduc_<sur>plus_<mode>"
7678
[(set (match_operand:V2F 0 "register_operand" "=w")
7679
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
7683
- "f<fmaxminv>nmp\\t%0.<Vtype>, %1.<Vtype>, %1.<Vtype>";
7684
- [(set_attr "simd_type" "simd_fminmax")
7685
+ "faddp\\t%<Vetype>0, %1.<Vtype>"
7686
+ [(set_attr "simd_type" "simd_fadd")
7687
(set_attr "simd_mode" "<MODE>")]
7690
-;; FP 'across lanes' add.
7692
-(define_insn "aarch64_addvv4sf"
7693
+(define_insn "aarch64_addpv4sf"
7694
[(set (match_operand:V4SF 0 "register_operand" "=w")
7695
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
7697
@@ -1315,169 +1613,106 @@
7698
(set_attr "simd_mode" "V4SF")]
7701
-(define_expand "reduc_uplus_v4sf"
7702
- [(set (match_operand:V4SF 0 "register_operand" "=w")
7703
- (match_operand:V4SF 1 "register_operand" "w"))]
7704
+(define_expand "reduc_<sur>plus_v4sf"
7705
+ [(set (match_operand:V4SF 0 "register_operand")
7706
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
7710
rtx tmp = gen_reg_rtx (V4SFmode);
7711
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
7712
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
7713
+ emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
7714
+ emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
7718
-(define_expand "reduc_splus_v4sf"
7719
- [(set (match_operand:V4SF 0 "register_operand" "=w")
7720
- (match_operand:V4SF 1 "register_operand" "w"))]
7721
+(define_insn "clz<mode>2"
7722
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
7723
+ (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
7726
- rtx tmp = gen_reg_rtx (V4SFmode);
7727
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
7728
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
7732
-(define_insn "aarch64_addv<mode>"
7733
- [(set (match_operand:V2F 0 "register_operand" "=w")
7734
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
7737
- "faddp\\t%<Vetype>0, %1.<Vtype>"
7738
- [(set_attr "simd_type" "simd_fadd")
7739
- (set_attr "simd_mode" "<MODE>")]
7740
+ "clz\\t%0.<Vtype>, %1.<Vtype>"
7741
+ [(set_attr "simd_type" "simd_cls")
7742
+ (set_attr "simd_mode" "<MODE>")]
7745
-(define_expand "reduc_uplus_<mode>"
7746
- [(set (match_operand:V2F 0 "register_operand" "=w")
7747
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
7752
+;; 'across lanes' max and min ops.
7754
-(define_expand "reduc_splus_<mode>"
7755
- [(set (match_operand:V2F 0 "register_operand" "=w")
7756
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
7762
-;; Reduction across lanes.
7764
-(define_insn "aarch64_addv<mode>"
7765
+(define_insn "reduc_<maxmin_uns>_<mode>"
7766
[(set (match_operand:VDQV 0 "register_operand" "=w")
7767
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
7771
- "addv\\t%<Vetype>0, %1.<Vtype>"
7772
- [(set_attr "simd_type" "simd_addv")
7773
+ "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
7774
+ [(set_attr "simd_type" "simd_minmaxv")
7775
(set_attr "simd_mode" "<MODE>")]
7778
-(define_expand "reduc_splus_<mode>"
7779
- [(set (match_operand:VDQV 0 "register_operand" "=w")
7780
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
7786
-(define_expand "reduc_uplus_<mode>"
7787
- [(set (match_operand:VDQV 0 "register_operand" "=w")
7788
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
7794
-(define_insn "aarch64_addvv2di"
7795
+(define_insn "reduc_<maxmin_uns>_v2di"
7796
[(set (match_operand:V2DI 0 "register_operand" "=w")
7797
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
7801
- "addp\\t%d0, %1.2d"
7802
- [(set_attr "simd_type" "simd_add")
7803
+ "<maxmin_uns_op>p\\t%d0, %1.2d"
7804
+ [(set_attr "simd_type" "simd_minmaxv")
7805
(set_attr "simd_mode" "V2DI")]
7808
-(define_expand "reduc_uplus_v2di"
7809
- [(set (match_operand:V2DI 0 "register_operand" "=w")
7810
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
7816
-(define_expand "reduc_splus_v2di"
7817
- [(set (match_operand:V2DI 0 "register_operand" "=w")
7818
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
7824
-(define_insn "aarch64_addvv2si"
7825
+(define_insn "reduc_<maxmin_uns>_v2si"
7826
[(set (match_operand:V2SI 0 "register_operand" "=w")
7827
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
7831
- "addp\\t%0.2s, %1.2s, %1.2s"
7832
- [(set_attr "simd_type" "simd_add")
7833
+ "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
7834
+ [(set_attr "simd_type" "simd_minmaxv")
7835
(set_attr "simd_mode" "V2SI")]
7838
-(define_expand "reduc_uplus_v2si"
7839
- [(set (match_operand:V2SI 0 "register_operand" "=w")
7840
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
7842
+(define_insn "reduc_<maxmin_uns>_<mode>"
7843
+ [(set (match_operand:V2F 0 "register_operand" "=w")
7844
+ (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
7850
-(define_expand "reduc_splus_v2si"
7851
- [(set (match_operand:V2SI 0 "register_operand" "=w")
7852
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
7858
-(define_insn "reduc_<maxminv>_<mode>"
7859
- [(set (match_operand:VDQV 0 "register_operand" "=w")
7860
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
7863
- "<maxminv>v\\t%<Vetype>0, %1.<Vtype>"
7864
- [(set_attr "simd_type" "simd_minmaxv")
7865
+ "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
7866
+ [(set_attr "simd_type" "simd_fminmaxv")
7867
(set_attr "simd_mode" "<MODE>")]
7870
-(define_insn "reduc_<maxminv>_v2si"
7871
- [(set (match_operand:V2SI 0 "register_operand" "=w")
7872
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
7874
+(define_insn "reduc_<maxmin_uns>_v4sf"
7875
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
7876
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
7879
- "<maxminv>p\\t%0.2s, %1.2s, %1.2s"
7880
- [(set_attr "simd_type" "simd_minmax")
7881
- (set_attr "simd_mode" "V2SI")]
7882
+ "<maxmin_uns_op>v\\t%s0, %1.4s"
7883
+ [(set_attr "simd_type" "simd_fminmaxv")
7884
+ (set_attr "simd_mode" "V4SF")]
7887
-;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register
7888
-;; allocation. For an intrinsic of form:
7889
-;; vD = bsl_* (vS, vN, vM)
7890
+;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
7892
+;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
7895
+;; Thus our BSL is of the form:
7896
+;; op0 = bsl (mask, op2, op3)
7897
;; We can use any of:
7898
-;; bsl vS, vN, vM (if D = S)
7899
-;; bit vD, vN, vS (if D = M, so 1-bits in vS choose bits from vN, else vM)
7900
-;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN)
7903
+;; bsl mask, op1, op2
7904
+;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
7905
+;; bit op0, op2, mask
7906
+;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
7907
+;; bif op0, op1, mask
7909
(define_insn "aarch64_simd_bsl<mode>_internal"
7910
[(set (match_operand:VALL 0 "register_operand" "=w,w,w")
7912
- [(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
7913
- (match_operand:VALL 2 "register_operand" " w,w,0")
7914
- (match_operand:VALL 3 "register_operand" " w,0,w")]
7918
+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
7919
+ (match_operand:VALL 2 "register_operand" " w,w,0"))
7921
+ (not:<V_cmp_result>
7922
+ (match_dup:<V_cmp_result> 1))
7923
+ (match_operand:VALL 3 "register_operand" " w,0,w"))
7927
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
7928
@@ -1486,28 +1721,32 @@
7931
(define_expand "aarch64_simd_bsl<mode>"
7932
- [(set (match_operand:VALL 0 "register_operand")
7933
- (unspec:VALL [(match_operand:<V_cmp_result> 1 "register_operand")
7934
- (match_operand:VALL 2 "register_operand")
7935
- (match_operand:VALL 3 "register_operand")]
7938
+ [(match_operand:VALL 0 "register_operand")
7939
+ (match_operand:<V_cmp_result> 1 "register_operand")
7940
+ (match_operand:VALL 2 "register_operand")
7941
+ (match_operand:VALL 3 "register_operand")]
7944
/* We can't alias operands together if they have different modes. */
7945
operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
7946
+ emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
7947
+ operands[2], operands[3]));
7951
-(define_expand "aarch64_vcond_internal<mode>"
7952
+(define_expand "aarch64_vcond_internal<mode><mode>"
7953
[(set (match_operand:VDQ 0 "register_operand")
7955
(match_operator 3 "comparison_operator"
7956
[(match_operand:VDQ 4 "register_operand")
7957
(match_operand:VDQ 5 "nonmemory_operand")])
7958
- (match_operand:VDQ 1 "register_operand")
7959
- (match_operand:VDQ 2 "register_operand")))]
7960
+ (match_operand:VDQ 1 "nonmemory_operand")
7961
+ (match_operand:VDQ 2 "nonmemory_operand")))]
7964
int inverse = 0, has_zero_imm_form = 0;
7965
+ rtx op1 = operands[1];
7966
+ rtx op2 = operands[2];
7967
rtx mask = gen_reg_rtx (<MODE>mode);
7969
switch (GET_CODE (operands[3]))
7970
@@ -1548,12 +1787,12 @@
7974
- emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
7975
+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
7980
- emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
7981
+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
7985
@@ -1566,30 +1805,47 @@
7989
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
7992
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
7995
+ op1 = operands[2];
7996
+ op2 = operands[1];
7999
+ /* If we have (a = (b CMP c) ? -1 : 0);
8000
+ Then we can simply move the generated mask. */
8002
+ if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
8003
+ && op2 == CONST0_RTX (<V_cmp_result>mode))
8004
+ emit_move_insn (operands[0], mask);
8008
+ op1 = force_reg (<MODE>mode, op1);
8010
+ op2 = force_reg (<MODE>mode, op2);
8011
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
8018
-(define_expand "aarch64_vcond_internal<mode>"
8019
- [(set (match_operand:VDQF 0 "register_operand")
8020
+(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
8021
+ [(set (match_operand:VDQF_COND 0 "register_operand")
8023
(match_operator 3 "comparison_operator"
8024
[(match_operand:VDQF 4 "register_operand")
8025
(match_operand:VDQF 5 "nonmemory_operand")])
8026
- (match_operand:VDQF 1 "register_operand")
8027
- (match_operand:VDQF 2 "register_operand")))]
8028
+ (match_operand:VDQF_COND 1 "nonmemory_operand")
8029
+ (match_operand:VDQF_COND 2 "nonmemory_operand")))]
8033
int use_zero_form = 0;
8034
int swap_bsl_operands = 0;
8035
- rtx mask = gen_reg_rtx (<V_cmp_result>mode);
8036
- rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
8037
+ rtx op1 = operands[1];
8038
+ rtx op2 = operands[2];
8039
+ rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
8040
+ rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
8042
rtx (*base_comparison) (rtx, rtx, rtx);
8043
rtx (*complimentary_comparison) (rtx, rtx, rtx);
8044
@@ -1609,7 +1865,7 @@
8047
if (!REG_P (operands[5]))
8048
- operands[5] = force_reg (<MODE>mode, operands[5]);
8049
+ operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
8052
switch (GET_CODE (operands[3]))
8053
@@ -1622,8 +1878,8 @@
8057
- base_comparison = gen_aarch64_cmge<mode>;
8058
- complimentary_comparison = gen_aarch64_cmgt<mode>;
8059
+ base_comparison = gen_aarch64_cmge<VDQF:mode>;
8060
+ complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
8064
@@ -1631,14 +1887,14 @@
8068
- base_comparison = gen_aarch64_cmgt<mode>;
8069
- complimentary_comparison = gen_aarch64_cmge<mode>;
8070
+ base_comparison = gen_aarch64_cmgt<VDQF:mode>;
8071
+ complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
8076
- base_comparison = gen_aarch64_cmeq<mode>;
8077
- complimentary_comparison = gen_aarch64_cmeq<mode>;
8078
+ base_comparison = gen_aarch64_cmeq<VDQF:mode>;
8079
+ complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
8083
@@ -1666,10 +1922,10 @@
8084
switch (GET_CODE (operands[3]))
8087
- base_comparison = gen_aarch64_cmlt<mode>;
8088
+ base_comparison = gen_aarch64_cmlt<VDQF:mode>;
8091
- base_comparison = gen_aarch64_cmle<mode>;
8092
+ base_comparison = gen_aarch64_cmle<VDQF:mode>;
8095
/* Do nothing, other zero form cases already have the correct
8096
@@ -1712,9 +1968,9 @@
8097
true iff !(a != b && a ORDERED b), swapping the operands to BSL
8098
will then give us (a == b || a UNORDERED b) as intended. */
8100
- emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
8101
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
8102
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
8103
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
8104
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
8105
+ emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
8106
swap_bsl_operands = 1;
8109
@@ -1723,20 +1979,36 @@
8110
swap_bsl_operands = 1;
8113
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
8114
- emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
8115
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
8116
+ emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
8117
+ emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
8118
+ emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
8124
if (swap_bsl_operands)
8125
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
8128
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
8131
+ op1 = operands[2];
8132
+ op2 = operands[1];
8135
+ /* If we have (a = (b CMP c) ? -1 : 0);
8136
+ Then we can simply move the generated mask. */
8138
+ if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
8139
+ && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
8140
+ emit_move_insn (operands[0], mask);
8144
+ op1 = force_reg (<VDQF_COND:MODE>mode, op1);
8146
+ op2 = force_reg (<VDQF_COND:MODE>mode, op2);
8147
+ emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
8154
@@ -1746,16 +2018,32 @@
8155
(match_operator 3 "comparison_operator"
8156
[(match_operand:VALL 4 "register_operand")
8157
(match_operand:VALL 5 "nonmemory_operand")])
8158
- (match_operand:VALL 1 "register_operand")
8159
- (match_operand:VALL 2 "register_operand")))]
8160
+ (match_operand:VALL 1 "nonmemory_operand")
8161
+ (match_operand:VALL 2 "nonmemory_operand")))]
8164
- emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
8165
+ emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
8166
operands[2], operands[3],
8167
operands[4], operands[5]));
8171
+(define_expand "vcond<v_cmp_result><mode>"
8172
+ [(set (match_operand:<V_cmp_result> 0 "register_operand")
8173
+ (if_then_else:<V_cmp_result>
8174
+ (match_operator 3 "comparison_operator"
8175
+ [(match_operand:VDQF 4 "register_operand")
8176
+ (match_operand:VDQF 5 "nonmemory_operand")])
8177
+ (match_operand:<V_cmp_result> 1 "nonmemory_operand")
8178
+ (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
8181
+ emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
8182
+ operands[0], operands[1],
8183
+ operands[2], operands[3],
8184
+ operands[4], operands[5]));
8188
(define_expand "vcondu<mode><mode>"
8189
[(set (match_operand:VDQ 0 "register_operand")
8190
@@ -1763,11 +2051,11 @@
8191
(match_operator 3 "comparison_operator"
8192
[(match_operand:VDQ 4 "register_operand")
8193
(match_operand:VDQ 5 "nonmemory_operand")])
8194
- (match_operand:VDQ 1 "register_operand")
8195
- (match_operand:VDQ 2 "register_operand")))]
8196
+ (match_operand:VDQ 1 "nonmemory_operand")
8197
+ (match_operand:VDQ 2 "nonmemory_operand")))]
8200
- emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
8201
+ emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
8202
operands[2], operands[3],
8203
operands[4], operands[5]));
8205
@@ -2861,28 +3149,6 @@
8206
(set_attr "simd_mode" "<MODE>")]
8211
-(define_expand "aarch64_sshl_n<mode>"
8212
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
8213
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
8214
- (match_operand:SI 2 "immediate_operand" "i")]
8217
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
8221
-(define_expand "aarch64_ushl_n<mode>"
8222
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
8223
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
8224
- (match_operand:SI 2 "immediate_operand" "i")]
8227
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
8233
(define_insn "aarch64_<sur>shll_n<mode>"
8234
@@ -2927,28 +3193,6 @@
8235
(set_attr "simd_mode" "<MODE>")]
8240
-(define_expand "aarch64_sshr_n<mode>"
8241
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
8242
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
8243
- (match_operand:SI 2 "immediate_operand" "i")]
8246
- emit_insn (gen_ashr<mode>3 (operands[0], operands[1], operands[2]));
8250
-(define_expand "aarch64_ushr_n<mode>"
8251
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
8252
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
8253
- (match_operand:SI 2 "immediate_operand" "i")]
8256
- emit_insn (gen_lshr<mode>3 (operands[0], operands[1], operands[2]));
8262
(define_insn "aarch64_<sur>shr_n<mode>"
8263
@@ -3034,52 +3278,180 @@
8267
-;; cm(eq|ge|le|lt|gt)
8268
+;; cm(eq|ge|gt|lt|le)
8269
+;; Note, we have constraints for Dz and Z as different expanders
8270
+;; have different ideas of what should be passed to this pattern.
8272
-(define_insn "aarch64_cm<cmp><mode>"
8273
+(define_insn "aarch64_cm<optab><mode>"
8274
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
8275
- (unspec:<V_cmp_result>
8276
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
8277
- (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
8279
+ (neg:<V_cmp_result>
8280
+ (COMPARISONS:<V_cmp_result>
8281
+ (match_operand:VDQ 1 "register_operand" "w,w")
8282
+ (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
8286
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
8287
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
8288
+ cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
8289
+ cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
8290
[(set_attr "simd_type" "simd_cmp")
8291
(set_attr "simd_mode" "<MODE>")]
8295
+(define_insn_and_split "aarch64_cm<optab>di"
8296
+ [(set (match_operand:DI 0 "register_operand" "=w,w,r")
8299
+ (match_operand:DI 1 "register_operand" "w,w,r")
8300
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
8302
+ (clobber (reg:CC CC_REGNUM))]
8305
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
8306
+ cm<optab>\t%d0, %d1, #0
8309
+ /* We need to prevent the split from
8310
+ happening in the 'w' constraint cases. */
8311
+ && GP_REGNUM_P (REGNO (operands[0]))
8312
+ && GP_REGNUM_P (REGNO (operands[1]))"
8315
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
8316
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
8317
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
8318
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
8321
+ [(set_attr "simd_type" "simd_cmp")
8322
+ (set_attr "simd_mode" "DI")]
8325
-(define_insn "aarch64_cm<cmp><mode>"
8328
+(define_insn "aarch64_cm<optab><mode>"
8329
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
8330
- (unspec:<V_cmp_result>
8331
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
8332
- (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
8334
+ (neg:<V_cmp_result>
8335
+ (UCOMPARISONS:<V_cmp_result>
8336
+ (match_operand:VDQ 1 "register_operand" "w")
8337
+ (match_operand:VDQ 2 "register_operand" "w")
8340
- "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8341
+ "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
8342
[(set_attr "simd_type" "simd_cmp")
8343
(set_attr "simd_mode" "<MODE>")]
8346
-;; fcm(eq|ge|le|lt|gt)
8347
+(define_insn_and_split "aarch64_cm<optab>di"
8348
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
8351
+ (match_operand:DI 1 "register_operand" "w,r")
8352
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
8354
+ (clobber (reg:CC CC_REGNUM))]
8357
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
8360
+ /* We need to prevent the split from
8361
+ happening in the 'w' constraint cases. */
8362
+ && GP_REGNUM_P (REGNO (operands[0]))
8363
+ && GP_REGNUM_P (REGNO (operands[1]))"
8366
+ enum machine_mode mode = CCmode;
8367
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
8368
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
8369
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
8372
+ [(set_attr "simd_type" "simd_cmp")
8373
+ (set_attr "simd_mode" "DI")]
8376
-(define_insn "aarch64_cm<cmp><mode>"
8379
+(define_insn "aarch64_cmtst<mode>"
8380
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
8381
+ (neg:<V_cmp_result>
8382
+ (ne:<V_cmp_result>
8384
+ (match_operand:VDQ 1 "register_operand" "w")
8385
+ (match_operand:VDQ 2 "register_operand" "w"))
8386
+ (vec_duplicate:<V_cmp_result> (const_int 0)))))]
8388
+ "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8389
+ [(set_attr "simd_type" "simd_cmp")
8390
+ (set_attr "simd_mode" "<MODE>")]
8393
+(define_insn_and_split "aarch64_cmtstdi"
8394
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
8398
+ (match_operand:DI 1 "register_operand" "w,r")
8399
+ (match_operand:DI 2 "register_operand" "w,r"))
8401
+ (clobber (reg:CC CC_REGNUM))]
8404
+ cmtst\t%d0, %d1, %d2
8407
+ /* We need to prevent the split from
8408
+ happening in the 'w' constraint cases. */
8409
+ && GP_REGNUM_P (REGNO (operands[0]))
8410
+ && GP_REGNUM_P (REGNO (operands[1]))"
8413
+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
8414
+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
8415
+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
8416
+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
8417
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
8420
+ [(set_attr "simd_type" "simd_cmp")
8421
+ (set_attr "simd_mode" "DI")]
8424
+;; fcm(eq|ge|gt|le|lt)
8426
+(define_insn "aarch64_cm<optab><mode>"
8427
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
8428
- (unspec:<V_cmp_result>
8429
- [(match_operand:VDQF 1 "register_operand" "w,w")
8430
- (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
8432
+ (neg:<V_cmp_result>
8433
+ (COMPARISONS:<V_cmp_result>
8434
+ (match_operand:VALLF 1 "register_operand" "w,w")
8435
+ (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
8439
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
8440
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
8441
+ fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
8442
+ fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
8443
[(set_attr "simd_type" "simd_fcmp")
8444
(set_attr "simd_mode" "<MODE>")]
8448
+;; Note we can also handle what would be fac(le|lt) by
8449
+;; generating fac(ge|gt).
8451
+(define_insn "*aarch64_fac<optab><mode>"
8452
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
8453
+ (neg:<V_cmp_result>
8454
+ (FAC_COMPARISONS:<V_cmp_result>
8455
+ (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
8456
+ (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
8459
+ "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
8460
+ [(set_attr "simd_type" "simd_fcmp")
8461
+ (set_attr "simd_mode" "<MODE>")]
8466
(define_insn "aarch64_addp<mode>"
8467
@@ -3105,30 +3477,6 @@
8468
(set_attr "simd_mode" "DI")]
8473
-(define_expand "aarch64_<maxmin><mode>"
8474
- [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
8475
- (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
8476
- (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
8479
- emit_insn (gen_<maxmin><mode>3 (operands[0], operands[1], operands[2]));
8484
-(define_insn "aarch64_<fmaxmin><mode>"
8485
- [(set (match_operand:VDQF 0 "register_operand" "=w")
8486
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
8487
- (match_operand:VDQF 2 "register_operand" "w")]
8490
- "<fmaxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8491
- [(set_attr "simd_type" "simd_fminmax")
8492
- (set_attr "simd_mode" "<MODE>")]
8497
(define_insn "sqrt<mode>2"
8498
@@ -3140,16 +3488,6 @@
8499
(set_attr "simd_mode" "<MODE>")]
8502
-(define_expand "aarch64_sqrt<mode>"
8503
- [(match_operand:VDQF 0 "register_operand" "=w")
8504
- (match_operand:VDQF 1 "register_operand" "w")]
8507
- emit_insn (gen_sqrt<mode>2 (operands[0], operands[1]));
8512
;; Patterns for vector struct loads and stores.
8514
(define_insn "vec_load_lanesoi<mode>"
8515
@@ -3714,3 +4052,25 @@
8516
"ld1r\\t{%0.<Vtype>}, %1"
8517
[(set_attr "simd_type" "simd_load1r")
8518
(set_attr "simd_mode" "<MODE>")])
8520
+(define_insn "aarch64_frecpe<mode>"
8521
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
8522
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
8525
+ "frecpe\\t%0.<Vtype>, %1.<Vtype>"
8526
+ [(set_attr "simd_type" "simd_frecpe")
8527
+ (set_attr "simd_mode" "<MODE>")]
8530
+(define_insn "aarch64_frecps<mode>"
8531
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
8532
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
8533
+ (match_operand:VDQF 2 "register_operand" "w")]
8536
+ "frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8537
+ [(set_attr "simd_type" "simd_frecps")
8538
+ (set_attr "simd_mode" "<MODE>")]
8541
--- a/src/gcc/config/aarch64/predicates.md
8542
+++ b/src/gcc/config/aarch64/predicates.md
8544
(ior (match_operand 0 "register_operand")
8545
(match_test "op == const0_rtx"))))
8547
+(define_predicate "aarch64_reg_or_fp_zero"
8548
+ (and (match_code "reg,subreg,const_double")
8549
+ (ior (match_operand 0 "register_operand")
8550
+ (match_test "aarch64_float_const_zero_rtx_p (op)"))))
8552
(define_predicate "aarch64_reg_zero_or_m1_or_1"
8553
(and (match_code "reg,subreg,const_int")
8554
(ior (match_operand 0 "register_operand")
8555
@@ -110,10 +115,6 @@
8556
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
8559
-(define_predicate "aarch64_const_address"
8560
- (and (match_code "symbol_ref")
8561
- (match_test "mode == DImode && CONSTANT_ADDRESS_P (op)")))
8563
(define_predicate "aarch64_valid_symref"
8564
(match_code "const, symbol_ref, label_ref")
8566
@@ -165,15 +166,10 @@
8569
(define_predicate "aarch64_mov_operand"
8570
- (and (match_code "reg,subreg,mem,const_int,symbol_ref,high")
8571
+ (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high")
8572
(ior (match_operand 0 "register_operand")
8573
(ior (match_operand 0 "memory_operand")
8574
- (ior (match_test "GET_CODE (op) == HIGH
8575
- && aarch64_valid_symref (XEXP (op, 0),
8576
- GET_MODE (XEXP (op, 0)))")
8577
- (ior (match_test "CONST_INT_P (op)
8578
- && aarch64_move_imm (INTVAL (op), mode)")
8579
- (match_test "aarch64_const_address (op, mode)")))))))
8580
+ (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)")))))
8582
(define_predicate "aarch64_movti_operand"
8583
(and (match_code "reg,subreg,mem,const_int")
8584
--- a/src/gcc/config/aarch64/aarch64-elf.h
8585
+++ b/src/gcc/config/aarch64/aarch64-elf.h
8588
#define ASM_COMMENT_START "//"
8590
-#define REGISTER_PREFIX ""
8591
#define LOCAL_LABEL_PREFIX "."
8592
#define USER_LABEL_PREFIX ""
8594
--- a/src/gcc/config/aarch64/arm_neon.h
8595
+++ b/src/gcc/config/aarch64/arm_neon.h
8596
@@ -4468,17 +4468,6 @@
8600
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8601
-vabs_f32 (float32x2_t a)
8603
- float32x2_t result;
8604
- __asm__ ("fabs %0.2s,%1.2s"
8607
- : /* No clobbers */);
8611
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8612
vabs_s8 (int8x8_t a)
8614
@@ -4512,28 +4501,6 @@
8618
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8619
-vabsq_f32 (float32x4_t a)
8621
- float32x4_t result;
8622
- __asm__ ("fabs %0.4s,%1.4s"
8625
- : /* No clobbers */);
8629
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8630
-vabsq_f64 (float64x2_t a)
8632
- float64x2_t result;
8633
- __asm__ ("fabs %0.2d,%1.2d"
8636
- : /* No clobbers */);
8640
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8641
vabsq_s8 (int8x16_t a)
8643
@@ -4578,50 +4545,6 @@
8647
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
8648
-vacged_f64 (float64_t a, float64_t b)
8651
- __asm__ ("facge %d0,%d1,%d2"
8654
- : /* No clobbers */);
8658
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
8659
-vacges_f32 (float32_t a, float32_t b)
8662
- __asm__ ("facge %s0,%s1,%s2"
8665
- : /* No clobbers */);
8669
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
8670
-vacgtd_f64 (float64_t a, float64_t b)
8673
- __asm__ ("facgt %d0,%d1,%d2"
8676
- : /* No clobbers */);
8680
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
8681
-vacgts_f32 (float32_t a, float32_t b)
8684
- __asm__ ("facgt %s0,%s1,%s2"
8687
- : /* No clobbers */);
8691
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
8692
vaddlv_s8 (int8x8_t a)
8694
@@ -4732,116 +4655,6 @@
8698
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
8699
-vaddv_s8 (int8x8_t a)
8702
- __asm__ ("addv %b0,%1.8b"
8705
- : /* No clobbers */);
8709
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
8710
-vaddv_s16 (int16x4_t a)
8713
- __asm__ ("addv %h0,%1.4h"
8716
- : /* No clobbers */);
8720
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
8721
-vaddv_u8 (uint8x8_t a)
8724
- __asm__ ("addv %b0,%1.8b"
8727
- : /* No clobbers */);
8731
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
8732
-vaddv_u16 (uint16x4_t a)
8735
- __asm__ ("addv %h0,%1.4h"
8738
- : /* No clobbers */);
8742
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
8743
-vaddvq_s8 (int8x16_t a)
8746
- __asm__ ("addv %b0,%1.16b"
8749
- : /* No clobbers */);
8753
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
8754
-vaddvq_s16 (int16x8_t a)
8757
- __asm__ ("addv %h0,%1.8h"
8760
- : /* No clobbers */);
8764
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
8765
-vaddvq_s32 (int32x4_t a)
8768
- __asm__ ("addv %s0,%1.4s"
8771
- : /* No clobbers */);
8775
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
8776
-vaddvq_u8 (uint8x16_t a)
8779
- __asm__ ("addv %b0,%1.16b"
8782
- : /* No clobbers */);
8786
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
8787
-vaddvq_u16 (uint16x8_t a)
8790
- __asm__ ("addv %h0,%1.8h"
8793
- : /* No clobbers */);
8797
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8798
-vaddvq_u32 (uint32x4_t a)
8801
- __asm__ ("addv %s0,%1.4s"
8804
- : /* No clobbers */);
8808
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8809
vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
8811
@@ -5095,358 +4908,6 @@
8815
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8816
-vcage_f32 (float32x2_t a, float32x2_t b)
8818
- uint32x2_t result;
8819
- __asm__ ("facge %0.2s, %1.2s, %2.2s"
8822
- : /* No clobbers */);
8826
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8827
-vcageq_f32 (float32x4_t a, float32x4_t b)
8829
- uint32x4_t result;
8830
- __asm__ ("facge %0.4s, %1.4s, %2.4s"
8833
- : /* No clobbers */);
8837
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8838
-vcageq_f64 (float64x2_t a, float64x2_t b)
8840
- uint64x2_t result;
8841
- __asm__ ("facge %0.2d, %1.2d, %2.2d"
8844
- : /* No clobbers */);
8848
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8849
-vcagt_f32 (float32x2_t a, float32x2_t b)
8851
- uint32x2_t result;
8852
- __asm__ ("facgt %0.2s, %1.2s, %2.2s"
8855
- : /* No clobbers */);
8859
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8860
-vcagtq_f32 (float32x4_t a, float32x4_t b)
8862
- uint32x4_t result;
8863
- __asm__ ("facgt %0.4s, %1.4s, %2.4s"
8866
- : /* No clobbers */);
8870
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8871
-vcagtq_f64 (float64x2_t a, float64x2_t b)
8873
- uint64x2_t result;
8874
- __asm__ ("facgt %0.2d, %1.2d, %2.2d"
8877
- : /* No clobbers */);
8881
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8882
-vcale_f32 (float32x2_t a, float32x2_t b)
8884
- uint32x2_t result;
8885
- __asm__ ("facge %0.2s, %2.2s, %1.2s"
8888
- : /* No clobbers */);
8892
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8893
-vcaleq_f32 (float32x4_t a, float32x4_t b)
8895
- uint32x4_t result;
8896
- __asm__ ("facge %0.4s, %2.4s, %1.4s"
8899
- : /* No clobbers */);
8903
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8904
-vcaleq_f64 (float64x2_t a, float64x2_t b)
8906
- uint64x2_t result;
8907
- __asm__ ("facge %0.2d, %2.2d, %1.2d"
8910
- : /* No clobbers */);
8914
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8915
-vcalt_f32 (float32x2_t a, float32x2_t b)
8917
- uint32x2_t result;
8918
- __asm__ ("facgt %0.2s, %2.2s, %1.2s"
8921
- : /* No clobbers */);
8925
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8926
-vcaltq_f32 (float32x4_t a, float32x4_t b)
8928
- uint32x4_t result;
8929
- __asm__ ("facgt %0.4s, %2.4s, %1.4s"
8932
- : /* No clobbers */);
8936
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8937
-vcaltq_f64 (float64x2_t a, float64x2_t b)
8939
- uint64x2_t result;
8940
- __asm__ ("facgt %0.2d, %2.2d, %1.2d"
8943
- : /* No clobbers */);
8947
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8948
-vceq_f32 (float32x2_t a, float32x2_t b)
8950
- uint32x2_t result;
8951
- __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
8954
- : /* No clobbers */);
8958
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8959
-vceq_f64 (float64x1_t a, float64x1_t b)
8961
- uint64x1_t result;
8962
- __asm__ ("fcmeq %d0, %d1, %d2"
8965
- : /* No clobbers */);
8969
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
8970
-vceqd_f64 (float64_t a, float64_t b)
8973
- __asm__ ("fcmeq %d0,%d1,%d2"
8976
- : /* No clobbers */);
8980
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8981
-vceqq_f32 (float32x4_t a, float32x4_t b)
8983
- uint32x4_t result;
8984
- __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
8987
- : /* No clobbers */);
8991
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8992
-vceqq_f64 (float64x2_t a, float64x2_t b)
8994
- uint64x2_t result;
8995
- __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
8998
- : /* No clobbers */);
9002
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9003
-vceqs_f32 (float32_t a, float32_t b)
9006
- __asm__ ("fcmeq %s0,%s1,%s2"
9009
- : /* No clobbers */);
9013
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9014
-vceqzd_f64 (float64_t a)
9017
- __asm__ ("fcmeq %d0,%d1,#0"
9020
- : /* No clobbers */);
9024
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9025
-vceqzs_f32 (float32_t a)
9028
- __asm__ ("fcmeq %s0,%s1,#0"
9031
- : /* No clobbers */);
9035
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9036
-vcge_f32 (float32x2_t a, float32x2_t b)
9038
- uint32x2_t result;
9039
- __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
9042
- : /* No clobbers */);
9046
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9047
-vcge_f64 (float64x1_t a, float64x1_t b)
9049
- uint64x1_t result;
9050
- __asm__ ("fcmge %d0, %d1, %d2"
9053
- : /* No clobbers */);
9057
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9058
-vcgeq_f32 (float32x4_t a, float32x4_t b)
9060
- uint32x4_t result;
9061
- __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
9064
- : /* No clobbers */);
9068
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9069
-vcgeq_f64 (float64x2_t a, float64x2_t b)
9071
- uint64x2_t result;
9072
- __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
9075
- : /* No clobbers */);
9079
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9080
-vcgt_f32 (float32x2_t a, float32x2_t b)
9082
- uint32x2_t result;
9083
- __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
9086
- : /* No clobbers */);
9090
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9091
-vcgt_f64 (float64x1_t a, float64x1_t b)
9093
- uint64x1_t result;
9094
- __asm__ ("fcmgt %d0, %d1, %d2"
9097
- : /* No clobbers */);
9101
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9102
-vcgtq_f32 (float32x4_t a, float32x4_t b)
9104
- uint32x4_t result;
9105
- __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
9108
- : /* No clobbers */);
9112
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9113
-vcgtq_f64 (float64x2_t a, float64x2_t b)
9115
- uint64x2_t result;
9116
- __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
9119
- : /* No clobbers */);
9123
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9124
-vcle_f32 (float32x2_t a, float32x2_t b)
9126
- uint32x2_t result;
9127
- __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
9130
- : /* No clobbers */);
9134
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9135
-vcle_f64 (float64x1_t a, float64x1_t b)
9137
- uint64x1_t result;
9138
- __asm__ ("fcmge %d0, %d2, %d1"
9141
- : /* No clobbers */);
9145
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9146
-vcleq_f32 (float32x4_t a, float32x4_t b)
9148
- uint32x4_t result;
9149
- __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
9152
- : /* No clobbers */);
9156
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9157
-vcleq_f64 (float64x2_t a, float64x2_t b)
9159
- uint64x2_t result;
9160
- __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
9163
- : /* No clobbers */);
9167
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9168
vcls_s8 (int8x8_t a)
9170
@@ -5513,50 +4974,6 @@
9174
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9175
-vclt_f32 (float32x2_t a, float32x2_t b)
9177
- uint32x2_t result;
9178
- __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
9181
- : /* No clobbers */);
9185
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9186
-vclt_f64 (float64x1_t a, float64x1_t b)
9188
- uint64x1_t result;
9189
- __asm__ ("fcmgt %d0, %d2, %d1"
9192
- : /* No clobbers */);
9196
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9197
-vcltq_f32 (float32x4_t a, float32x4_t b)
9199
- uint32x4_t result;
9200
- __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
9203
- : /* No clobbers */);
9207
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9208
-vcltq_f64 (float64x2_t a, float64x2_t b)
9210
- uint64x2_t result;
9211
- __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
9214
- : /* No clobbers */);
9218
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9219
vclz_s8 (int8x8_t a)
9221
@@ -5915,100 +5332,12 @@
9223
/* vcvt_f32_f16 not supported */
9225
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9226
-vcvt_f32_f64 (float64x2_t a)
9228
- float32x2_t result;
9229
- __asm__ ("fcvtn %0.2s,%1.2d"
9232
- : /* No clobbers */);
9236
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9237
-vcvt_f32_s32 (int32x2_t a)
9239
- float32x2_t result;
9240
- __asm__ ("scvtf %0.2s, %1.2s"
9243
- : /* No clobbers */);
9247
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9248
-vcvt_f32_u32 (uint32x2_t a)
9250
- float32x2_t result;
9251
- __asm__ ("ucvtf %0.2s, %1.2s"
9254
- : /* No clobbers */);
9258
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9259
-vcvt_f64_f32 (float32x2_t a)
9261
- float64x2_t result;
9262
- __asm__ ("fcvtl %0.2d,%1.2s"
9265
- : /* No clobbers */);
9269
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
9270
-vcvt_f64_s64 (uint64x1_t a)
9272
- float64x1_t result;
9273
- __asm__ ("scvtf %d0, %d1"
9276
- : /* No clobbers */);
9280
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
9281
-vcvt_f64_u64 (uint64x1_t a)
9283
- float64x1_t result;
9284
- __asm__ ("ucvtf %d0, %d1"
9287
- : /* No clobbers */);
9291
/* vcvt_high_f16_f32 not supported */
9293
/* vcvt_high_f32_f16 not supported */
9295
static float32x2_t vdup_n_f32 (float32_t);
9297
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9298
-vcvt_high_f32_f64 (float32x2_t a, float64x2_t b)
9300
- float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f));
9301
- __asm__ ("fcvtn2 %0.4s,%2.2d"
9304
- : /* No clobbers */);
9308
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9309
-vcvt_high_f64_f32 (float32x4_t a)
9311
- float64x2_t result;
9312
- __asm__ ("fcvtl2 %0.2d,%1.4s"
9315
- : /* No clobbers */);
9319
#define vcvt_n_f32_s32(a, b) \
9322
@@ -6057,160 +5386,6 @@
9326
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9327
-vcvt_s32_f32 (float32x2_t a)
9330
- __asm__ ("fcvtzs %0.2s, %1.2s"
9333
- : /* No clobbers */);
9337
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9338
-vcvt_u32_f32 (float32x2_t a)
9340
- uint32x2_t result;
9341
- __asm__ ("fcvtzu %0.2s, %1.2s"
9344
- : /* No clobbers */);
9348
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9349
-vcvta_s32_f32 (float32x2_t a)
9352
- __asm__ ("fcvtas %0.2s, %1.2s"
9355
- : /* No clobbers */);
9359
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9360
-vcvta_u32_f32 (float32x2_t a)
9362
- uint32x2_t result;
9363
- __asm__ ("fcvtau %0.2s, %1.2s"
9366
- : /* No clobbers */);
9370
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9371
-vcvtad_s64_f64 (float64_t a)
9374
- __asm__ ("fcvtas %d0,%d1"
9377
- : /* No clobbers */);
9381
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9382
-vcvtad_u64_f64 (float64_t a)
9385
- __asm__ ("fcvtau %d0,%d1"
9388
- : /* No clobbers */);
9392
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9393
-vcvtaq_s32_f32 (float32x4_t a)
9396
- __asm__ ("fcvtas %0.4s, %1.4s"
9399
- : /* No clobbers */);
9403
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9404
-vcvtaq_s64_f64 (float64x2_t a)
9407
- __asm__ ("fcvtas %0.2d, %1.2d"
9410
- : /* No clobbers */);
9414
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9415
-vcvtaq_u32_f32 (float32x4_t a)
9417
- uint32x4_t result;
9418
- __asm__ ("fcvtau %0.4s, %1.4s"
9421
- : /* No clobbers */);
9425
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9426
-vcvtaq_u64_f64 (float64x2_t a)
9428
- uint64x2_t result;
9429
- __asm__ ("fcvtau %0.2d, %1.2d"
9432
- : /* No clobbers */);
9436
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9437
-vcvtas_s64_f64 (float32_t a)
9440
- __asm__ ("fcvtas %s0,%s1"
9443
- : /* No clobbers */);
9447
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9448
-vcvtas_u64_f64 (float32_t a)
9451
- __asm__ ("fcvtau %s0,%s1"
9454
- : /* No clobbers */);
9458
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
9459
-vcvtd_f64_s64 (int64_t a)
9462
- __asm__ ("scvtf %d0,%d1"
9465
- : /* No clobbers */);
9469
-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9470
-vcvtd_f64_u64 (uint64_t a)
9473
- __asm__ ("ucvtf %d0,%d1"
9476
- : /* No clobbers */);
9480
#define vcvtd_n_f64_s64(a, b) \
9483
@@ -6259,402 +5434,6 @@
9487
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9488
-vcvtd_s64_f64 (float64_t a)
9491
- __asm__ ("fcvtzs %d0,%d1"
9494
- : /* No clobbers */);
9498
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9499
-vcvtd_u64_f64 (float64_t a)
9502
- __asm__ ("fcvtzu %d0,%d1"
9505
- : /* No clobbers */);
9509
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9510
-vcvtm_s32_f32 (float32x2_t a)
9513
- __asm__ ("fcvtms %0.2s, %1.2s"
9516
- : /* No clobbers */);
9520
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9521
-vcvtm_u32_f32 (float32x2_t a)
9523
- uint32x2_t result;
9524
- __asm__ ("fcvtmu %0.2s, %1.2s"
9527
- : /* No clobbers */);
9531
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9532
-vcvtmd_s64_f64 (float64_t a)
9535
- __asm__ ("fcvtms %d0,%d1"
9538
- : /* No clobbers */);
9542
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9543
-vcvtmd_u64_f64 (float64_t a)
9546
- __asm__ ("fcvtmu %d0,%d1"
9549
- : /* No clobbers */);
9553
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9554
-vcvtmq_s32_f32 (float32x4_t a)
9557
- __asm__ ("fcvtms %0.4s, %1.4s"
9560
- : /* No clobbers */);
9564
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9565
-vcvtmq_s64_f64 (float64x2_t a)
9568
- __asm__ ("fcvtms %0.2d, %1.2d"
9571
- : /* No clobbers */);
9575
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9576
-vcvtmq_u32_f32 (float32x4_t a)
9578
- uint32x4_t result;
9579
- __asm__ ("fcvtmu %0.4s, %1.4s"
9582
- : /* No clobbers */);
9586
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9587
-vcvtmq_u64_f64 (float64x2_t a)
9589
- uint64x2_t result;
9590
- __asm__ ("fcvtmu %0.2d, %1.2d"
9593
- : /* No clobbers */);
9597
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9598
-vcvtms_s64_f64 (float32_t a)
9601
- __asm__ ("fcvtms %s0,%s1"
9604
- : /* No clobbers */);
9608
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9609
-vcvtms_u64_f64 (float32_t a)
9612
- __asm__ ("fcvtmu %s0,%s1"
9615
- : /* No clobbers */);
9619
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9620
-vcvtn_s32_f32 (float32x2_t a)
9623
- __asm__ ("fcvtns %0.2s, %1.2s"
9626
- : /* No clobbers */);
9630
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9631
-vcvtn_u32_f32 (float32x2_t a)
9633
- uint32x2_t result;
9634
- __asm__ ("fcvtnu %0.2s, %1.2s"
9637
- : /* No clobbers */);
9641
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9642
-vcvtnd_s64_f64 (float64_t a)
9645
- __asm__ ("fcvtns %d0,%d1"
9648
- : /* No clobbers */);
9652
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9653
-vcvtnd_u64_f64 (float64_t a)
9656
- __asm__ ("fcvtnu %d0,%d1"
9659
- : /* No clobbers */);
9663
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9664
-vcvtnq_s32_f32 (float32x4_t a)
9667
- __asm__ ("fcvtns %0.4s, %1.4s"
9670
- : /* No clobbers */);
9674
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9675
-vcvtnq_s64_f64 (float64x2_t a)
9678
- __asm__ ("fcvtns %0.2d, %1.2d"
9681
- : /* No clobbers */);
9685
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9686
-vcvtnq_u32_f32 (float32x4_t a)
9688
- uint32x4_t result;
9689
- __asm__ ("fcvtnu %0.4s, %1.4s"
9692
- : /* No clobbers */);
9696
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9697
-vcvtnq_u64_f64 (float64x2_t a)
9699
- uint64x2_t result;
9700
- __asm__ ("fcvtnu %0.2d, %1.2d"
9703
- : /* No clobbers */);
9707
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9708
-vcvtns_s64_f64 (float32_t a)
9711
- __asm__ ("fcvtns %s0,%s1"
9714
- : /* No clobbers */);
9718
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9719
-vcvtns_u64_f64 (float32_t a)
9722
- __asm__ ("fcvtnu %s0,%s1"
9725
- : /* No clobbers */);
9729
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9730
-vcvtp_s32_f32 (float32x2_t a)
9733
- __asm__ ("fcvtps %0.2s, %1.2s"
9736
- : /* No clobbers */);
9740
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9741
-vcvtp_u32_f32 (float32x2_t a)
9743
- uint32x2_t result;
9744
- __asm__ ("fcvtpu %0.2s, %1.2s"
9747
- : /* No clobbers */);
9751
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9752
-vcvtpd_s64_f64 (float64_t a)
9755
- __asm__ ("fcvtps %d0,%d1"
9758
- : /* No clobbers */);
9762
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9763
-vcvtpd_u64_f64 (float64_t a)
9766
- __asm__ ("fcvtpu %d0,%d1"
9769
- : /* No clobbers */);
9773
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9774
-vcvtpq_s32_f32 (float32x4_t a)
9777
- __asm__ ("fcvtps %0.4s, %1.4s"
9780
- : /* No clobbers */);
9784
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9785
-vcvtpq_s64_f64 (float64x2_t a)
9788
- __asm__ ("fcvtps %0.2d, %1.2d"
9791
- : /* No clobbers */);
9795
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9796
-vcvtpq_u32_f32 (float32x4_t a)
9798
- uint32x4_t result;
9799
- __asm__ ("fcvtpu %0.4s, %1.4s"
9802
- : /* No clobbers */);
9806
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9807
-vcvtpq_u64_f64 (float64x2_t a)
9809
- uint64x2_t result;
9810
- __asm__ ("fcvtpu %0.2d, %1.2d"
9813
- : /* No clobbers */);
9817
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9818
-vcvtps_s64_f64 (float32_t a)
9821
- __asm__ ("fcvtps %s0,%s1"
9824
- : /* No clobbers */);
9828
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9829
-vcvtps_u64_f64 (float32_t a)
9832
- __asm__ ("fcvtpu %s0,%s1"
9835
- : /* No clobbers */);
9839
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9840
-vcvtq_f32_s32 (int32x4_t a)
9842
- float32x4_t result;
9843
- __asm__ ("scvtf %0.4s, %1.4s"
9846
- : /* No clobbers */);
9850
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9851
-vcvtq_f32_u32 (uint32x4_t a)
9853
- float32x4_t result;
9854
- __asm__ ("ucvtf %0.4s, %1.4s"
9857
- : /* No clobbers */);
9861
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9862
-vcvtq_f64_s64 (int64x2_t a)
9864
- float64x2_t result;
9865
- __asm__ ("scvtf %0.2d, %1.2d"
9868
- : /* No clobbers */);
9872
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9873
-vcvtq_f64_u64 (uint64x2_t a)
9875
- float64x2_t result;
9876
- __asm__ ("ucvtf %0.2d, %1.2d"
9879
- : /* No clobbers */);
9883
#define vcvtq_n_f32_s32(a, b) \
9886
@@ -6751,72 +5530,6 @@
9890
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9891
-vcvtq_s32_f32 (float32x4_t a)
9894
- __asm__ ("fcvtzs %0.4s, %1.4s"
9897
- : /* No clobbers */);
9901
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9902
-vcvtq_s64_f64 (float64x2_t a)
9905
- __asm__ ("fcvtzs %0.2d, %1.2d"
9908
- : /* No clobbers */);
9912
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9913
-vcvtq_u32_f32 (float32x4_t a)
9915
- uint32x4_t result;
9916
- __asm__ ("fcvtzu %0.4s, %1.4s"
9919
- : /* No clobbers */);
9923
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9924
-vcvtq_u64_f64 (float64x2_t a)
9926
- uint64x2_t result;
9927
- __asm__ ("fcvtzu %0.2d, %1.2d"
9930
- : /* No clobbers */);
9934
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
9935
-vcvts_f64_s32 (int32_t a)
9938
- __asm__ ("scvtf %s0,%s1"
9941
- : /* No clobbers */);
9945
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9946
-vcvts_f64_u32 (uint32_t a)
9949
- __asm__ ("ucvtf %s0,%s1"
9952
- : /* No clobbers */);
9956
#define vcvts_n_f32_s32(a, b) \
9959
@@ -6865,28 +5578,6 @@
9963
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9964
-vcvts_s64_f64 (float32_t a)
9967
- __asm__ ("fcvtzs %s0,%s1"
9970
- : /* No clobbers */);
9974
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9975
-vcvts_u64_f64 (float32_t a)
9978
- __asm__ ("fcvtzu %s0,%s1"
9981
- : /* No clobbers */);
9985
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9986
vcvtx_f32_f64 (float64x2_t a)
9988
@@ -9226,303 +7917,6 @@
9992
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9993
-vmaxnm_f32 (float32x2_t a, float32x2_t b)
9995
- float32x2_t result;
9996
- __asm__ ("fmaxnm %0.2s,%1.2s,%2.2s"
9999
- : /* No clobbers */);
10003
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10004
-vmaxnmq_f32 (float32x4_t a, float32x4_t b)
10006
- float32x4_t result;
10007
- __asm__ ("fmaxnm %0.4s,%1.4s,%2.4s"
10010
- : /* No clobbers */);
10014
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10015
-vmaxnmq_f64 (float64x2_t a, float64x2_t b)
10017
- float64x2_t result;
10018
- __asm__ ("fmaxnm %0.2d,%1.2d,%2.2d"
10021
- : /* No clobbers */);
10025
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10026
-vmaxnmvq_f32 (float32x4_t a)
10028
- float32_t result;
10029
- __asm__ ("fmaxnmv %s0,%1.4s"
10032
- : /* No clobbers */);
10036
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
10037
-vmaxv_s8 (int8x8_t a)
10040
- __asm__ ("smaxv %b0,%1.8b"
10043
- : /* No clobbers */);
10047
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
10048
-vmaxv_s16 (int16x4_t a)
10051
- __asm__ ("smaxv %h0,%1.4h"
10054
- : /* No clobbers */);
10058
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10059
-vmaxv_u8 (uint8x8_t a)
10062
- __asm__ ("umaxv %b0,%1.8b"
10065
- : /* No clobbers */);
10069
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10070
-vmaxv_u16 (uint16x4_t a)
10073
- __asm__ ("umaxv %h0,%1.4h"
10076
- : /* No clobbers */);
10080
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10081
-vmaxvq_f32 (float32x4_t a)
10083
- float32_t result;
10084
- __asm__ ("fmaxv %s0,%1.4s"
10087
- : /* No clobbers */);
10091
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
10092
-vmaxvq_s8 (int8x16_t a)
10095
- __asm__ ("smaxv %b0,%1.16b"
10098
- : /* No clobbers */);
10102
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
10103
-vmaxvq_s16 (int16x8_t a)
10106
- __asm__ ("smaxv %h0,%1.8h"
10109
- : /* No clobbers */);
10113
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10114
-vmaxvq_s32 (int32x4_t a)
10117
- __asm__ ("smaxv %s0,%1.4s"
10120
- : /* No clobbers */);
10124
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10125
-vmaxvq_u8 (uint8x16_t a)
10128
- __asm__ ("umaxv %b0,%1.16b"
10131
- : /* No clobbers */);
10135
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10136
-vmaxvq_u16 (uint16x8_t a)
10139
- __asm__ ("umaxv %h0,%1.8h"
10142
- : /* No clobbers */);
10146
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10147
-vmaxvq_u32 (uint32x4_t a)
10150
- __asm__ ("umaxv %s0,%1.4s"
10153
- : /* No clobbers */);
10157
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10158
-vminnmvq_f32 (float32x4_t a)
10160
- float32_t result;
10161
- __asm__ ("fminnmv %s0,%1.4s"
10164
- : /* No clobbers */);
10168
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
10169
-vminv_s8 (int8x8_t a)
10172
- __asm__ ("sminv %b0,%1.8b"
10175
- : /* No clobbers */);
10179
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
10180
-vminv_s16 (int16x4_t a)
10183
- __asm__ ("sminv %h0,%1.4h"
10186
- : /* No clobbers */);
10190
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10191
-vminv_u8 (uint8x8_t a)
10194
- __asm__ ("uminv %b0,%1.8b"
10197
- : /* No clobbers */);
10201
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10202
-vminv_u16 (uint16x4_t a)
10205
- __asm__ ("uminv %h0,%1.4h"
10208
- : /* No clobbers */);
10212
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10213
-vminvq_f32 (float32x4_t a)
10215
- float32_t result;
10216
- __asm__ ("fminv %s0,%1.4s"
10219
- : /* No clobbers */);
10223
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
10224
-vminvq_s8 (int8x16_t a)
10227
- __asm__ ("sminv %b0,%1.16b"
10230
- : /* No clobbers */);
10234
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
10235
-vminvq_s16 (int16x8_t a)
10238
- __asm__ ("sminv %h0,%1.8h"
10241
- : /* No clobbers */);
10245
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10246
-vminvq_s32 (int32x4_t a)
10249
- __asm__ ("sminv %s0,%1.4s"
10252
- : /* No clobbers */);
10256
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10257
-vminvq_u8 (uint8x16_t a)
10260
- __asm__ ("uminv %b0,%1.16b"
10263
- : /* No clobbers */);
10267
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10268
-vminvq_u16 (uint16x8_t a)
10271
- __asm__ ("uminv %h0,%1.8h"
10274
- : /* No clobbers */);
10278
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10279
-vminvq_u32 (uint32x4_t a)
10282
- __asm__ ("uminv %s0,%1.4s"
10285
- : /* No clobbers */);
10289
#define vmla_lane_f32(a, b, c, d) \
10292
@@ -14556,17 +12950,6 @@
10296
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10297
-vrecpe_f32 (float32x2_t a)
10299
- float32x2_t result;
10300
- __asm__ ("frecpe %0.2s,%1.2s"
10303
- : /* No clobbers */);
10307
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10308
vrecpe_u32 (uint32x2_t a)
10310
@@ -14578,39 +12961,6 @@
10314
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10315
-vrecped_f64 (float64_t a)
10317
- float64_t result;
10318
- __asm__ ("frecpe %d0,%d1"
10321
- : /* No clobbers */);
10325
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10326
-vrecpeq_f32 (float32x4_t a)
10328
- float32x4_t result;
10329
- __asm__ ("frecpe %0.4s,%1.4s"
10332
- : /* No clobbers */);
10336
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10337
-vrecpeq_f64 (float64x2_t a)
10339
- float64x2_t result;
10340
- __asm__ ("frecpe %0.2d,%1.2d"
10343
- : /* No clobbers */);
10347
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10348
vrecpeq_u32 (uint32x4_t a)
10350
@@ -14622,94 +12972,6 @@
10354
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10355
-vrecpes_f32 (float32_t a)
10357
- float32_t result;
10358
- __asm__ ("frecpe %s0,%s1"
10361
- : /* No clobbers */);
10365
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10366
-vrecps_f32 (float32x2_t a, float32x2_t b)
10368
- float32x2_t result;
10369
- __asm__ ("frecps %0.2s,%1.2s,%2.2s"
10372
- : /* No clobbers */);
10376
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10377
-vrecpsd_f64 (float64_t a, float64_t b)
10379
- float64_t result;
10380
- __asm__ ("frecps %d0,%d1,%d2"
10383
- : /* No clobbers */);
10387
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10388
-vrecpsq_f32 (float32x4_t a, float32x4_t b)
10390
- float32x4_t result;
10391
- __asm__ ("frecps %0.4s,%1.4s,%2.4s"
10394
- : /* No clobbers */);
10398
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10399
-vrecpsq_f64 (float64x2_t a, float64x2_t b)
10401
- float64x2_t result;
10402
- __asm__ ("frecps %0.2d,%1.2d,%2.2d"
10405
- : /* No clobbers */);
10409
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10410
-vrecpss_f32 (float32_t a, float32_t b)
10412
- float32_t result;
10413
- __asm__ ("frecps %s0,%s1,%s2"
10416
- : /* No clobbers */);
10420
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10421
-vrecpxd_f64 (float64_t a)
10423
- float64_t result;
10424
- __asm__ ("frecpe %d0,%d1"
10427
- : /* No clobbers */);
10431
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10432
-vrecpxs_f32 (float32_t a)
10434
- float32_t result;
10435
- __asm__ ("frecpe %s0,%s1"
10438
- : /* No clobbers */);
10442
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10443
vrev16_p8 (poly8x8_t a)
10445
@@ -15106,171 +13368,6 @@
10449
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10450
-vrnd_f32 (float32x2_t a)
10452
- float32x2_t result;
10453
- __asm__ ("frintz %0.2s,%1.2s"
10456
- : /* No clobbers */);
10460
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10461
-vrnda_f32 (float32x2_t a)
10463
- float32x2_t result;
10464
- __asm__ ("frinta %0.2s,%1.2s"
10467
- : /* No clobbers */);
10471
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10472
-vrndm_f32 (float32x2_t a)
10474
- float32x2_t result;
10475
- __asm__ ("frintm %0.2s,%1.2s"
10478
- : /* No clobbers */);
10482
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10483
-vrndn_f32 (float32x2_t a)
10485
- float32x2_t result;
10486
- __asm__ ("frintn %0.2s,%1.2s"
10489
- : /* No clobbers */);
10493
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10494
-vrndp_f32 (float32x2_t a)
10496
- float32x2_t result;
10497
- __asm__ ("frintp %0.2s,%1.2s"
10500
- : /* No clobbers */);
10504
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10505
-vrndq_f32 (float32x4_t a)
10507
- float32x4_t result;
10508
- __asm__ ("frintz %0.4s,%1.4s"
10511
- : /* No clobbers */);
10515
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10516
-vrndq_f64 (float64x2_t a)
10518
- float64x2_t result;
10519
- __asm__ ("frintz %0.2d,%1.2d"
10522
- : /* No clobbers */);
10526
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10527
-vrndqa_f32 (float32x4_t a)
10529
- float32x4_t result;
10530
- __asm__ ("frinta %0.4s,%1.4s"
10533
- : /* No clobbers */);
10537
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10538
-vrndqa_f64 (float64x2_t a)
10540
- float64x2_t result;
10541
- __asm__ ("frinta %0.2d,%1.2d"
10544
- : /* No clobbers */);
10548
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10549
-vrndqm_f32 (float32x4_t a)
10551
- float32x4_t result;
10552
- __asm__ ("frintm %0.4s,%1.4s"
10555
- : /* No clobbers */);
10559
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10560
-vrndqm_f64 (float64x2_t a)
10562
- float64x2_t result;
10563
- __asm__ ("frintm %0.2d,%1.2d"
10566
- : /* No clobbers */);
10570
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10571
-vrndqn_f32 (float32x4_t a)
10573
- float32x4_t result;
10574
- __asm__ ("frintn %0.4s,%1.4s"
10577
- : /* No clobbers */);
10581
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10582
-vrndqn_f64 (float64x2_t a)
10584
- float64x2_t result;
10585
- __asm__ ("frintn %0.2d,%1.2d"
10588
- : /* No clobbers */);
10592
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10593
-vrndqp_f32 (float32x4_t a)
10595
- float32x4_t result;
10596
- __asm__ ("frintp %0.4s,%1.4s"
10599
- : /* No clobbers */);
10603
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10604
-vrndqp_f64 (float64x2_t a)
10606
- float64x2_t result;
10607
- __asm__ ("frintp %0.2d,%1.2d"
10610
- : /* No clobbers */);
10614
#define vrshrn_high_n_s16(a, b, c) \
10617
@@ -18788,86 +16885,6 @@
10621
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10622
-vaddv_s32 (int32x2_t a)
10625
- __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10629
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10630
-vaddv_u32 (uint32x2_t a)
10633
- __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10637
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10638
-vmaxnmv_f32 (float32x2_t a)
10640
- float32_t result;
10641
- __asm__ ("fmaxnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10645
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10646
-vminnmv_f32 (float32x2_t a)
10648
- float32_t result;
10649
- __asm__ ("fminnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10653
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10654
-vmaxnmvq_f64 (float64x2_t a)
10656
- float64_t result;
10657
- __asm__ ("fmaxnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
10661
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10662
-vmaxv_s32 (int32x2_t a)
10665
- __asm__ ("smaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10669
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10670
-vmaxv_u32 (uint32x2_t a)
10673
- __asm__ ("umaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10677
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10678
-vminnmvq_f64 (float64x2_t a)
10680
- float64_t result;
10681
- __asm__ ("fminnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
10685
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10686
-vminv_s32 (int32x2_t a)
10689
- __asm__ ("sminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10693
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10694
-vminv_u32 (uint32x2_t a)
10697
- __asm__ ("uminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
10701
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10702
vpaddd_s64 (int64x2_t __a)
10704
@@ -19849,6 +17866,26 @@
10706
/* Start of optimal implementations in approved order. */
10710
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10711
+vabs_f32 (float32x2_t __a)
10713
+ return __builtin_aarch64_absv2sf (__a);
10716
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10717
+vabsq_f32 (float32x4_t __a)
10719
+ return __builtin_aarch64_absv4sf (__a);
10722
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10723
+vabsq_f64 (float64x2_t __a)
10725
+ return __builtin_aarch64_absv2df (__a);
10730
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10731
@@ -19863,8 +17900,238 @@
10738
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
10739
+vaddv_s8 (int8x8_t __a)
10741
+ return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
10744
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
10745
+vaddv_s16 (int16x4_t __a)
10747
+ return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
10750
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10751
+vaddv_s32 (int32x2_t __a)
10753
+ return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
10756
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10757
+vaddv_u8 (uint8x8_t __a)
10759
+ return vget_lane_u8 ((uint8x8_t)
10760
+ __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
10763
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10764
+vaddv_u16 (uint16x4_t __a)
10766
+ return vget_lane_u16 ((uint16x4_t)
10767
+ __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
10770
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10771
+vaddv_u32 (uint32x2_t __a)
10773
+ return vget_lane_u32 ((uint32x2_t)
10774
+ __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
10777
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
10778
+vaddvq_s8 (int8x16_t __a)
10780
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
10783
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
10784
+vaddvq_s16 (int16x8_t __a)
10786
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
10789
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10790
+vaddvq_s32 (int32x4_t __a)
10792
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
10795
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10796
+vaddvq_s64 (int64x2_t __a)
10798
+ return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
10801
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10802
+vaddvq_u8 (uint8x16_t __a)
10804
+ return vgetq_lane_u8 ((uint8x16_t)
10805
+ __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
10808
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10809
+vaddvq_u16 (uint16x8_t __a)
10811
+ return vgetq_lane_u16 ((uint16x8_t)
10812
+ __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
10815
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10816
+vaddvq_u32 (uint32x4_t __a)
10818
+ return vgetq_lane_u32 ((uint32x4_t)
10819
+ __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
10822
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10823
+vaddvq_u64 (uint64x2_t __a)
10825
+ return vgetq_lane_u64 ((uint64x2_t)
10826
+ __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
10829
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10830
+vaddv_f32 (float32x2_t __a)
10832
+ float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
10833
+ return vget_lane_f32 (t, 0);
10836
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10837
+vaddvq_f32 (float32x4_t __a)
10839
+ float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
10840
+ return vgetq_lane_f32 (t, 0);
10843
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10844
+vaddvq_f64 (float64x2_t __a)
10846
+ float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
10847
+ return vgetq_lane_f64 (t, 0);
10852
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10853
+vcages_f32 (float32_t __a, float32_t __b)
10855
+ return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
10858
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10859
+vcage_f32 (float32x2_t __a, float32x2_t __b)
10861
+ return vabs_f32 (__a) >= vabs_f32 (__b);
10864
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10865
+vcageq_f32 (float32x4_t __a, float32x4_t __b)
10867
+ return vabsq_f32 (__a) >= vabsq_f32 (__b);
10870
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10871
+vcaged_f64 (float64_t __a, float64_t __b)
10873
+ return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
10876
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10877
+vcageq_f64 (float64x2_t __a, float64x2_t __b)
10879
+ return vabsq_f64 (__a) >= vabsq_f64 (__b);
10884
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10885
+vcagts_f32 (float32_t __a, float32_t __b)
10887
+ return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
10890
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10891
+vcagt_f32 (float32x2_t __a, float32x2_t __b)
10893
+ return vabs_f32 (__a) > vabs_f32 (__b);
10896
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10897
+vcagtq_f32 (float32x4_t __a, float32x4_t __b)
10899
+ return vabsq_f32 (__a) > vabsq_f32 (__b);
10902
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10903
+vcagtd_f64 (float64_t __a, float64_t __b)
10905
+ return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
10908
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10909
+vcagtq_f64 (float64x2_t __a, float64x2_t __b)
10911
+ return vabsq_f64 (__a) > vabsq_f64 (__b);
10916
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10917
+vcale_f32 (float32x2_t __a, float32x2_t __b)
10919
+ return vabs_f32 (__a) <= vabs_f32 (__b);
10922
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10923
+vcaleq_f32 (float32x4_t __a, float32x4_t __b)
10925
+ return vabsq_f32 (__a) <= vabsq_f32 (__b);
10928
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10929
+vcaleq_f64 (float64x2_t __a, float64x2_t __b)
10931
+ return vabsq_f64 (__a) <= vabsq_f64 (__b);
10936
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10937
+vcalt_f32 (float32x2_t __a, float32x2_t __b)
10939
+ return vabs_f32 (__a) < vabs_f32 (__b);
10942
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10943
+vcaltq_f32 (float32x4_t __a, float32x4_t __b)
10945
+ return vabsq_f32 (__a) < vabsq_f32 (__b);
10948
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10949
+vcaltq_f64 (float64x2_t __a, float64x2_t __b)
10951
+ return vabsq_f64 (__a) < vabsq_f64 (__b);
10954
+/* vceq - vector. */
10956
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10957
+vceq_f32 (float32x2_t __a, float32x2_t __b)
10959
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
10962
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10963
+vceq_f64 (float64x1_t __a, float64x1_t __b)
10965
+ return __a == __b ? -1ll : 0ll;
10968
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10969
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
10971
@@ -19893,7 +18160,7 @@
10972
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10973
vceq_s64 (int64x1_t __a, int64x1_t __b)
10975
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
10976
+ return __a == __b ? -1ll : 0ll;
10979
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10980
@@ -19920,10 +18187,21 @@
10981
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10982
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
10984
- return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a,
10985
- (int64x1_t) __b);
10986
+ return __a == __b ? -1ll : 0ll;
10989
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10990
+vceqq_f32 (float32x4_t __a, float32x4_t __b)
10992
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
10995
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10996
+vceqq_f64 (float64x2_t __a, float64x2_t __b)
10998
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
11001
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11002
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
11004
@@ -19983,27 +18261,245 @@
11008
+/* vceq - scalar. */
11010
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11011
+vceqs_f32 (float32_t __a, float32_t __b)
11013
+ return __a == __b ? -1 : 0;
11016
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11017
vceqd_s64 (int64x1_t __a, int64x1_t __b)
11019
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
11020
+ return __a == __b ? -1ll : 0ll;
11023
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11024
vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
11026
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
11027
+ return __a == __b ? -1ll : 0ll;
11030
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11031
+vceqd_f64 (float64_t __a, float64_t __b)
11033
+ return __a == __b ? -1ll : 0ll;
11036
+/* vceqz - vector. */
11038
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11039
+vceqz_f32 (float32x2_t __a)
11041
+ float32x2_t __b = {0.0f, 0.0f};
11042
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
11045
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11046
+vceqz_f64 (float64x1_t __a)
11048
+ return __a == 0.0 ? -1ll : 0ll;
11051
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11052
+vceqz_p8 (poly8x8_t __a)
11054
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11055
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
11059
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11060
+vceqz_s8 (int8x8_t __a)
11062
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11063
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
11066
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11067
+vceqz_s16 (int16x4_t __a)
11069
+ int16x4_t __b = {0, 0, 0, 0};
11070
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
11073
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11074
+vceqz_s32 (int32x2_t __a)
11076
+ int32x2_t __b = {0, 0};
11077
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
11080
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11081
+vceqz_s64 (int64x1_t __a)
11083
+ return __a == 0ll ? -1ll : 0ll;
11086
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11087
+vceqz_u8 (uint8x8_t __a)
11089
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11090
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
11094
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11095
+vceqz_u16 (uint16x4_t __a)
11097
+ uint16x4_t __b = {0, 0, 0, 0};
11098
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
11099
+ (int16x4_t) __b);
11102
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11103
+vceqz_u32 (uint32x2_t __a)
11105
+ uint32x2_t __b = {0, 0};
11106
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
11107
+ (int32x2_t) __b);
11110
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11111
+vceqz_u64 (uint64x1_t __a)
11113
+ return __a == 0ll ? -1ll : 0ll;
11116
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11117
+vceqzq_f32 (float32x4_t __a)
11119
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
11120
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
11123
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11124
+vceqzq_f64 (float64x2_t __a)
11126
+ float64x2_t __b = {0.0, 0.0};
11127
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
11130
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11131
+vceqzq_p8 (poly8x16_t __a)
11133
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11134
+ 0, 0, 0, 0, 0, 0, 0, 0};
11135
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
11136
+ (int8x16_t) __b);
11139
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11140
+vceqzq_s8 (int8x16_t __a)
11142
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11143
+ 0, 0, 0, 0, 0, 0, 0, 0};
11144
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
11147
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11148
+vceqzq_s16 (int16x8_t __a)
11150
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11151
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
11154
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11155
+vceqzq_s32 (int32x4_t __a)
11157
+ int32x4_t __b = {0, 0, 0, 0};
11158
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
11161
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11162
+vceqzq_s64 (int64x2_t __a)
11164
+ int64x2_t __b = {0, 0};
11165
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
11168
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11169
+vceqzq_u8 (uint8x16_t __a)
11171
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11172
+ 0, 0, 0, 0, 0, 0, 0, 0};
11173
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
11174
+ (int8x16_t) __b);
11177
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11178
+vceqzq_u16 (uint16x8_t __a)
11180
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11181
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
11182
+ (int16x8_t) __b);
11185
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11186
+vceqzq_u32 (uint32x4_t __a)
11188
+ uint32x4_t __b = {0, 0, 0, 0};
11189
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
11190
+ (int32x4_t) __b);
11193
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11194
+vceqzq_u64 (uint64x2_t __a)
11196
+ uint64x2_t __b = {0, 0};
11197
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
11198
+ (int64x2_t) __b);
11201
+/* vceqz - scalar. */
11203
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11204
+vceqzs_f32 (float32_t __a)
11206
+ return __a == 0.0f ? -1 : 0;
11209
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11210
vceqzd_s64 (int64x1_t __a)
11212
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0);
11213
+ return __a == 0 ? -1ll : 0ll;
11217
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11218
+vceqzd_u64 (int64x1_t __a)
11220
+ return __a == 0 ? -1ll : 0ll;
11223
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11224
+vceqzd_f64 (float64_t __a)
11226
+ return __a == 0.0 ? -1ll : 0ll;
11229
+/* vcge - vector. */
11231
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11232
+vcge_f32 (float32x2_t __a, float32x2_t __b)
11234
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
11237
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11238
+vcge_f64 (float64x1_t __a, float64x1_t __b)
11240
+ return __a >= __b ? -1ll : 0ll;
11243
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11244
+vcge_p8 (poly8x8_t __a, poly8x8_t __b)
11246
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
11250
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11251
vcge_s8 (int8x8_t __a, int8x8_t __b)
11253
return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
11254
@@ -20024,38 +18520,56 @@
11255
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11256
vcge_s64 (int64x1_t __a, int64x1_t __b)
11258
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
11259
+ return __a >= __b ? -1ll : 0ll;
11262
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11263
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
11265
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
11266
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
11270
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11271
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
11273
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
11274
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
11278
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11279
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
11281
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
11282
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
11286
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11287
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
11289
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
11290
- (int64x1_t) __b);
11291
+ return __a >= __b ? -1ll : 0ll;
11294
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11295
+vcgeq_f32 (float32x4_t __a, float32x4_t __b)
11297
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
11300
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11301
+vcgeq_f64 (float64x2_t __a, float64x2_t __b)
11303
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
11306
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11307
+vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
11309
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
11310
+ (int8x16_t) __b);
11313
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11314
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
11316
return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
11317
@@ -20082,53 +18596,270 @@
11318
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11319
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
11321
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
11322
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
11326
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11327
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
11329
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
11330
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
11334
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11335
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
11337
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
11338
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
11342
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11343
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
11345
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
11346
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
11350
+/* vcge - scalar. */
11352
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11353
+vcges_f32 (float32_t __a, float32_t __b)
11355
+ return __a >= __b ? -1 : 0;
11358
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11359
vcged_s64 (int64x1_t __a, int64x1_t __b)
11361
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
11362
+ return __a >= __b ? -1ll : 0ll;
11365
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11366
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
11368
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
11369
- (int64x1_t) __b);
11370
+ return __a >= __b ? -1ll : 0ll;
11373
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11374
+vcged_f64 (float64_t __a, float64_t __b)
11376
+ return __a >= __b ? -1ll : 0ll;
11379
+/* vcgez - vector. */
11381
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11382
+vcgez_f32 (float32x2_t __a)
11384
+ float32x2_t __b = {0.0f, 0.0f};
11385
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
11388
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11389
+vcgez_f64 (float64x1_t __a)
11391
+ return __a >= 0.0 ? -1ll : 0ll;
11394
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11395
+vcgez_p8 (poly8x8_t __a)
11397
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11398
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
11402
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11403
+vcgez_s8 (int8x8_t __a)
11405
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11406
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
11409
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11410
+vcgez_s16 (int16x4_t __a)
11412
+ int16x4_t __b = {0, 0, 0, 0};
11413
+ return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
11416
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11417
+vcgez_s32 (int32x2_t __a)
11419
+ int32x2_t __b = {0, 0};
11420
+ return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
11423
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11424
+vcgez_s64 (int64x1_t __a)
11426
+ return __a >= 0ll ? -1ll : 0ll;
11429
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11430
+vcgez_u8 (uint8x8_t __a)
11432
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11433
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
11437
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11438
+vcgez_u16 (uint16x4_t __a)
11440
+ uint16x4_t __b = {0, 0, 0, 0};
11441
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
11442
+ (int16x4_t) __b);
11445
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11446
+vcgez_u32 (uint32x2_t __a)
11448
+ uint32x2_t __b = {0, 0};
11449
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
11450
+ (int32x2_t) __b);
11453
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11454
+vcgez_u64 (uint64x1_t __a)
11456
+ return __a >= 0ll ? -1ll : 0ll;
11459
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11460
+vcgezq_f32 (float32x4_t __a)
11462
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
11463
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
11466
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11467
+vcgezq_f64 (float64x2_t __a)
11469
+ float64x2_t __b = {0.0, 0.0};
11470
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
11473
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11474
+vcgezq_p8 (poly8x16_t __a)
11476
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11477
+ 0, 0, 0, 0, 0, 0, 0, 0};
11478
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
11479
+ (int8x16_t) __b);
11482
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11483
+vcgezq_s8 (int8x16_t __a)
11485
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11486
+ 0, 0, 0, 0, 0, 0, 0, 0};
11487
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
11490
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11491
+vcgezq_s16 (int16x8_t __a)
11493
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11494
+ return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
11497
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11498
+vcgezq_s32 (int32x4_t __a)
11500
+ int32x4_t __b = {0, 0, 0, 0};
11501
+ return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
11504
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11505
+vcgezq_s64 (int64x2_t __a)
11507
+ int64x2_t __b = {0, 0};
11508
+ return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
11511
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11512
+vcgezq_u8 (uint8x16_t __a)
11514
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11515
+ 0, 0, 0, 0, 0, 0, 0, 0};
11516
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
11517
+ (int8x16_t) __b);
11520
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11521
+vcgezq_u16 (uint16x8_t __a)
11523
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11524
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
11525
+ (int16x8_t) __b);
11528
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11529
+vcgezq_u32 (uint32x4_t __a)
11531
+ uint32x4_t __b = {0, 0, 0, 0};
11532
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
11533
+ (int32x4_t) __b);
11536
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11537
+vcgezq_u64 (uint64x2_t __a)
11539
+ uint64x2_t __b = {0, 0};
11540
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
11541
+ (int64x2_t) __b);
11544
+/* vcgez - scalar. */
11546
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11547
+vcgezs_f32 (float32_t __a)
11549
+ return __a >= 0.0f ? -1 : 0;
11552
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11553
vcgezd_s64 (int64x1_t __a)
11555
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0);
11556
+ return __a >= 0 ? -1ll : 0ll;
11560
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11561
+vcgezd_u64 (int64x1_t __a)
11563
+ return __a >= 0 ? -1ll : 0ll;
11566
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11567
+vcgezd_f64 (float64_t __a)
11569
+ return __a >= 0.0 ? -1ll : 0ll;
11572
+/* vcgt - vector. */
11574
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11575
+vcgt_f32 (float32x2_t __a, float32x2_t __b)
11577
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
11580
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11581
+vcgt_f64 (float64x1_t __a, float64x1_t __b)
11583
+ return __a > __b ? -1ll : 0ll;
11586
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11587
+vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
11589
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
11593
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11594
vcgt_s8 (int8x8_t __a, int8x8_t __b)
11596
return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
11597
@@ -20149,38 +18880,56 @@
11598
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11599
vcgt_s64 (int64x1_t __a, int64x1_t __b)
11601
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
11602
+ return __a > __b ? -1ll : 0ll;
11605
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11606
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
11608
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
11609
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
11613
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11614
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
11616
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
11617
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
11621
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11622
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
11624
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
11625
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
11629
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11630
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
11632
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
11633
- (int64x1_t) __b);
11634
+ return __a > __b ? -1ll : 0ll;
11637
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11638
+vcgtq_f32 (float32x4_t __a, float32x4_t __b)
11640
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
11643
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11644
+vcgtq_f64 (float64x2_t __a, float64x2_t __b)
11646
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
11649
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11650
+vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
11652
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
11653
+ (int8x16_t) __b);
11656
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11657
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
11659
return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
11660
@@ -20207,53 +18956,270 @@
11661
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11662
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
11664
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
11665
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
11669
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11670
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
11672
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
11673
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
11677
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11678
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
11680
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
11681
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
11685
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11686
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
11688
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
11689
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
11693
+/* vcgt - scalar. */
11695
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11696
+vcgts_f32 (float32_t __a, float32_t __b)
11698
+ return __a > __b ? -1 : 0;
11701
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11702
vcgtd_s64 (int64x1_t __a, int64x1_t __b)
11704
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
11705
+ return __a > __b ? -1ll : 0ll;
11708
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11709
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
11711
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
11712
- (int64x1_t) __b);
11713
+ return __a > __b ? -1ll : 0ll;
11716
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11717
+vcgtd_f64 (float64_t __a, float64_t __b)
11719
+ return __a > __b ? -1ll : 0ll;
11722
+/* vcgtz - vector. */
11724
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11725
+vcgtz_f32 (float32x2_t __a)
11727
+ float32x2_t __b = {0.0f, 0.0f};
11728
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
11731
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11732
+vcgtz_f64 (float64x1_t __a)
11734
+ return __a > 0.0 ? -1ll : 0ll;
11737
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11738
+vcgtz_p8 (poly8x8_t __a)
11740
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11741
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
11745
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11746
+vcgtz_s8 (int8x8_t __a)
11748
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11749
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
11752
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11753
+vcgtz_s16 (int16x4_t __a)
11755
+ int16x4_t __b = {0, 0, 0, 0};
11756
+ return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
11759
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11760
+vcgtz_s32 (int32x2_t __a)
11762
+ int32x2_t __b = {0, 0};
11763
+ return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
11766
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11767
+vcgtz_s64 (int64x1_t __a)
11769
+ return __a > 0ll ? -1ll : 0ll;
11772
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11773
+vcgtz_u8 (uint8x8_t __a)
11775
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11776
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
11780
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11781
+vcgtz_u16 (uint16x4_t __a)
11783
+ uint16x4_t __b = {0, 0, 0, 0};
11784
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
11785
+ (int16x4_t) __b);
11788
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11789
+vcgtz_u32 (uint32x2_t __a)
11791
+ uint32x2_t __b = {0, 0};
11792
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
11793
+ (int32x2_t) __b);
11796
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11797
+vcgtz_u64 (uint64x1_t __a)
11799
+ return __a > 0ll ? -1ll : 0ll;
11802
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11803
+vcgtzq_f32 (float32x4_t __a)
11805
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
11806
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
11809
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11810
+vcgtzq_f64 (float64x2_t __a)
11812
+ float64x2_t __b = {0.0, 0.0};
11813
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
11816
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11817
+vcgtzq_p8 (poly8x16_t __a)
11819
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11820
+ 0, 0, 0, 0, 0, 0, 0, 0};
11821
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
11822
+ (int8x16_t) __b);
11825
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11826
+vcgtzq_s8 (int8x16_t __a)
11828
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11829
+ 0, 0, 0, 0, 0, 0, 0, 0};
11830
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
11833
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11834
+vcgtzq_s16 (int16x8_t __a)
11836
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11837
+ return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
11840
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11841
+vcgtzq_s32 (int32x4_t __a)
11843
+ int32x4_t __b = {0, 0, 0, 0};
11844
+ return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
11847
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11848
+vcgtzq_s64 (int64x2_t __a)
11850
+ int64x2_t __b = {0, 0};
11851
+ return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
11854
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11855
+vcgtzq_u8 (uint8x16_t __a)
11857
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
11858
+ 0, 0, 0, 0, 0, 0, 0, 0};
11859
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
11860
+ (int8x16_t) __b);
11863
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11864
+vcgtzq_u16 (uint16x8_t __a)
11866
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
11867
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
11868
+ (int16x8_t) __b);
11871
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11872
+vcgtzq_u32 (uint32x4_t __a)
11874
+ uint32x4_t __b = {0, 0, 0, 0};
11875
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
11876
+ (int32x4_t) __b);
11879
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11880
+vcgtzq_u64 (uint64x2_t __a)
11882
+ uint64x2_t __b = {0, 0};
11883
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
11884
+ (int64x2_t) __b);
11887
+/* vcgtz - scalar. */
11889
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11890
+vcgtzs_f32 (float32_t __a)
11892
+ return __a > 0.0f ? -1 : 0;
11895
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11896
vcgtzd_s64 (int64x1_t __a)
11898
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0);
11899
+ return __a > 0 ? -1ll : 0ll;
11903
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11904
+vcgtzd_u64 (int64x1_t __a)
11906
+ return __a > 0 ? -1ll : 0ll;
11909
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11910
+vcgtzd_f64 (float64_t __a)
11912
+ return __a > 0.0 ? -1ll : 0ll;
11915
+/* vcle - vector. */
11917
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11918
+vcle_f32 (float32x2_t __a, float32x2_t __b)
11920
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
11923
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11924
+vcle_f64 (float64x1_t __a, float64x1_t __b)
11926
+ return __a <= __b ? -1ll : 0ll;
11929
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11930
+vcle_p8 (poly8x8_t __a, poly8x8_t __b)
11932
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
11936
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11937
vcle_s8 (int8x8_t __a, int8x8_t __b)
11939
return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
11940
@@ -20274,38 +19240,56 @@
11941
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11942
vcle_s64 (int64x1_t __a, int64x1_t __b)
11944
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
11945
+ return __a <= __b ? -1ll : 0ll;
11948
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11949
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
11951
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
11952
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
11956
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11957
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
11959
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
11960
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
11964
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11965
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
11967
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
11968
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
11972
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11973
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
11975
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
11976
- (int64x1_t) __a);
11977
+ return __a <= __b ? -1ll : 0ll;
11980
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11981
+vcleq_f32 (float32x4_t __a, float32x4_t __b)
11983
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
11986
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11987
+vcleq_f64 (float64x2_t __a, float64x2_t __b)
11989
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
11992
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11993
+vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
11995
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
11996
+ (int8x16_t) __a);
11999
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12000
vcleq_s8 (int8x16_t __a, int8x16_t __b)
12002
return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
12003
@@ -20332,46 +19316,213 @@
12004
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12005
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
12007
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
12008
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
12012
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12013
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
12015
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
12016
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
12020
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12021
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
12023
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
12024
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
12028
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12029
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
12031
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
12032
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
12036
+/* vcle - scalar. */
12038
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12039
+vcles_f32 (float32_t __a, float32_t __b)
12041
+ return __a <= __b ? -1 : 0;
12044
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12045
vcled_s64 (int64x1_t __a, int64x1_t __b)
12047
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
12048
+ return __a <= __b ? -1ll : 0ll;
12051
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12052
+vcled_u64 (uint64x1_t __a, uint64x1_t __b)
12054
+ return __a <= __b ? -1ll : 0ll;
12057
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12058
+vcled_f64 (float64_t __a, float64_t __b)
12060
+ return __a <= __b ? -1ll : 0ll;
12063
+/* vclez - vector. */
12065
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12066
+vclez_f32 (float32x2_t __a)
12068
+ float32x2_t __b = {0.0f, 0.0f};
12069
+ return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
12072
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12073
+vclez_f64 (float64x1_t __a)
12075
+ return __a <= 0.0 ? -1ll : 0ll;
12078
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12079
+vclez_p8 (poly8x8_t __a)
12081
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
12082
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
12086
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12087
+vclez_s8 (int8x8_t __a)
12089
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
12090
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
12093
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12094
+vclez_s16 (int16x4_t __a)
12096
+ int16x4_t __b = {0, 0, 0, 0};
12097
+ return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
12100
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12101
+vclez_s32 (int32x2_t __a)
12103
+ int32x2_t __b = {0, 0};
12104
+ return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
12107
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12108
+vclez_s64 (int64x1_t __a)
12110
+ return __a <= 0ll ? -1ll : 0ll;
12113
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12114
+vclez_u64 (uint64x1_t __a)
12116
+ return __a <= 0ll ? -1ll : 0ll;
12119
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12120
+vclezq_f32 (float32x4_t __a)
12122
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
12123
+ return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
12126
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12127
+vclezq_f64 (float64x2_t __a)
12129
+ float64x2_t __b = {0.0, 0.0};
12130
+ return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
12133
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12134
+vclezq_p8 (poly8x16_t __a)
12136
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
12137
+ 0, 0, 0, 0, 0, 0, 0, 0};
12138
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
12139
+ (int8x16_t) __b);
12142
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12143
+vclezq_s8 (int8x16_t __a)
12145
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
12146
+ 0, 0, 0, 0, 0, 0, 0, 0};
12147
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
12150
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12151
+vclezq_s16 (int16x8_t __a)
12153
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
12154
+ return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
12157
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12158
+vclezq_s32 (int32x4_t __a)
12160
+ int32x4_t __b = {0, 0, 0, 0};
12161
+ return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
12164
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12165
+vclezq_s64 (int64x2_t __a)
12167
+ int64x2_t __b = {0, 0};
12168
+ return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
12171
+/* vclez - scalar. */
12173
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12174
+vclezs_f32 (float32_t __a)
12176
+ return __a <= 0.0f ? -1 : 0;
12179
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12180
vclezd_s64 (int64x1_t __a)
12182
- return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0);
12183
+ return __a <= 0 ? -1ll : 0ll;
12187
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12188
+vclezd_u64 (int64x1_t __a)
12190
+ return __a <= 0 ? -1ll : 0ll;
12193
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12194
+vclezd_f64 (float64_t __a)
12196
+ return __a <= 0.0 ? -1ll : 0ll;
12199
+/* vclt - vector. */
12201
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12202
+vclt_f32 (float32x2_t __a, float32x2_t __b)
12204
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
12207
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12208
+vclt_f64 (float64x1_t __a, float64x1_t __b)
12210
+ return __a < __b ? -1ll : 0ll;
12213
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12214
+vclt_p8 (poly8x8_t __a, poly8x8_t __b)
12216
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
12220
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12221
vclt_s8 (int8x8_t __a, int8x8_t __b)
12223
return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
12224
@@ -20392,38 +19543,56 @@
12225
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12226
vclt_s64 (int64x1_t __a, int64x1_t __b)
12228
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
12229
+ return __a < __b ? -1ll : 0ll;
12232
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12233
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
12235
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
12236
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
12240
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12241
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
12243
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
12244
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
12248
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12249
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
12251
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
12252
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
12256
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12257
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
12259
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
12260
- (int64x1_t) __a);
12261
+ return __a < __b ? -1ll : 0ll;
12264
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12265
+vcltq_f32 (float32x4_t __a, float32x4_t __b)
12267
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
12270
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12271
+vcltq_f64 (float64x2_t __a, float64x2_t __b)
12273
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
12276
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12277
+vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
12279
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
12280
+ (int8x16_t) __a);
12283
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12284
vcltq_s8 (int8x16_t __a, int8x16_t __b)
12286
return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
12287
@@ -20450,43 +19619,616 @@
12288
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12289
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
12291
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
12292
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
12296
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12297
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
12299
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
12300
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
12304
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12305
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
12307
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
12308
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
12312
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12313
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
12315
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
12316
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
12320
+/* vclt - scalar. */
12322
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12323
+vclts_f32 (float32_t __a, float32_t __b)
12325
+ return __a < __b ? -1 : 0;
12328
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12329
vcltd_s64 (int64x1_t __a, int64x1_t __b)
12331
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
12332
+ return __a < __b ? -1ll : 0ll;
12335
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12336
+vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
12338
+ return __a < __b ? -1ll : 0ll;
12341
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12342
+vcltd_f64 (float64_t __a, float64_t __b)
12344
+ return __a < __b ? -1ll : 0ll;
12347
+/* vcltz - vector. */
12349
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12350
+vcltz_f32 (float32x2_t __a)
12352
+ float32x2_t __b = {0.0f, 0.0f};
12353
+ return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
12356
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12357
+vcltz_f64 (float64x1_t __a)
12359
+ return __a < 0.0 ? -1ll : 0ll;
12362
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12363
+vcltz_p8 (poly8x8_t __a)
12365
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
12366
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
12370
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12371
+vcltz_s8 (int8x8_t __a)
12373
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
12374
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
12377
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12378
+vcltz_s16 (int16x4_t __a)
12380
+ int16x4_t __b = {0, 0, 0, 0};
12381
+ return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
12384
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12385
+vcltz_s32 (int32x2_t __a)
12387
+ int32x2_t __b = {0, 0};
12388
+ return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
12391
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12392
+vcltz_s64 (int64x1_t __a)
12394
+ return __a < 0ll ? -1ll : 0ll;
12397
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12398
+vcltzq_f32 (float32x4_t __a)
12400
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
12401
+ return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
12404
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12405
+vcltzq_f64 (float64x2_t __a)
12407
+ float64x2_t __b = {0.0, 0.0};
12408
+ return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
12411
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12412
+vcltzq_p8 (poly8x16_t __a)
12414
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
12415
+ 0, 0, 0, 0, 0, 0, 0, 0};
12416
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
12417
+ (int8x16_t) __b);
12420
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12421
+vcltzq_s8 (int8x16_t __a)
12423
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
12424
+ 0, 0, 0, 0, 0, 0, 0, 0};
12425
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
12428
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12429
+vcltzq_s16 (int16x8_t __a)
12431
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
12432
+ return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
12435
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12436
+vcltzq_s32 (int32x4_t __a)
12438
+ int32x4_t __b = {0, 0, 0, 0};
12439
+ return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
12442
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12443
+vcltzq_s64 (int64x2_t __a)
12445
+ int64x2_t __b = {0, 0};
12446
+ return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
12449
+/* vcltz - scalar. */
12451
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12452
+vcltzs_f32 (float32_t __a)
12454
+ return __a < 0.0f ? -1 : 0;
12457
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12458
vcltzd_s64 (int64x1_t __a)
12460
- return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
12461
+ return __a < 0 ? -1ll : 0ll;
12464
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12465
+vcltzd_u64 (int64x1_t __a)
12467
+ return __a < 0 ? -1ll : 0ll;
12470
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12471
+vcltzd_f64 (float64_t __a)
12473
+ return __a < 0.0 ? -1ll : 0ll;
12476
+/* vcvt (double -> float). */
12478
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12479
+vcvt_f32_f64 (float64x2_t __a)
12481
+ return __builtin_aarch64_float_truncate_lo_v2sf (__a);
12484
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12485
+vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
12487
+ return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
12490
+/* vcvt (float -> double). */
12492
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12493
+vcvt_f64_f32 (float32x2_t __a)
12496
+ return __builtin_aarch64_float_extend_lo_v2df (__a);
12499
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12500
+vcvt_high_f64_f32 (float32x4_t __a)
12502
+ return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
12505
+/* vcvt (<u>int -> float) */
12507
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12508
+vcvtd_f64_s64 (int64_t __a)
12510
+ return (float64_t) __a;
12513
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
12514
+vcvtd_f64_u64 (uint64_t __a)
12516
+ return (float64_t) __a;
12519
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12520
+vcvts_f32_s32 (int32_t __a)
12522
+ return (float32_t) __a;
12525
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12526
+vcvts_f32_u32 (uint32_t __a)
12528
+ return (float32_t) __a;
12531
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12532
+vcvt_f32_s32 (int32x2_t __a)
12534
+ return __builtin_aarch64_floatv2siv2sf (__a);
12537
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12538
+vcvt_f32_u32 (uint32x2_t __a)
12540
+ return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
12543
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12544
+vcvtq_f32_s32 (int32x4_t __a)
12546
+ return __builtin_aarch64_floatv4siv4sf (__a);
12549
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12550
+vcvtq_f32_u32 (uint32x4_t __a)
12552
+ return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
12555
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12556
+vcvtq_f64_s64 (int64x2_t __a)
12558
+ return __builtin_aarch64_floatv2div2df (__a);
12561
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12562
+vcvtq_f64_u64 (uint64x2_t __a)
12564
+ return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
12567
+/* vcvt (float -> <u>int) */
12569
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
12570
+vcvtd_s64_f64 (float64_t __a)
12572
+ return (int64_t) __a;
12575
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12576
+vcvtd_u64_f64 (float64_t __a)
12578
+ return (uint64_t) __a;
12581
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12582
+vcvts_s32_f32 (float32_t __a)
12584
+ return (int32_t) __a;
12587
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12588
+vcvts_u32_f32 (float32_t __a)
12590
+ return (uint32_t) __a;
12593
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12594
+vcvt_s32_f32 (float32x2_t __a)
12596
+ return __builtin_aarch64_lbtruncv2sfv2si (__a);
12599
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12600
+vcvt_u32_f32 (float32x2_t __a)
12602
+ /* TODO: This cast should go away when builtins have
12603
+ their correct types. */
12604
+ return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
12607
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12608
+vcvtq_s32_f32 (float32x4_t __a)
12610
+ return __builtin_aarch64_lbtruncv4sfv4si (__a);
12613
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12614
+vcvtq_u32_f32 (float32x4_t __a)
12616
+ /* TODO: This cast should go away when builtins have
12617
+ their correct types. */
12618
+ return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
12621
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12622
+vcvtq_s64_f64 (float64x2_t __a)
12624
+ return __builtin_aarch64_lbtruncv2dfv2di (__a);
12627
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12628
+vcvtq_u64_f64 (float64x2_t __a)
12630
+ /* TODO: This cast should go away when builtins have
12631
+ their correct types. */
12632
+ return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
12637
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
12638
+vcvtad_s64_f64 (float64_t __a)
12640
+ return __builtin_aarch64_lrounddfdi (__a);
12643
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12644
+vcvtad_u64_f64 (float64_t __a)
12646
+ return __builtin_aarch64_lroundudfdi (__a);
12649
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12650
+vcvtas_s32_f32 (float32_t __a)
12652
+ return __builtin_aarch64_lroundsfsi (__a);
12655
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12656
+vcvtas_u32_f32 (float32_t __a)
12658
+ return __builtin_aarch64_lroundusfsi (__a);
12661
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12662
+vcvta_s32_f32 (float32x2_t __a)
12664
+ return __builtin_aarch64_lroundv2sfv2si (__a);
12667
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12668
+vcvta_u32_f32 (float32x2_t __a)
12670
+ /* TODO: This cast should go away when builtins have
12671
+ their correct types. */
12672
+ return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
12675
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12676
+vcvtaq_s32_f32 (float32x4_t __a)
12678
+ return __builtin_aarch64_lroundv4sfv4si (__a);
12681
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12682
+vcvtaq_u32_f32 (float32x4_t __a)
12684
+ /* TODO: This cast should go away when builtins have
12685
+ their correct types. */
12686
+ return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
12689
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12690
+vcvtaq_s64_f64 (float64x2_t __a)
12692
+ return __builtin_aarch64_lroundv2dfv2di (__a);
12695
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12696
+vcvtaq_u64_f64 (float64x2_t __a)
12698
+ /* TODO: This cast should go away when builtins have
12699
+ their correct types. */
12700
+ return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
12705
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
12706
+vcvtmd_s64_f64 (float64_t __a)
12708
+ return __builtin_lfloor (__a);
12711
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12712
+vcvtmd_u64_f64 (float64_t __a)
12714
+ return __builtin_aarch64_lfloorudfdi (__a);
12717
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12718
+vcvtms_s32_f32 (float32_t __a)
12720
+ return __builtin_ifloorf (__a);
12723
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12724
+vcvtms_u32_f32 (float32_t __a)
12726
+ return __builtin_aarch64_lfloorusfsi (__a);
12729
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12730
+vcvtm_s32_f32 (float32x2_t __a)
12732
+ return __builtin_aarch64_lfloorv2sfv2si (__a);
12735
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12736
+vcvtm_u32_f32 (float32x2_t __a)
12738
+ /* TODO: This cast should go away when builtins have
12739
+ their correct types. */
12740
+ return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
12743
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12744
+vcvtmq_s32_f32 (float32x4_t __a)
12746
+ return __builtin_aarch64_lfloorv4sfv4si (__a);
12749
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12750
+vcvtmq_u32_f32 (float32x4_t __a)
12752
+ /* TODO: This cast should go away when builtins have
12753
+ their correct types. */
12754
+ return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
12757
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12758
+vcvtmq_s64_f64 (float64x2_t __a)
12760
+ return __builtin_aarch64_lfloorv2dfv2di (__a);
12763
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12764
+vcvtmq_u64_f64 (float64x2_t __a)
12766
+ /* TODO: This cast should go away when builtins have
12767
+ their correct types. */
12768
+ return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
12773
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
12774
+vcvtnd_s64_f64 (float64_t __a)
12776
+ return __builtin_aarch64_lfrintndfdi (__a);
12779
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12780
+vcvtnd_u64_f64 (float64_t __a)
12782
+ return __builtin_aarch64_lfrintnudfdi (__a);
12785
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12786
+vcvtns_s32_f32 (float32_t __a)
12788
+ return __builtin_aarch64_lfrintnsfsi (__a);
12791
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12792
+vcvtns_u32_f32 (float32_t __a)
12794
+ return __builtin_aarch64_lfrintnusfsi (__a);
12797
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12798
+vcvtn_s32_f32 (float32x2_t __a)
12800
+ return __builtin_aarch64_lfrintnv2sfv2si (__a);
12803
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12804
+vcvtn_u32_f32 (float32x2_t __a)
12806
+ /* TODO: This cast should go away when builtins have
12807
+ their correct types. */
12808
+ return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
12811
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12812
+vcvtnq_s32_f32 (float32x4_t __a)
12814
+ return __builtin_aarch64_lfrintnv4sfv4si (__a);
12817
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12818
+vcvtnq_u32_f32 (float32x4_t __a)
12820
+ /* TODO: This cast should go away when builtins have
12821
+ their correct types. */
12822
+ return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
12825
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12826
+vcvtnq_s64_f64 (float64x2_t __a)
12828
+ return __builtin_aarch64_lfrintnv2dfv2di (__a);
12831
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12832
+vcvtnq_u64_f64 (float64x2_t __a)
12834
+ /* TODO: This cast should go away when builtins have
12835
+ their correct types. */
12836
+ return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
12841
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
12842
+vcvtpd_s64_f64 (float64_t __a)
12844
+ return __builtin_lceil (__a);
12847
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12848
+vcvtpd_u64_f64 (float64_t __a)
12850
+ return __builtin_aarch64_lceiludfdi (__a);
12853
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12854
+vcvtps_s32_f32 (float32_t __a)
12856
+ return __builtin_iceilf (__a);
12859
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12860
+vcvtps_u32_f32 (float32_t __a)
12862
+ return __builtin_aarch64_lceilusfsi (__a);
12865
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12866
+vcvtp_s32_f32 (float32x2_t __a)
12868
+ return __builtin_aarch64_lceilv2sfv2si (__a);
12871
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12872
+vcvtp_u32_f32 (float32x2_t __a)
12874
+ /* TODO: This cast should go away when builtins have
12875
+ their correct types. */
12876
+ return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
12879
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12880
+vcvtpq_s32_f32 (float32x4_t __a)
12882
+ return __builtin_aarch64_lceilv4sfv4si (__a);
12885
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12886
+vcvtpq_u32_f32 (float32x4_t __a)
12888
+ /* TODO: This cast should go away when builtins have
12889
+ their correct types. */
12890
+ return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
12893
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12894
+vcvtpq_s64_f64 (float64x2_t __a)
12896
+ return __builtin_aarch64_lceilv2dfv2di (__a);
12899
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12900
+vcvtpq_u64_f64 (float64x2_t __a)
12902
+ /* TODO: This cast should go away when builtins have
12903
+ their correct types. */
12904
+ return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
12909
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
12910
@@ -21408,7 +21150,7 @@
12911
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12912
vmax_f32 (float32x2_t __a, float32x2_t __b)
12914
- return __builtin_aarch64_fmaxv2sf (__a, __b);
12915
+ return __builtin_aarch64_smax_nanv2sf (__a, __b);
12918
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12919
@@ -21453,13 +21195,13 @@
12920
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12921
vmaxq_f32 (float32x4_t __a, float32x4_t __b)
12923
- return __builtin_aarch64_fmaxv4sf (__a, __b);
12924
+ return __builtin_aarch64_smax_nanv4sf (__a, __b);
12927
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12928
vmaxq_f64 (float64x2_t __a, float64x2_t __b)
12930
- return __builtin_aarch64_fmaxv2df (__a, __b);
12931
+ return __builtin_aarch64_smax_nanv2df (__a, __b);
12934
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12935
@@ -21501,12 +21243,150 @@
12942
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12943
+vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
12945
+ return __builtin_aarch64_smaxv2sf (__a, __b);
12948
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12949
+vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
12951
+ return __builtin_aarch64_smaxv4sf (__a, __b);
12954
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12955
+vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
12957
+ return __builtin_aarch64_smaxv2df (__a, __b);
12962
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
12963
+vmaxv_f32 (float32x2_t __a)
12965
+ return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
12968
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
12969
+vmaxv_s8 (int8x8_t __a)
12971
+ return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
12974
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
12975
+vmaxv_s16 (int16x4_t __a)
12977
+ return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
12980
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
12981
+vmaxv_s32 (int32x2_t __a)
12983
+ return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
12986
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
12987
+vmaxv_u8 (uint8x8_t __a)
12989
+ return vget_lane_u8 ((uint8x8_t)
12990
+ __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
12993
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
12994
+vmaxv_u16 (uint16x4_t __a)
12996
+ return vget_lane_u16 ((uint16x4_t)
12997
+ __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
13000
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13001
+vmaxv_u32 (uint32x2_t __a)
13003
+ return vget_lane_u32 ((uint32x2_t)
13004
+ __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
13007
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13008
+vmaxvq_f32 (float32x4_t __a)
13010
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
13013
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13014
+vmaxvq_f64 (float64x2_t __a)
13016
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
13019
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13020
+vmaxvq_s8 (int8x16_t __a)
13022
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
13025
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13026
+vmaxvq_s16 (int16x8_t __a)
13028
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
13031
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13032
+vmaxvq_s32 (int32x4_t __a)
13034
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
13037
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13038
+vmaxvq_u8 (uint8x16_t __a)
13040
+ return vgetq_lane_u8 ((uint8x16_t)
13041
+ __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
13044
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13045
+vmaxvq_u16 (uint16x8_t __a)
13047
+ return vgetq_lane_u16 ((uint16x8_t)
13048
+ __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
13051
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13052
+vmaxvq_u32 (uint32x4_t __a)
13054
+ return vgetq_lane_u32 ((uint32x4_t)
13055
+ __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
13060
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13061
+vmaxnmv_f32 (float32x2_t __a)
13063
+ return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
13066
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13067
+vmaxnmvq_f32 (float32x4_t __a)
13069
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
13072
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13073
+vmaxnmvq_f64 (float64x2_t __a)
13075
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
13080
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13081
vmin_f32 (float32x2_t __a, float32x2_t __b)
13083
- return __builtin_aarch64_fminv2sf (__a, __b);
13084
+ return __builtin_aarch64_smin_nanv2sf (__a, __b);
13087
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13088
@@ -21551,13 +21431,13 @@
13089
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13090
vminq_f32 (float32x4_t __a, float32x4_t __b)
13092
- return __builtin_aarch64_fminv4sf (__a, __b);
13093
+ return __builtin_aarch64_smin_nanv4sf (__a, __b);
13096
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13097
vminq_f64 (float64x2_t __a, float64x2_t __b)
13099
- return __builtin_aarch64_fminv2df (__a, __b);
13100
+ return __builtin_aarch64_smin_nanv2df (__a, __b);
13103
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13104
@@ -21599,6 +21479,144 @@
13110
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13111
+vminnm_f32 (float32x2_t __a, float32x2_t __b)
13113
+ return __builtin_aarch64_sminv2sf (__a, __b);
13116
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13117
+vminnmq_f32 (float32x4_t __a, float32x4_t __b)
13119
+ return __builtin_aarch64_sminv4sf (__a, __b);
13122
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13123
+vminnmq_f64 (float64x2_t __a, float64x2_t __b)
13125
+ return __builtin_aarch64_sminv2df (__a, __b);
13130
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13131
+vminv_f32 (float32x2_t __a)
13133
+ return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
13136
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13137
+vminv_s8 (int8x8_t __a)
13139
+ return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
13142
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13143
+vminv_s16 (int16x4_t __a)
13145
+ return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
13148
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13149
+vminv_s32 (int32x2_t __a)
13151
+ return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
13154
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13155
+vminv_u8 (uint8x8_t __a)
13157
+ return vget_lane_u8 ((uint8x8_t)
13158
+ __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
13161
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13162
+vminv_u16 (uint16x4_t __a)
13164
+ return vget_lane_u16 ((uint16x4_t)
13165
+ __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
13168
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13169
+vminv_u32 (uint32x2_t __a)
13171
+ return vget_lane_u32 ((uint32x2_t)
13172
+ __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
13175
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13176
+vminvq_f32 (float32x4_t __a)
13178
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
13181
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13182
+vminvq_f64 (float64x2_t __a)
13184
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
13187
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
13188
+vminvq_s8 (int8x16_t __a)
13190
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
13193
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
13194
+vminvq_s16 (int16x8_t __a)
13196
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
13199
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
13200
+vminvq_s32 (int32x4_t __a)
13202
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
13205
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13206
+vminvq_u8 (uint8x16_t __a)
13208
+ return vgetq_lane_u8 ((uint8x16_t)
13209
+ __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
13212
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13213
+vminvq_u16 (uint16x8_t __a)
13215
+ return vgetq_lane_u16 ((uint16x8_t)
13216
+ __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
13219
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13220
+vminvq_u32 (uint32x4_t __a)
13222
+ return vgetq_lane_u32 ((uint32x4_t)
13223
+ __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
13228
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13229
+vminnmv_f32 (float32x2_t __a)
13231
+ return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
13234
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13235
+vminnmvq_f32 (float32x4_t __a)
13237
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
13240
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13241
+vminnmvq_f64 (float64x2_t __a)
13243
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
13248
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13249
@@ -23115,6 +23133,223 @@
13250
return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
13255
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13256
+vrecpes_f32 (float32_t __a)
13258
+ return __builtin_aarch64_frecpesf (__a);
13261
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13262
+vrecped_f64 (float64_t __a)
13264
+ return __builtin_aarch64_frecpedf (__a);
13267
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13268
+vrecpe_f32 (float32x2_t __a)
13270
+ return __builtin_aarch64_frecpev2sf (__a);
13273
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13274
+vrecpeq_f32 (float32x4_t __a)
13276
+ return __builtin_aarch64_frecpev4sf (__a);
13279
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13280
+vrecpeq_f64 (float64x2_t __a)
13282
+ return __builtin_aarch64_frecpev2df (__a);
13287
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13288
+vrecpss_f32 (float32_t __a, float32_t __b)
13290
+ return __builtin_aarch64_frecpssf (__a, __b);
13293
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13294
+vrecpsd_f64 (float64_t __a, float64_t __b)
13296
+ return __builtin_aarch64_frecpsdf (__a, __b);
13299
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13300
+vrecps_f32 (float32x2_t __a, float32x2_t __b)
13302
+ return __builtin_aarch64_frecpsv2sf (__a, __b);
13305
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13306
+vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
13308
+ return __builtin_aarch64_frecpsv4sf (__a, __b);
13311
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13312
+vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
13314
+ return __builtin_aarch64_frecpsv2df (__a, __b);
13319
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
13320
+vrecpxs_f32 (float32_t __a)
13322
+ return __builtin_aarch64_frecpxsf (__a);
13325
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
13326
+vrecpxd_f64 (float64_t __a)
13328
+ return __builtin_aarch64_frecpxdf (__a);
13333
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13334
+vrnd_f32 (float32x2_t __a)
13336
+ return __builtin_aarch64_btruncv2sf (__a);
13339
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13340
+vrndq_f32 (float32x4_t __a)
13342
+ return __builtin_aarch64_btruncv4sf (__a);
13345
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13346
+vrndq_f64 (float64x2_t __a)
13348
+ return __builtin_aarch64_btruncv2df (__a);
13353
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13354
+vrnda_f32 (float32x2_t __a)
13356
+ return __builtin_aarch64_roundv2sf (__a);
13359
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13360
+vrndaq_f32 (float32x4_t __a)
13362
+ return __builtin_aarch64_roundv4sf (__a);
13365
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13366
+vrndaq_f64 (float64x2_t __a)
13368
+ return __builtin_aarch64_roundv2df (__a);
13373
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13374
+vrndi_f32 (float32x2_t __a)
13376
+ return __builtin_aarch64_nearbyintv2sf (__a);
13379
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13380
+vrndiq_f32 (float32x4_t __a)
13382
+ return __builtin_aarch64_nearbyintv4sf (__a);
13385
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13386
+vrndiq_f64 (float64x2_t __a)
13388
+ return __builtin_aarch64_nearbyintv2df (__a);
13393
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13394
+vrndm_f32 (float32x2_t __a)
13396
+ return __builtin_aarch64_floorv2sf (__a);
13399
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13400
+vrndmq_f32 (float32x4_t __a)
13402
+ return __builtin_aarch64_floorv4sf (__a);
13405
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13406
+vrndmq_f64 (float64x2_t __a)
13408
+ return __builtin_aarch64_floorv2df (__a);
13413
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13414
+vrndn_f32 (float32x2_t __a)
13416
+ return __builtin_aarch64_frintnv2sf (__a);
13418
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13419
+vrndnq_f32 (float32x4_t __a)
13421
+ return __builtin_aarch64_frintnv4sf (__a);
13424
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13425
+vrndnq_f64 (float64x2_t __a)
13427
+ return __builtin_aarch64_frintnv2df (__a);
13432
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13433
+vrndp_f32 (float32x2_t __a)
13435
+ return __builtin_aarch64_ceilv2sf (__a);
13438
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13439
+vrndpq_f32 (float32x4_t __a)
13441
+ return __builtin_aarch64_ceilv4sf (__a);
13444
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13445
+vrndpq_f64 (float64x2_t __a)
13447
+ return __builtin_aarch64_ceilv2df (__a);
13452
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13453
+vrndx_f32 (float32x2_t __a)
13455
+ return __builtin_aarch64_rintv2sf (__a);
13458
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13459
+vrndxq_f32 (float32x4_t __a)
13461
+ return __builtin_aarch64_rintv4sf (__a);
13464
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13465
+vrndxq_f64 (float64x2_t __a)
13467
+ return __builtin_aarch64_rintv2df (__a);
13472
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13473
@@ -23458,109 +23693,109 @@
13474
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13475
vshl_n_s8 (int8x8_t __a, const int __b)
13477
- return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b);
13478
+ return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
13481
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13482
vshl_n_s16 (int16x4_t __a, const int __b)
13484
- return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b);
13485
+ return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
13488
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13489
vshl_n_s32 (int32x2_t __a, const int __b)
13491
- return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b);
13492
+ return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
13495
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13496
vshl_n_s64 (int64x1_t __a, const int __b)
13498
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
13499
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
13502
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13503
vshl_n_u8 (uint8x8_t __a, const int __b)
13505
- return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b);
13506
+ return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
13509
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13510
vshl_n_u16 (uint16x4_t __a, const int __b)
13512
- return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b);
13513
+ return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
13516
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13517
vshl_n_u32 (uint32x2_t __a, const int __b)
13519
- return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b);
13520
+ return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
13523
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13524
vshl_n_u64 (uint64x1_t __a, const int __b)
13526
- return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b);
13527
+ return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
13530
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13531
vshlq_n_s8 (int8x16_t __a, const int __b)
13533
- return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b);
13534
+ return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
13537
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13538
vshlq_n_s16 (int16x8_t __a, const int __b)
13540
- return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b);
13541
+ return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
13544
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13545
vshlq_n_s32 (int32x4_t __a, const int __b)
13547
- return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b);
13548
+ return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
13551
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13552
vshlq_n_s64 (int64x2_t __a, const int __b)
13554
- return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b);
13555
+ return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
13558
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13559
vshlq_n_u8 (uint8x16_t __a, const int __b)
13561
- return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b);
13562
+ return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
13565
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13566
vshlq_n_u16 (uint16x8_t __a, const int __b)
13568
- return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b);
13569
+ return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
13572
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13573
vshlq_n_u32 (uint32x4_t __a, const int __b)
13575
- return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b);
13576
+ return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
13579
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13580
vshlq_n_u64 (uint64x2_t __a, const int __b)
13582
- return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b);
13583
+ return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
13586
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13587
vshld_n_s64 (int64x1_t __a, const int __b)
13589
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
13590
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
13593
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13594
vshld_n_u64 (uint64x1_t __a, const int __b)
13596
- return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b);
13597
+ return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
13600
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13601
@@ -23748,109 +23983,109 @@
13602
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13603
vshr_n_s8 (int8x8_t __a, const int __b)
13605
- return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b);
13606
+ return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
13609
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13610
vshr_n_s16 (int16x4_t __a, const int __b)
13612
- return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b);
13613
+ return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
13616
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13617
vshr_n_s32 (int32x2_t __a, const int __b)
13619
- return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b);
13620
+ return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
13623
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13624
vshr_n_s64 (int64x1_t __a, const int __b)
13626
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
13627
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
13630
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13631
vshr_n_u8 (uint8x8_t __a, const int __b)
13633
- return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b);
13634
+ return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
13637
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13638
vshr_n_u16 (uint16x4_t __a, const int __b)
13640
- return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b);
13641
+ return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
13644
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13645
vshr_n_u32 (uint32x2_t __a, const int __b)
13647
- return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b);
13648
+ return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
13651
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13652
vshr_n_u64 (uint64x1_t __a, const int __b)
13654
- return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b);
13655
+ return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
13658
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13659
vshrq_n_s8 (int8x16_t __a, const int __b)
13661
- return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b);
13662
+ return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
13665
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13666
vshrq_n_s16 (int16x8_t __a, const int __b)
13668
- return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b);
13669
+ return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
13672
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13673
vshrq_n_s32 (int32x4_t __a, const int __b)
13675
- return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b);
13676
+ return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
13679
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13680
vshrq_n_s64 (int64x2_t __a, const int __b)
13682
- return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b);
13683
+ return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
13686
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13687
vshrq_n_u8 (uint8x16_t __a, const int __b)
13689
- return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b);
13690
+ return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
13693
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13694
vshrq_n_u16 (uint16x8_t __a, const int __b)
13696
- return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b);
13697
+ return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
13700
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13701
vshrq_n_u32 (uint32x4_t __a, const int __b)
13703
- return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b);
13704
+ return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
13707
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13708
vshrq_n_u64 (uint64x2_t __a, const int __b)
13710
- return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b);
13711
+ return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
13714
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13715
vshrd_n_s64 (int64x1_t __a, const int __b)
13717
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
13718
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
13721
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13722
vshrd_n_u64 (uint64x1_t __a, const int __b)
13724
- return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b);
13725
+ return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
13729
@@ -25320,7 +25555,7 @@
13730
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13731
vtst_s64 (int64x1_t __a, int64x1_t __b)
13733
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
13734
+ return (__a & __b) ? -1ll : 0ll;
13737
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13738
@@ -25347,8 +25582,7 @@
13739
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13740
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
13742
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
13743
- (int64x1_t) __b);
13744
+ return (__a & __b) ? -1ll : 0ll;
13747
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13748
@@ -25406,14 +25640,13 @@
13749
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13750
vtstd_s64 (int64x1_t __a, int64x1_t __b)
13752
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
13753
+ return (__a & __b) ? -1ll : 0ll;
13756
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13757
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
13759
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
13760
- (int64x1_t) __b);
13761
+ return (__a & __b) ? -1ll : 0ll;
13765
--- a/src/gcc/config/aarch64/aarch64.md
13766
+++ b/src/gcc/config/aarch64/aarch64.md
13768
(define_c_enum "unspec" [
13781
@@ -230,6 +234,9 @@
13791
@@ -763,19 +770,41 @@
13794
(define_insn "*mov<mode>_aarch64"
13795
- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,r,m, r,*w")
13796
- (match_operand:SHORT 1 "general_operand" " r,M,m,rZ,*w,r"))]
13797
+ [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w")
13798
+ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
13799
"(register_operand (operands[0], <MODE>mode)
13800
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
13804
- ldr<size>\\t%w0, %1
13805
- str<size>\\t%w1, %0
13806
- umov\\t%w0, %1.<v>[0]
13807
- dup\\t%0.<Vallxd>, %w1"
13808
- [(set_attr "v8type" "move,alu,load1,store1,*,*")
13809
- (set_attr "simd_type" "*,*,*,*,simd_movgp,simd_dupgp")
13811
+ switch (which_alternative)
13814
+ return "mov\t%w0, %w1";
13816
+ return "mov\t%w0, %1";
13818
+ return aarch64_output_scalar_simd_mov_immediate (operands[1],
13821
+ return "ldr<size>\t%w0, %1";
13823
+ return "ldr\t%<size>0, %1";
13825
+ return "str<size>\t%w1, %0";
13827
+ return "str\t%<size>1, %0";
13829
+ return "umov\t%w0, %1.<v>[0]";
13831
+ return "dup\t%0.<Vallxd>, %w1";
13833
+ return "dup\t%0, %1.<v>[0]";
13835
+ gcc_unreachable ();
13838
+ [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
13839
+ (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
13840
+ (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
13841
(set_attr "mode" "<MODE>")
13842
(set_attr "simd_mode" "<MODE>")]
13844
@@ -797,26 +826,28 @@
13847
(define_insn "*movsi_aarch64"
13848
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m, *w, r,*w")
13849
- (match_operand:SI 1 "aarch64_mov_operand" " r,M,m,rZ,rZ,*w,*w"))]
13850
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,*w,m, m,*w, r,*w")
13851
+ (match_operand:SI 1 "aarch64_mov_operand" " r,M,m, m,rZ,*w,rZ,*w,*w"))]
13852
"(register_operand (operands[0], SImode)
13853
|| aarch64_reg_or_zero (operands[1], SImode))"
13864
- [(set_attr "v8type" "move,alu,load1,store1,fmov,fmov,fmov")
13865
+ [(set_attr "v8type" "move,alu,load1,load1,store1,store1,fmov,fmov,fmov")
13866
(set_attr "mode" "SI")
13867
- (set_attr "fp" "*,*,*,*,yes,yes,yes")]
13868
+ (set_attr "fp" "*,*,*,*,*,*,yes,yes,yes")]
13871
(define_insn "*movdi_aarch64"
13872
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,m, r, r, *w, r,*w,w")
13873
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m,rZ,Usa,Ush,rZ,*w,*w,Dd"))]
13874
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
13875
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
13876
"(register_operand (operands[0], DImode)
13877
|| aarch64_reg_or_zero (operands[1], DImode))"
13879
@@ -825,16 +856,18 @@
13892
- [(set_attr "v8type" "move,move,move,alu,load1,store1,adr,adr,fmov,fmov,fmov,fmov")
13893
+ [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
13894
(set_attr "mode" "DI")
13895
- (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
13896
+ (set_attr "fp" "*,*,*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
13899
(define_insn "insv_imm<mode>"
13900
@@ -842,9 +875,8 @@
13902
(match_operand:GPI 1 "const_int_operand" "n"))
13903
(match_operand:GPI 2 "const_int_operand" "n"))]
13904
- "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
13905
- && INTVAL (operands[1]) % 16 == 0
13906
- && UINTVAL (operands[2]) <= 0xffff"
13907
+ "UINTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
13908
+ && UINTVAL (operands[1]) % 16 == 0"
13909
"movk\\t%<w>0, %X2, lsl %1"
13910
[(set_attr "v8type" "movk")
13911
(set_attr "mode" "<MODE>")]
13912
@@ -1149,13 +1181,14 @@
13915
(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
13916
- [(set (match_operand:GPI 0 "register_operand" "=r,r")
13917
- (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))]
13918
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,*w")
13919
+ (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))]
13922
uxt<SHORT:size>\t%<GPI:w>0, %w1
13923
- ldr<SHORT:size>\t%w0, %1"
13924
- [(set_attr "v8type" "extend,load1")
13925
+ ldr<SHORT:size>\t%w0, %1
13926
+ ldr\t%<SHORT:size>0, %1"
13927
+ [(set_attr "v8type" "extend,load1,load1")
13928
(set_attr "mode" "<GPI:MODE>")]
13931
@@ -1286,6 +1319,112 @@
13932
(set_attr "mode" "SI")]
13935
+(define_insn "*adds_mul_imm_<mode>"
13936
+ [(set (reg:CC_NZ CC_REGNUM)
13938
+ (plus:GPI (mult:GPI
13939
+ (match_operand:GPI 1 "register_operand" "r")
13940
+ (match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))
13941
+ (match_operand:GPI 3 "register_operand" "rk"))
13943
+ (set (match_operand:GPI 0 "register_operand" "=r")
13944
+ (plus:GPI (mult:GPI (match_dup 1) (match_dup 2))
13947
+ "adds\\t%<w>0, %<w>3, %<w>1, lsl %p2"
13948
+ [(set_attr "v8type" "alus_shift")
13949
+ (set_attr "mode" "<MODE>")]
13952
+(define_insn "*subs_mul_imm_<mode>"
13953
+ [(set (reg:CC_NZ CC_REGNUM)
13955
+ (minus:GPI (match_operand:GPI 1 "register_operand" "rk")
13957
+ (match_operand:GPI 2 "register_operand" "r")
13958
+ (match_operand:QI 3 "aarch64_pwr_2_<mode>" "n")))
13960
+ (set (match_operand:GPI 0 "register_operand" "=r")
13961
+ (minus:GPI (match_dup 1)
13962
+ (mult:GPI (match_dup 2) (match_dup 3))))]
13964
+ "subs\\t%<w>0, %<w>1, %<w>2, lsl %p3"
13965
+ [(set_attr "v8type" "alus_shift")
13966
+ (set_attr "mode" "<MODE>")]
13969
+(define_insn "*adds_<optab><ALLX:mode>_<GPI:mode>"
13970
+ [(set (reg:CC_NZ CC_REGNUM)
13973
+ (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r"))
13974
+ (match_operand:GPI 2 "register_operand" "r"))
13976
+ (set (match_operand:GPI 0 "register_operand" "=r")
13977
+ (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))]
13979
+ "adds\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>"
13980
+ [(set_attr "v8type" "alus_ext")
13981
+ (set_attr "mode" "<GPI:MODE>")]
13984
+(define_insn "*subs_<optab><ALLX:mode>_<GPI:mode>"
13985
+ [(set (reg:CC_NZ CC_REGNUM)
13987
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
13989
+ (match_operand:ALLX 2 "register_operand" "r")))
13991
+ (set (match_operand:GPI 0 "register_operand" "=r")
13992
+ (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))]
13994
+ "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>"
13995
+ [(set_attr "v8type" "alus_ext")
13996
+ (set_attr "mode" "<GPI:MODE>")]
13999
+(define_insn "*adds_<optab><mode>_multp2"
14000
+ [(set (reg:CC_NZ CC_REGNUM)
14002
+ (plus:GPI (ANY_EXTRACT:GPI
14003
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
14004
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
14005
+ (match_operand 3 "const_int_operand" "n")
14007
+ (match_operand:GPI 4 "register_operand" "r"))
14009
+ (set (match_operand:GPI 0 "register_operand" "=r")
14010
+ (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2))
14014
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
14015
+ "adds\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
14016
+ [(set_attr "v8type" "alus_ext")
14017
+ (set_attr "mode" "<MODE>")]
14020
+(define_insn "*subs_<optab><mode>_multp2"
14021
+ [(set (reg:CC_NZ CC_REGNUM)
14023
+ (minus:GPI (match_operand:GPI 4 "register_operand" "r")
14025
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
14026
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
14027
+ (match_operand 3 "const_int_operand" "n")
14030
+ (set (match_operand:GPI 0 "register_operand" "=r")
14031
+ (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI
14032
+ (mult:GPI (match_dup 1) (match_dup 2))
14034
+ (const_int 0))))]
14035
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
14036
+ "subs\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
14037
+ [(set_attr "v8type" "alus_ext")
14038
+ (set_attr "mode" "<MODE>")]
14041
(define_insn "*add<mode>3nr_compare0"
14042
[(set (reg:CC_NZ CC_REGNUM)
14044
@@ -1790,6 +1929,34 @@
14045
(set_attr "mode" "SI")]
14048
+(define_insn "*sub<mode>3_carryin"
14050
+ (match_operand:GPI 0 "register_operand" "=r")
14051
+ (minus:GPI (minus:GPI
14052
+ (match_operand:GPI 1 "register_operand" "r")
14053
+ (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
14054
+ (match_operand:GPI 2 "register_operand" "r")))]
14056
+ "sbc\\t%<w>0, %<w>1, %<w>2"
14057
+ [(set_attr "v8type" "adc")
14058
+ (set_attr "mode" "<MODE>")]
14061
+;; zero_extend version of the above
14062
+(define_insn "*subsi3_carryin_uxtw"
14064
+ (match_operand:DI 0 "register_operand" "=r")
14066
+ (minus:SI (minus:SI
14067
+ (match_operand:SI 1 "register_operand" "r")
14068
+ (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
14069
+ (match_operand:SI 2 "register_operand" "r"))))]
14071
+ "sbc\\t%w0, %w1, %w2"
14072
+ [(set_attr "v8type" "adc")
14073
+ (set_attr "mode" "SI")]
14076
(define_insn "*sub_uxt<mode>_multp2"
14077
[(set (match_operand:GPI 0 "register_operand" "=rk")
14078
(minus:GPI (match_operand:GPI 4 "register_operand" "r")
14079
@@ -1843,6 +2010,27 @@
14080
(set_attr "mode" "SI")]
14083
+(define_insn "*ngc<mode>"
14084
+ [(set (match_operand:GPI 0 "register_operand" "=r")
14085
+ (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
14086
+ (match_operand:GPI 1 "register_operand" "r")))]
14088
+ "ngc\\t%<w>0, %<w>1"
14089
+ [(set_attr "v8type" "adc")
14090
+ (set_attr "mode" "<MODE>")]
14093
+(define_insn "*ngcsi_uxtw"
14094
+ [(set (match_operand:DI 0 "register_operand" "=r")
14096
+ (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
14097
+ (match_operand:SI 1 "register_operand" "r"))))]
14100
+ [(set_attr "v8type" "adc")
14101
+ (set_attr "mode" "SI")]
14104
(define_insn "*neg<mode>2_compare0"
14105
[(set (reg:CC_NZ CC_REGNUM)
14106
(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
14107
@@ -1868,6 +2056,21 @@
14108
(set_attr "mode" "SI")]
14111
+(define_insn "*neg_<shift><mode>3_compare0"
14112
+ [(set (reg:CC_NZ CC_REGNUM)
14114
+ (neg:GPI (ASHIFT:GPI
14115
+ (match_operand:GPI 1 "register_operand" "r")
14116
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")))
14118
+ (set (match_operand:GPI 0 "register_operand" "=r")
14119
+ (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))]
14121
+ "negs\\t%<w>0, %<w>1, <shift> %2"
14122
+ [(set_attr "v8type" "alus_shift")
14123
+ (set_attr "mode" "<MODE>")]
14126
(define_insn "*neg_<shift>_<mode>2"
14127
[(set (match_operand:GPI 0 "register_operand" "=r")
14128
(neg:GPI (ASHIFT:GPI
14129
@@ -2157,6 +2360,18 @@
14130
(set_attr "mode" "<GPI:MODE>")]
14133
+(define_insn "*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>"
14134
+ [(set (reg:CC_SWP CC_REGNUM)
14135
+ (compare:CC_SWP (ashift:GPI
14137
+ (match_operand:ALLX 0 "register_operand" "r"))
14138
+ (match_operand 1 "aarch64_imm3" "Ui3"))
14139
+ (match_operand:GPI 2 "register_operand" "r")))]
14141
+ "cmp\\t%<GPI:w>2, %<GPI:w>0, <su>xt<ALLX:size> %1"
14142
+ [(set_attr "v8type" "alus_ext")
14143
+ (set_attr "mode" "<GPI:MODE>")]
14146
;; -------------------------------------------------------------------
14147
;; Store-flag and conditional select insns
14148
@@ -2210,7 +2425,7 @@
14149
(set_attr "mode" "SI")]
14152
-(define_insn "*cstore<mode>_neg"
14153
+(define_insn "cstore<mode>_neg"
14154
[(set (match_operand:ALLI 0 "register_operand" "=r")
14155
(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
14156
[(match_operand 2 "cc_register" "") (const_int 0)])))]
14157
@@ -2433,6 +2648,69 @@
14158
[(set_attr "v8type" "logic,logic_imm")
14159
(set_attr "mode" "SI")])
14161
+(define_insn "*and<mode>3_compare0"
14162
+ [(set (reg:CC_NZ CC_REGNUM)
14164
+ (and:GPI (match_operand:GPI 1 "register_operand" "%r,r")
14165
+ (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>"))
14167
+ (set (match_operand:GPI 0 "register_operand" "=r,r")
14168
+ (and:GPI (match_dup 1) (match_dup 2)))]
14170
+ "ands\\t%<w>0, %<w>1, %<w>2"
14171
+ [(set_attr "v8type" "logics,logics_imm")
14172
+ (set_attr "mode" "<MODE>")]
14175
+;; zero_extend version of above
14176
+(define_insn "*andsi3_compare0_uxtw"
14177
+ [(set (reg:CC_NZ CC_REGNUM)
14179
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
14180
+ (match_operand:SI 2 "aarch64_logical_operand" "r,K"))
14182
+ (set (match_operand:DI 0 "register_operand" "=r,r")
14183
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
14185
+ "ands\\t%w0, %w1, %w2"
14186
+ [(set_attr "v8type" "logics,logics_imm")
14187
+ (set_attr "mode" "SI")]
14190
+(define_insn "*and_<SHIFT:optab><mode>3_compare0"
14191
+ [(set (reg:CC_NZ CC_REGNUM)
14193
+ (and:GPI (SHIFT:GPI
14194
+ (match_operand:GPI 1 "register_operand" "r")
14195
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
14196
+ (match_operand:GPI 3 "register_operand" "r"))
14198
+ (set (match_operand:GPI 0 "register_operand" "=r")
14199
+ (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))]
14201
+ "ands\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
14202
+ [(set_attr "v8type" "logics_shift")
14203
+ (set_attr "mode" "<MODE>")]
14206
+;; zero_extend version of above
14207
+(define_insn "*and_<SHIFT:optab>si3_compare0_uxtw"
14208
+ [(set (reg:CC_NZ CC_REGNUM)
14210
+ (and:SI (SHIFT:SI
14211
+ (match_operand:SI 1 "register_operand" "r")
14212
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
14213
+ (match_operand:SI 3 "register_operand" "r"))
14215
+ (set (match_operand:DI 0 "register_operand" "=r")
14216
+ (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2))
14217
+ (match_dup 3))))]
14219
+ "ands\\t%w0, %w3, %w1, <SHIFT:shift> %2"
14220
+ [(set_attr "v8type" "logics_shift")
14221
+ (set_attr "mode" "SI")]
14224
(define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
14225
[(set (match_operand:GPI 0 "register_operand" "=r")
14226
(LOGICAL:GPI (SHIFT:GPI
14227
@@ -2703,6 +2981,62 @@
14228
(set_attr "mode" "<MODE>")]
14231
+(define_insn "*extr<mode>5_insn"
14232
+ [(set (match_operand:GPI 0 "register_operand" "=r")
14233
+ (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
14234
+ (match_operand 3 "const_int_operand" "n"))
14235
+ (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r")
14236
+ (match_operand 4 "const_int_operand" "n"))))]
14237
+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) &&
14238
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))"
14239
+ "extr\\t%<w>0, %<w>1, %<w>2, %4"
14240
+ [(set_attr "v8type" "shift")
14241
+ (set_attr "mode" "<MODE>")]
14244
+;; zero_extend version of the above
14245
+(define_insn "*extrsi5_insn_uxtw"
14246
+ [(set (match_operand:DI 0 "register_operand" "=r")
14248
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
14249
+ (match_operand 3 "const_int_operand" "n"))
14250
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
14251
+ (match_operand 4 "const_int_operand" "n")))))]
14252
+ "UINTVAL (operands[3]) < 32 &&
14253
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
14254
+ "extr\\t%w0, %w1, %w2, %4"
14255
+ [(set_attr "v8type" "shift")
14256
+ (set_attr "mode" "SI")]
14259
+(define_insn "*ror<mode>3_insn"
14260
+ [(set (match_operand:GPI 0 "register_operand" "=r")
14261
+ (rotate:GPI (match_operand:GPI 1 "register_operand" "r")
14262
+ (match_operand 2 "const_int_operand" "n")))]
14263
+ "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
14265
+ operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
14266
+ return "ror\\t%<w>0, %<w>1, %3";
14268
+ [(set_attr "v8type" "shift")
14269
+ (set_attr "mode" "<MODE>")]
14272
+;; zero_extend version of the above
14273
+(define_insn "*rorsi3_insn_uxtw"
14274
+ [(set (match_operand:DI 0 "register_operand" "=r")
14276
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
14277
+ (match_operand 2 "const_int_operand" "n"))))]
14278
+ "UINTVAL (operands[2]) < 32"
14280
+ operands[3] = GEN_INT (32 - UINTVAL (operands[2]));
14281
+ return "ror\\t%w0, %w1, %3";
14283
+ [(set_attr "v8type" "shift")
14284
+ (set_attr "mode" "SI")]
14287
(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
14288
[(set (match_operand:GPI 0 "register_operand" "=r")
14290
@@ -2769,6 +3103,50 @@
14291
(set_attr "mode" "<MODE>")]
14294
+;; Bitfield Insert (insv)
14295
+(define_expand "insv<mode>"
14296
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
14297
+ (match_operand 1 "const_int_operand")
14298
+ (match_operand 2 "const_int_operand"))
14299
+ (match_operand:GPI 3 "general_operand"))]
14302
+ unsigned HOST_WIDE_INT width = UINTVAL (operands[1]);
14303
+ unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]);
14304
+ rtx value = operands[3];
14306
+ if (width == 0 || (pos + width) > GET_MODE_BITSIZE (<MODE>mode))
14309
+ if (CONST_INT_P (value))
14311
+ unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1;
14313
+ /* Prefer AND/OR for inserting all zeros or all ones. */
14314
+ if ((UINTVAL (value) & mask) == 0
14315
+ || (UINTVAL (value) & mask) == mask)
14318
+ /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */
14319
+ if (width == 16 && (pos % 16) == 0)
14322
+ operands[3] = force_reg (<MODE>mode, value);
14325
+(define_insn "*insv_reg<mode>"
14326
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
14327
+ (match_operand 1 "const_int_operand" "n")
14328
+ (match_operand 2 "const_int_operand" "n"))
14329
+ (match_operand:GPI 3 "register_operand" "r"))]
14330
+ "!(UINTVAL (operands[1]) == 0
14331
+ || (UINTVAL (operands[2]) + UINTVAL (operands[1])
14332
+ > GET_MODE_BITSIZE (<MODE>mode)))"
14333
+ "bfi\\t%<w>0, %<w>3, %2, %1"
14334
+ [(set_attr "v8type" "bfm")
14335
+ (set_attr "mode" "<MODE>")]
14338
(define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>"
14339
[(set (match_operand:GPI 0 "register_operand" "=r")
14340
(ashift:GPI (ANY_EXTEND:GPI
14341
@@ -3089,6 +3467,27 @@
14342
(set_attr "mode" "<MODE>")]
14345
+(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
14346
+ [(set (match_operand:GPF 0 "register_operand" "=w")
14347
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
14350
+ "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
14351
+ [(set_attr "v8type" "frecp<FRECP:frecp_suffix>")
14352
+ (set_attr "mode" "<MODE>")]
14355
+(define_insn "aarch64_frecps<mode>"
14356
+ [(set (match_operand:GPF 0 "register_operand" "=w")
14357
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
14358
+ (match_operand:GPF 2 "register_operand" "w")]
14361
+ "frecps\\t%<s>0, %<s>1, %<s>2"
14362
+ [(set_attr "v8type" "frecps")
14363
+ (set_attr "mode" "<MODE>")]
14366
;; -------------------------------------------------------------------
14368
;; -------------------------------------------------------------------
14369
--- a/src/gcc/config/aarch64/aarch64-builtins.c
14370
+++ b/src/gcc/config/aarch64/aarch64-builtins.c
14372
#include "langhooks.h"
14373
#include "diagnostic-core.h"
14374
#include "optabs.h"
14375
+#include "gimple.h"
14377
enum aarch64_simd_builtin_type_mode
14391
+#define sf_UP T_SF
14395
@@ -128,123 +131,136 @@
14396
unsigned int fcode;
14397
} aarch64_simd_builtin_datum;
14399
-#define CF(N, X) CODE_FOR_aarch64_##N##X
14400
+#define CF0(N, X) CODE_FOR_aarch64_##N##X
14401
+#define CF1(N, X) CODE_FOR_##N##X##1
14402
+#define CF2(N, X) CODE_FOR_##N##X##2
14403
+#define CF3(N, X) CODE_FOR_##N##X##3
14404
+#define CF4(N, X) CODE_FOR_##N##X##4
14405
+#define CF10(N, X) CODE_FOR_##N##X
14407
-#define VAR1(T, N, A) \
14408
- {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0},
14409
-#define VAR2(T, N, A, B) \
14412
-#define VAR3(T, N, A, B, C) \
14413
- VAR2 (T, N, A, B) \
14415
-#define VAR4(T, N, A, B, C, D) \
14416
- VAR3 (T, N, A, B, C) \
14418
-#define VAR5(T, N, A, B, C, D, E) \
14419
- VAR4 (T, N, A, B, C, D) \
14421
-#define VAR6(T, N, A, B, C, D, E, F) \
14422
- VAR5 (T, N, A, B, C, D, E) \
14424
-#define VAR7(T, N, A, B, C, D, E, F, G) \
14425
- VAR6 (T, N, A, B, C, D, E, F) \
14427
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
14428
- VAR7 (T, N, A, B, C, D, E, F, G) \
14430
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14431
- VAR8 (T, N, A, B, C, D, E, F, G, H) \
14433
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14434
- VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
14436
-#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
14437
- VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
14439
-#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
14440
- VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
14442
+#define VAR1(T, N, MAP, A) \
14443
+ {#N, AARCH64_SIMD_##T, UP (A), CF##MAP (N, A), 0},
14444
+#define VAR2(T, N, MAP, A, B) \
14445
+ VAR1 (T, N, MAP, A) \
14446
+ VAR1 (T, N, MAP, B)
14447
+#define VAR3(T, N, MAP, A, B, C) \
14448
+ VAR2 (T, N, MAP, A, B) \
14449
+ VAR1 (T, N, MAP, C)
14450
+#define VAR4(T, N, MAP, A, B, C, D) \
14451
+ VAR3 (T, N, MAP, A, B, C) \
14452
+ VAR1 (T, N, MAP, D)
14453
+#define VAR5(T, N, MAP, A, B, C, D, E) \
14454
+ VAR4 (T, N, MAP, A, B, C, D) \
14455
+ VAR1 (T, N, MAP, E)
14456
+#define VAR6(T, N, MAP, A, B, C, D, E, F) \
14457
+ VAR5 (T, N, MAP, A, B, C, D, E) \
14458
+ VAR1 (T, N, MAP, F)
14459
+#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
14460
+ VAR6 (T, N, MAP, A, B, C, D, E, F) \
14461
+ VAR1 (T, N, MAP, G)
14462
+#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
14463
+ VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
14464
+ VAR1 (T, N, MAP, H)
14465
+#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
14466
+ VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
14467
+ VAR1 (T, N, MAP, I)
14468
+#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
14469
+ VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
14470
+ VAR1 (T, N, MAP, J)
14471
+#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
14472
+ VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
14473
+ VAR1 (T, N, MAP, K)
14474
+#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
14475
+ VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
14476
+ VAR1 (T, N, MAP, L)
14478
/* BUILTIN_<ITERATOR> macros should expand to cover the same range of
14479
modes as is given for each define_mode_iterator in
14480
config/aarch64/iterators.md. */
14482
-#define BUILTIN_DX(T, N) \
14483
- VAR2 (T, N, di, df)
14484
-#define BUILTIN_SDQ_I(T, N) \
14485
- VAR4 (T, N, qi, hi, si, di)
14486
-#define BUILTIN_SD_HSI(T, N) \
14487
- VAR2 (T, N, hi, si)
14488
-#define BUILTIN_V2F(T, N) \
14489
- VAR2 (T, N, v2sf, v2df)
14490
-#define BUILTIN_VALL(T, N) \
14491
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, v2sf, v4sf, v2df)
14492
-#define BUILTIN_VB(T, N) \
14493
- VAR2 (T, N, v8qi, v16qi)
14494
-#define BUILTIN_VD(T, N) \
14495
- VAR4 (T, N, v8qi, v4hi, v2si, v2sf)
14496
-#define BUILTIN_VDC(T, N) \
14497
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
14498
-#define BUILTIN_VDIC(T, N) \
14499
- VAR3 (T, N, v8qi, v4hi, v2si)
14500
-#define BUILTIN_VDN(T, N) \
14501
- VAR3 (T, N, v4hi, v2si, di)
14502
-#define BUILTIN_VDQ(T, N) \
14503
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
14504
-#define BUILTIN_VDQF(T, N) \
14505
- VAR3 (T, N, v2sf, v4sf, v2df)
14506
-#define BUILTIN_VDQHS(T, N) \
14507
- VAR4 (T, N, v4hi, v8hi, v2si, v4si)
14508
-#define BUILTIN_VDQIF(T, N) \
14509
- VAR9 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
14510
-#define BUILTIN_VDQM(T, N) \
14511
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
14512
-#define BUILTIN_VDQV(T, N) \
14513
- VAR5 (T, N, v8qi, v16qi, v4hi, v8hi, v4si)
14514
-#define BUILTIN_VDQ_BHSI(T, N) \
14515
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
14516
-#define BUILTIN_VDQ_I(T, N) \
14517
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
14518
-#define BUILTIN_VDW(T, N) \
14519
- VAR3 (T, N, v8qi, v4hi, v2si)
14520
-#define BUILTIN_VD_BHSI(T, N) \
14521
- VAR3 (T, N, v8qi, v4hi, v2si)
14522
-#define BUILTIN_VD_HSI(T, N) \
14523
- VAR2 (T, N, v4hi, v2si)
14524
-#define BUILTIN_VD_RE(T, N) \
14525
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
14526
-#define BUILTIN_VQ(T, N) \
14527
- VAR6 (T, N, v16qi, v8hi, v4si, v2di, v4sf, v2df)
14528
-#define BUILTIN_VQN(T, N) \
14529
- VAR3 (T, N, v8hi, v4si, v2di)
14530
-#define BUILTIN_VQW(T, N) \
14531
- VAR3 (T, N, v16qi, v8hi, v4si)
14532
-#define BUILTIN_VQ_HSI(T, N) \
14533
- VAR2 (T, N, v8hi, v4si)
14534
-#define BUILTIN_VQ_S(T, N) \
14535
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
14536
-#define BUILTIN_VSDQ_HSI(T, N) \
14537
- VAR6 (T, N, v4hi, v8hi, v2si, v4si, hi, si)
14538
-#define BUILTIN_VSDQ_I(T, N) \
14539
- VAR11 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
14540
-#define BUILTIN_VSDQ_I_BHSI(T, N) \
14541
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
14542
-#define BUILTIN_VSDQ_I_DI(T, N) \
14543
- VAR8 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
14544
-#define BUILTIN_VSD_HSI(T, N) \
14545
- VAR4 (T, N, v4hi, v2si, hi, si)
14546
-#define BUILTIN_VSQN_HSDI(T, N) \
14547
- VAR6 (T, N, v8hi, v4si, v2di, hi, si, di)
14548
-#define BUILTIN_VSTRUCT(T, N) \
14549
- VAR3 (T, N, oi, ci, xi)
14550
+#define BUILTIN_DX(T, N, MAP) \
14551
+ VAR2 (T, N, MAP, di, df)
14552
+#define BUILTIN_GPF(T, N, MAP) \
14553
+ VAR2 (T, N, MAP, sf, df)
14554
+#define BUILTIN_SDQ_I(T, N, MAP) \
14555
+ VAR4 (T, N, MAP, qi, hi, si, di)
14556
+#define BUILTIN_SD_HSI(T, N, MAP) \
14557
+ VAR2 (T, N, MAP, hi, si)
14558
+#define BUILTIN_V2F(T, N, MAP) \
14559
+ VAR2 (T, N, MAP, v2sf, v2df)
14560
+#define BUILTIN_VALL(T, N, MAP) \
14561
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
14562
+ v4si, v2di, v2sf, v4sf, v2df)
14563
+#define BUILTIN_VALLDI(T, N, MAP) \
14564
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
14565
+ v4si, v2di, v2sf, v4sf, v2df, di)
14566
+#define BUILTIN_VB(T, N, MAP) \
14567
+ VAR2 (T, N, MAP, v8qi, v16qi)
14568
+#define BUILTIN_VD(T, N, MAP) \
14569
+ VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
14570
+#define BUILTIN_VDC(T, N, MAP) \
14571
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
14572
+#define BUILTIN_VDIC(T, N, MAP) \
14573
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
14574
+#define BUILTIN_VDN(T, N, MAP) \
14575
+ VAR3 (T, N, MAP, v4hi, v2si, di)
14576
+#define BUILTIN_VDQ(T, N, MAP) \
14577
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
14578
+#define BUILTIN_VDQF(T, N, MAP) \
14579
+ VAR3 (T, N, MAP, v2sf, v4sf, v2df)
14580
+#define BUILTIN_VDQH(T, N, MAP) \
14581
+ VAR2 (T, N, MAP, v4hi, v8hi)
14582
+#define BUILTIN_VDQHS(T, N, MAP) \
14583
+ VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si)
14584
+#define BUILTIN_VDQIF(T, N, MAP) \
14585
+ VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
14586
+#define BUILTIN_VDQM(T, N, MAP) \
14587
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
14588
+#define BUILTIN_VDQV(T, N, MAP) \
14589
+ VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si)
14590
+#define BUILTIN_VDQ_BHSI(T, N, MAP) \
14591
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
14592
+#define BUILTIN_VDQ_I(T, N, MAP) \
14593
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
14594
+#define BUILTIN_VDW(T, N, MAP) \
14595
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
14596
+#define BUILTIN_VD_BHSI(T, N, MAP) \
14597
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
14598
+#define BUILTIN_VD_HSI(T, N, MAP) \
14599
+ VAR2 (T, N, MAP, v4hi, v2si)
14600
+#define BUILTIN_VD_RE(T, N, MAP) \
14601
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
14602
+#define BUILTIN_VQ(T, N, MAP) \
14603
+ VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
14604
+#define BUILTIN_VQN(T, N, MAP) \
14605
+ VAR3 (T, N, MAP, v8hi, v4si, v2di)
14606
+#define BUILTIN_VQW(T, N, MAP) \
14607
+ VAR3 (T, N, MAP, v16qi, v8hi, v4si)
14608
+#define BUILTIN_VQ_HSI(T, N, MAP) \
14609
+ VAR2 (T, N, MAP, v8hi, v4si)
14610
+#define BUILTIN_VQ_S(T, N, MAP) \
14611
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
14612
+#define BUILTIN_VSDQ_HSI(T, N, MAP) \
14613
+ VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si)
14614
+#define BUILTIN_VSDQ_I(T, N, MAP) \
14615
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
14616
+#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \
14617
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
14618
+#define BUILTIN_VSDQ_I_DI(T, N, MAP) \
14619
+ VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
14620
+#define BUILTIN_VSD_HSI(T, N, MAP) \
14621
+ VAR4 (T, N, MAP, v4hi, v2si, hi, si)
14622
+#define BUILTIN_VSQN_HSDI(T, N, MAP) \
14623
+ VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di)
14624
+#define BUILTIN_VSTRUCT(T, N, MAP) \
14625
+ VAR3 (T, N, MAP, oi, ci, xi)
14627
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
14628
#include "aarch64-simd-builtins.def"
14632
-#define VAR1(T, N, A) \
14633
+#define VAR1(T, N, MAP, A) \
14634
AARCH64_SIMD_BUILTIN_##N##A,
14636
enum aarch64_builtins
14637
@@ -257,53 +273,6 @@
14638
AARCH64_BUILTIN_MAX
14642
-#undef BUILTIN_SDQ_I
14643
-#undef BUILTIN_SD_HSI
14644
-#undef BUILTIN_V2F
14645
-#undef BUILTIN_VALL
14648
-#undef BUILTIN_VDC
14649
-#undef BUILTIN_VDIC
14650
-#undef BUILTIN_VDN
14651
-#undef BUILTIN_VDQ
14652
-#undef BUILTIN_VDQF
14653
-#undef BUILTIN_VDQHS
14654
-#undef BUILTIN_VDQIF
14655
-#undef BUILTIN_VDQM
14656
-#undef BUILTIN_VDQV
14657
-#undef BUILTIN_VDQ_BHSI
14658
-#undef BUILTIN_VDQ_I
14659
-#undef BUILTIN_VDW
14660
-#undef BUILTIN_VD_BHSI
14661
-#undef BUILTIN_VD_HSI
14662
-#undef BUILTIN_VD_RE
14664
-#undef BUILTIN_VQN
14665
-#undef BUILTIN_VQW
14666
-#undef BUILTIN_VQ_HSI
14667
-#undef BUILTIN_VQ_S
14668
-#undef BUILTIN_VSDQ_HSI
14669
-#undef BUILTIN_VSDQ_I
14670
-#undef BUILTIN_VSDQ_I_BHSI
14671
-#undef BUILTIN_VSDQ_I_DI
14672
-#undef BUILTIN_VSD_HSI
14673
-#undef BUILTIN_VSQN_HSDI
14674
-#undef BUILTIN_VSTRUCT
14688
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
14690
#define NUM_DREG_TYPES 6
14691
@@ -609,7 +578,7 @@
14693
"v8qi", "v4hi", "v2si", "v2sf", "di", "df",
14694
"v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
14695
- "ti", "ei", "oi", "xi", "si", "hi", "qi"
14696
+ "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
14700
@@ -1258,30 +1227,82 @@
14701
&& in_mode == N##Fmode && in_n == C)
14702
case BUILT_IN_FLOOR:
14703
case BUILT_IN_FLOORF:
14704
- return AARCH64_FIND_FRINT_VARIANT (frintm);
14705
+ return AARCH64_FIND_FRINT_VARIANT (floor);
14706
case BUILT_IN_CEIL:
14707
case BUILT_IN_CEILF:
14708
- return AARCH64_FIND_FRINT_VARIANT (frintp);
14709
+ return AARCH64_FIND_FRINT_VARIANT (ceil);
14710
case BUILT_IN_TRUNC:
14711
case BUILT_IN_TRUNCF:
14712
- return AARCH64_FIND_FRINT_VARIANT (frintz);
14713
+ return AARCH64_FIND_FRINT_VARIANT (btrunc);
14714
case BUILT_IN_ROUND:
14715
case BUILT_IN_ROUNDF:
14716
- return AARCH64_FIND_FRINT_VARIANT (frinta);
14717
+ return AARCH64_FIND_FRINT_VARIANT (round);
14718
case BUILT_IN_NEARBYINT:
14719
case BUILT_IN_NEARBYINTF:
14720
- return AARCH64_FIND_FRINT_VARIANT (frinti);
14721
+ return AARCH64_FIND_FRINT_VARIANT (nearbyint);
14722
case BUILT_IN_SQRT:
14723
case BUILT_IN_SQRTF:
14724
return AARCH64_FIND_FRINT_VARIANT (sqrt);
14725
#undef AARCH64_CHECK_BUILTIN_MODE
14726
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
14727
+ (out_mode == SImode && out_n == C \
14728
+ && in_mode == N##Imode && in_n == C)
14729
+ case BUILT_IN_CLZ:
14731
+ if (AARCH64_CHECK_BUILTIN_MODE (4, S))
14732
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_clzv4si];
14733
+ return NULL_TREE;
14735
+#undef AARCH64_CHECK_BUILTIN_MODE
14736
+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
14737
(out_mode == N##Imode && out_n == C \
14738
&& in_mode == N##Fmode && in_n == C)
14739
case BUILT_IN_LFLOOR:
14740
- return AARCH64_FIND_FRINT_VARIANT (fcvtms);
14741
+ case BUILT_IN_IFLOORF:
14743
+ tree new_tree = NULL_TREE;
14744
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
14746
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2dfv2di];
14747
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
14749
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv4sfv4si];
14750
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
14752
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2sfv2si];
14755
case BUILT_IN_LCEIL:
14756
- return AARCH64_FIND_FRINT_VARIANT (fcvtps);
14757
+ case BUILT_IN_ICEILF:
14759
+ tree new_tree = NULL_TREE;
14760
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
14762
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2dfv2di];
14763
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
14765
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv4sfv4si];
14766
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
14768
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2sfv2si];
14771
+ case BUILT_IN_LROUND:
14772
+ case BUILT_IN_IROUNDF:
14774
+ tree new_tree = NULL_TREE;
14775
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
14777
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2dfv2di];
14778
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
14780
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv4sfv4si];
14781
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
14783
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2sfv2si];
14790
@@ -1289,5 +1310,160 @@
14796
+#define VAR1(T, N, MAP, A) \
14797
+ case AARCH64_SIMD_BUILTIN_##N##A:
14800
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
14801
+ bool ignore ATTRIBUTE_UNUSED)
14803
+ int fcode = DECL_FUNCTION_CODE (fndecl);
14804
+ tree type = TREE_TYPE (TREE_TYPE (fndecl));
14808
+ BUILTIN_VDQF (UNOP, abs, 2)
14809
+ return fold_build1 (ABS_EXPR, type, args[0]);
14811
+ BUILTIN_VALLDI (BINOP, cmge, 0)
14812
+ return fold_build2 (GE_EXPR, type, args[0], args[1]);
14814
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
14815
+ return fold_build2 (GT_EXPR, type, args[0], args[1]);
14817
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
14818
+ return fold_build2 (EQ_EXPR, type, args[0], args[1]);
14820
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
14822
+ tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
14823
+ tree vec_zero_node = build_zero_cst (type);
14824
+ return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
14827
+ VAR1 (UNOP, floatv2si, 2, v2sf)
14828
+ VAR1 (UNOP, floatv4si, 2, v4sf)
14829
+ VAR1 (UNOP, floatv2di, 2, v2df)
14830
+ return fold_build1 (FLOAT_EXPR, type, args[0]);
14835
+ return NULL_TREE;
14839
+aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
14841
+ bool changed = false;
14842
+ gimple stmt = gsi_stmt (*gsi);
14843
+ tree call = gimple_call_fn (stmt);
14845
+ gimple new_stmt = NULL;
14848
+ fndecl = gimple_call_fndecl (stmt);
14851
+ int fcode = DECL_FUNCTION_CODE (fndecl);
14852
+ int nargs = gimple_call_num_args (stmt);
14853
+ tree *args = (nargs > 0
14854
+ ? gimple_call_arg_ptr (stmt, 0)
14855
+ : &error_mark_node);
14859
+ BUILTIN_VALL (UNOP, reduc_splus_, 10)
14860
+ new_stmt = gimple_build_assign_with_ops (
14862
+ gimple_call_lhs (stmt),
14866
+ BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
14867
+ new_stmt = gimple_build_assign_with_ops (
14869
+ gimple_call_lhs (stmt),
14873
+ BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
14874
+ new_stmt = gimple_build_assign_with_ops (
14876
+ gimple_call_lhs (stmt),
14889
+ gsi_replace (gsi, new_stmt, true);
14896
#undef AARCH64_CHECK_BUILTIN_MODE
14897
#undef AARCH64_FIND_FRINT_VARIANT
14899
+#undef BUILTIN_SDQ_I
14900
+#undef BUILTIN_SD_HSI
14901
+#undef BUILTIN_V2F
14902
+#undef BUILTIN_VALL
14905
+#undef BUILTIN_VDC
14906
+#undef BUILTIN_VDIC
14907
+#undef BUILTIN_VDN
14908
+#undef BUILTIN_VDQ
14909
+#undef BUILTIN_VDQF
14910
+#undef BUILTIN_VDQH
14911
+#undef BUILTIN_VDQHS
14912
+#undef BUILTIN_VDQIF
14913
+#undef BUILTIN_VDQM
14914
+#undef BUILTIN_VDQV
14915
+#undef BUILTIN_VDQ_BHSI
14916
+#undef BUILTIN_VDQ_I
14917
+#undef BUILTIN_VDW
14918
+#undef BUILTIN_VD_BHSI
14919
+#undef BUILTIN_VD_HSI
14920
+#undef BUILTIN_VD_RE
14922
+#undef BUILTIN_VQN
14923
+#undef BUILTIN_VQW
14924
+#undef BUILTIN_VQ_HSI
14925
+#undef BUILTIN_VQ_S
14926
+#undef BUILTIN_VSDQ_HSI
14927
+#undef BUILTIN_VSDQ_I
14928
+#undef BUILTIN_VSDQ_I_BHSI
14929
+#undef BUILTIN_VSDQ_I_DI
14930
+#undef BUILTIN_VSD_HSI
14931
+#undef BUILTIN_VSQN_HSDI
14932
+#undef BUILTIN_VSTRUCT
14951
--- a/src/gcc/config/aarch64/aarch64-protos.h
14952
+++ b/src/gcc/config/aarch64/aarch64-protos.h
14954
SYMBOL_SMALL_TLSDESC,
14955
SYMBOL_SMALL_GOTTPREL,
14956
SYMBOL_SMALL_TPREL,
14957
+ SYMBOL_TINY_ABSOLUTE,
14958
SYMBOL_FORCE_TO_MEM
14961
@@ -140,18 +141,26 @@
14962
bool aarch64_float_const_zero_rtx_p (rtx);
14963
bool aarch64_function_arg_regno_p (unsigned);
14964
bool aarch64_gen_movmemqi (rtx *);
14965
+bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
14966
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
14967
bool aarch64_is_long_call_p (rtx);
14968
bool aarch64_label_mentioned_p (rtx);
14969
bool aarch64_legitimate_pic_operand_p (rtx);
14970
bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
14971
+bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
14972
+ enum machine_mode);
14973
+char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
14974
+char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
14975
bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
14976
bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
14977
bool aarch64_regno_ok_for_base_p (int, bool);
14978
bool aarch64_regno_ok_for_index_p (int, bool);
14979
bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
14980
bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
14981
+bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
14982
bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
14983
+bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool,
14984
+ struct simd_immediate_info *);
14985
bool aarch64_symbolic_address_p (rtx);
14986
bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
14987
enum aarch64_symbol_type *);
14988
@@ -177,6 +186,7 @@
14989
bool aarch64_simd_mem_operand_p (rtx);
14990
rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
14991
rtx aarch64_tls_get_addr (void);
14992
+tree aarch64_fold_builtin (tree, int, tree *, bool);
14993
unsigned aarch64_dbx_register_number (unsigned);
14994
unsigned aarch64_trampoline_size (void);
14995
void aarch64_asm_output_labelref (FILE *, const char *);
14996
@@ -216,6 +226,8 @@
14998
bool aarch64_split_128bit_move_p (rtx, rtx);
15000
+void aarch64_split_simd_move (rtx, rtx);
15002
/* Check for a legitimate floating point constant for FMOV. */
15003
bool aarch64_float_const_representable_p (rtx);
15005
@@ -249,6 +261,4 @@
15006
extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
15008
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
15010
-char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
15011
#endif /* GCC_AARCH64_PROTOS_H */
15012
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
15013
+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
15014
@@ -18,241 +18,339 @@
15015
along with GCC; see the file COPYING3. If not see
15016
<http://www.gnu.org/licenses/>. */
15018
-/* In the list below, the BUILTIN_<ITERATOR> macros should
15019
- correspond to the iterator used to construct the instruction's
15020
- patterns in aarch64-simd.md. A helpful idiom to follow when
15021
- adding new builtins is to add a line for each pattern in the md
15022
- file. Thus, ADDP, which has one pattern defined for the VD_BHSI
15023
- iterator, and one for DImode, has two entries below. */
15024
+/* In the list below, the BUILTIN_<ITERATOR> macros expand to create
15025
+ builtins for each of the modes described by <ITERATOR>. When adding
15026
+ new builtins to this list, a helpful idiom to follow is to add
15027
+ a line for each pattern in the md file. Thus, ADDP, which has one
15028
+ pattern defined for the VD_BHSI iterator, and one for DImode, has two
15031
- BUILTIN_VD_RE (CREATE, create)
15032
- BUILTIN_VQ_S (GETLANE, get_lane_signed)
15033
- BUILTIN_VDQ (GETLANE, get_lane_unsigned)
15034
- BUILTIN_VDQF (GETLANE, get_lane)
15035
- VAR1 (GETLANE, get_lane, di)
15036
- BUILTIN_VDC (COMBINE, combine)
15037
- BUILTIN_VB (BINOP, pmul)
15038
- BUILTIN_VDQF (UNOP, sqrt)
15039
- BUILTIN_VD_BHSI (BINOP, addp)
15040
- VAR1 (UNOP, addp, di)
15041
+ Parameter 1 is the 'type' of the intrinsic. This is used to
15042
+ describe the type modifiers (for example; unsigned) applied to
15043
+ each of the parameters to the intrinsic function.
15045
- BUILTIN_VD_RE (REINTERP, reinterpretdi)
15046
- BUILTIN_VDC (REINTERP, reinterpretv8qi)
15047
- BUILTIN_VDC (REINTERP, reinterpretv4hi)
15048
- BUILTIN_VDC (REINTERP, reinterpretv2si)
15049
- BUILTIN_VDC (REINTERP, reinterpretv2sf)
15050
- BUILTIN_VQ (REINTERP, reinterpretv16qi)
15051
- BUILTIN_VQ (REINTERP, reinterpretv8hi)
15052
- BUILTIN_VQ (REINTERP, reinterpretv4si)
15053
- BUILTIN_VQ (REINTERP, reinterpretv4sf)
15054
- BUILTIN_VQ (REINTERP, reinterpretv2di)
15055
- BUILTIN_VQ (REINTERP, reinterpretv2df)
15056
+ Parameter 2 is the name of the intrinsic. This is appended
15057
+ to `__builtin_aarch64_<name><mode>` to give the intrinsic name
15058
+ as exported to the front-ends.
15060
- BUILTIN_VDQ_I (BINOP, dup_lane)
15061
- BUILTIN_SDQ_I (BINOP, dup_lane)
15062
+ Parameter 3 describes how to map from the name to the CODE_FOR_
15063
+ macro holding the RTL pattern for the intrinsic. This mapping is:
15064
+ 0 - CODE_FOR_aarch64_<name><mode>
15065
+ 1-9 - CODE_FOR_<name><mode><1-9>
15066
+ 10 - CODE_FOR_<name><mode>. */
15068
+ BUILTIN_VD_RE (CREATE, create, 0)
15069
+ BUILTIN_VQ_S (GETLANE, get_lane_signed, 0)
15070
+ BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0)
15071
+ BUILTIN_VDQF (GETLANE, get_lane, 0)
15072
+ VAR1 (GETLANE, get_lane, 0, di)
15073
+ BUILTIN_VDC (COMBINE, combine, 0)
15074
+ BUILTIN_VB (BINOP, pmul, 0)
15075
+ BUILTIN_VDQF (UNOP, sqrt, 2)
15076
+ BUILTIN_VD_BHSI (BINOP, addp, 0)
15077
+ VAR1 (UNOP, addp, 0, di)
15078
+ VAR1 (UNOP, clz, 2, v4si)
15080
+ BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
15081
+ BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
15082
+ BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
15083
+ BUILTIN_VDC (REINTERP, reinterpretv2si, 0)
15084
+ BUILTIN_VDC (REINTERP, reinterpretv2sf, 0)
15085
+ BUILTIN_VQ (REINTERP, reinterpretv16qi, 0)
15086
+ BUILTIN_VQ (REINTERP, reinterpretv8hi, 0)
15087
+ BUILTIN_VQ (REINTERP, reinterpretv4si, 0)
15088
+ BUILTIN_VQ (REINTERP, reinterpretv4sf, 0)
15089
+ BUILTIN_VQ (REINTERP, reinterpretv2di, 0)
15090
+ BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
15092
+ BUILTIN_VDQ_I (BINOP, dup_lane, 0)
15093
+ BUILTIN_SDQ_I (BINOP, dup_lane, 0)
15094
/* Implemented by aarch64_<sur>q<r>shl<mode>. */
15095
- BUILTIN_VSDQ_I (BINOP, sqshl)
15096
- BUILTIN_VSDQ_I (BINOP, uqshl)
15097
- BUILTIN_VSDQ_I (BINOP, sqrshl)
15098
- BUILTIN_VSDQ_I (BINOP, uqrshl)
15099
+ BUILTIN_VSDQ_I (BINOP, sqshl, 0)
15100
+ BUILTIN_VSDQ_I (BINOP, uqshl, 0)
15101
+ BUILTIN_VSDQ_I (BINOP, sqrshl, 0)
15102
+ BUILTIN_VSDQ_I (BINOP, uqrshl, 0)
15103
/* Implemented by aarch64_<su_optab><optab><mode>. */
15104
- BUILTIN_VSDQ_I (BINOP, sqadd)
15105
- BUILTIN_VSDQ_I (BINOP, uqadd)
15106
- BUILTIN_VSDQ_I (BINOP, sqsub)
15107
- BUILTIN_VSDQ_I (BINOP, uqsub)
15108
+ BUILTIN_VSDQ_I (BINOP, sqadd, 0)
15109
+ BUILTIN_VSDQ_I (BINOP, uqadd, 0)
15110
+ BUILTIN_VSDQ_I (BINOP, sqsub, 0)
15111
+ BUILTIN_VSDQ_I (BINOP, uqsub, 0)
15112
/* Implemented by aarch64_<sur>qadd<mode>. */
15113
- BUILTIN_VSDQ_I (BINOP, suqadd)
15114
- BUILTIN_VSDQ_I (BINOP, usqadd)
15115
+ BUILTIN_VSDQ_I (BINOP, suqadd, 0)
15116
+ BUILTIN_VSDQ_I (BINOP, usqadd, 0)
15118
/* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */
15119
- BUILTIN_VDC (GETLANE, get_dregoi)
15120
- BUILTIN_VDC (GETLANE, get_dregci)
15121
- BUILTIN_VDC (GETLANE, get_dregxi)
15122
+ BUILTIN_VDC (GETLANE, get_dregoi, 0)
15123
+ BUILTIN_VDC (GETLANE, get_dregci, 0)
15124
+ BUILTIN_VDC (GETLANE, get_dregxi, 0)
15125
/* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
15126
- BUILTIN_VQ (GETLANE, get_qregoi)
15127
- BUILTIN_VQ (GETLANE, get_qregci)
15128
- BUILTIN_VQ (GETLANE, get_qregxi)
15129
+ BUILTIN_VQ (GETLANE, get_qregoi, 0)
15130
+ BUILTIN_VQ (GETLANE, get_qregci, 0)
15131
+ BUILTIN_VQ (GETLANE, get_qregxi, 0)
15132
/* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
15133
- BUILTIN_VQ (SETLANE, set_qregoi)
15134
- BUILTIN_VQ (SETLANE, set_qregci)
15135
- BUILTIN_VQ (SETLANE, set_qregxi)
15136
+ BUILTIN_VQ (SETLANE, set_qregoi, 0)
15137
+ BUILTIN_VQ (SETLANE, set_qregci, 0)
15138
+ BUILTIN_VQ (SETLANE, set_qregxi, 0)
15139
/* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
15140
- BUILTIN_VDC (LOADSTRUCT, ld2)
15141
- BUILTIN_VDC (LOADSTRUCT, ld3)
15142
- BUILTIN_VDC (LOADSTRUCT, ld4)
15143
+ BUILTIN_VDC (LOADSTRUCT, ld2, 0)
15144
+ BUILTIN_VDC (LOADSTRUCT, ld3, 0)
15145
+ BUILTIN_VDC (LOADSTRUCT, ld4, 0)
15146
/* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */
15147
- BUILTIN_VQ (LOADSTRUCT, ld2)
15148
- BUILTIN_VQ (LOADSTRUCT, ld3)
15149
- BUILTIN_VQ (LOADSTRUCT, ld4)
15150
+ BUILTIN_VQ (LOADSTRUCT, ld2, 0)
15151
+ BUILTIN_VQ (LOADSTRUCT, ld3, 0)
15152
+ BUILTIN_VQ (LOADSTRUCT, ld4, 0)
15153
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
15154
- BUILTIN_VDC (STORESTRUCT, st2)
15155
- BUILTIN_VDC (STORESTRUCT, st3)
15156
- BUILTIN_VDC (STORESTRUCT, st4)
15157
+ BUILTIN_VDC (STORESTRUCT, st2, 0)
15158
+ BUILTIN_VDC (STORESTRUCT, st3, 0)
15159
+ BUILTIN_VDC (STORESTRUCT, st4, 0)
15160
/* Implemented by aarch64_st<VSTRUCT:nregs><VQ:mode>. */
15161
- BUILTIN_VQ (STORESTRUCT, st2)
15162
- BUILTIN_VQ (STORESTRUCT, st3)
15163
- BUILTIN_VQ (STORESTRUCT, st4)
15164
+ BUILTIN_VQ (STORESTRUCT, st2, 0)
15165
+ BUILTIN_VQ (STORESTRUCT, st3, 0)
15166
+ BUILTIN_VQ (STORESTRUCT, st4, 0)
15168
- BUILTIN_VQW (BINOP, saddl2)
15169
- BUILTIN_VQW (BINOP, uaddl2)
15170
- BUILTIN_VQW (BINOP, ssubl2)
15171
- BUILTIN_VQW (BINOP, usubl2)
15172
- BUILTIN_VQW (BINOP, saddw2)
15173
- BUILTIN_VQW (BINOP, uaddw2)
15174
- BUILTIN_VQW (BINOP, ssubw2)
15175
- BUILTIN_VQW (BINOP, usubw2)
15176
+ BUILTIN_VQW (BINOP, saddl2, 0)
15177
+ BUILTIN_VQW (BINOP, uaddl2, 0)
15178
+ BUILTIN_VQW (BINOP, ssubl2, 0)
15179
+ BUILTIN_VQW (BINOP, usubl2, 0)
15180
+ BUILTIN_VQW (BINOP, saddw2, 0)
15181
+ BUILTIN_VQW (BINOP, uaddw2, 0)
15182
+ BUILTIN_VQW (BINOP, ssubw2, 0)
15183
+ BUILTIN_VQW (BINOP, usubw2, 0)
15184
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>. */
15185
- BUILTIN_VDW (BINOP, saddl)
15186
- BUILTIN_VDW (BINOP, uaddl)
15187
- BUILTIN_VDW (BINOP, ssubl)
15188
- BUILTIN_VDW (BINOP, usubl)
15189
+ BUILTIN_VDW (BINOP, saddl, 0)
15190
+ BUILTIN_VDW (BINOP, uaddl, 0)
15191
+ BUILTIN_VDW (BINOP, ssubl, 0)
15192
+ BUILTIN_VDW (BINOP, usubl, 0)
15193
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>. */
15194
- BUILTIN_VDW (BINOP, saddw)
15195
- BUILTIN_VDW (BINOP, uaddw)
15196
- BUILTIN_VDW (BINOP, ssubw)
15197
- BUILTIN_VDW (BINOP, usubw)
15198
+ BUILTIN_VDW (BINOP, saddw, 0)
15199
+ BUILTIN_VDW (BINOP, uaddw, 0)
15200
+ BUILTIN_VDW (BINOP, ssubw, 0)
15201
+ BUILTIN_VDW (BINOP, usubw, 0)
15202
/* Implemented by aarch64_<sur>h<addsub><mode>. */
15203
- BUILTIN_VQ_S (BINOP, shadd)
15204
- BUILTIN_VQ_S (BINOP, uhadd)
15205
- BUILTIN_VQ_S (BINOP, srhadd)
15206
- BUILTIN_VQ_S (BINOP, urhadd)
15207
+ BUILTIN_VQ_S (BINOP, shadd, 0)
15208
+ BUILTIN_VQ_S (BINOP, uhadd, 0)
15209
+ BUILTIN_VQ_S (BINOP, srhadd, 0)
15210
+ BUILTIN_VQ_S (BINOP, urhadd, 0)
15211
/* Implemented by aarch64_<sur><addsub>hn<mode>. */
15212
- BUILTIN_VQN (BINOP, addhn)
15213
- BUILTIN_VQN (BINOP, raddhn)
15214
+ BUILTIN_VQN (BINOP, addhn, 0)
15215
+ BUILTIN_VQN (BINOP, raddhn, 0)
15216
/* Implemented by aarch64_<sur><addsub>hn2<mode>. */
15217
- BUILTIN_VQN (TERNOP, addhn2)
15218
- BUILTIN_VQN (TERNOP, raddhn2)
15219
+ BUILTIN_VQN (TERNOP, addhn2, 0)
15220
+ BUILTIN_VQN (TERNOP, raddhn2, 0)
15222
- BUILTIN_VSQN_HSDI (UNOP, sqmovun)
15223
+ BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
15224
/* Implemented by aarch64_<sur>qmovn<mode>. */
15225
- BUILTIN_VSQN_HSDI (UNOP, sqmovn)
15226
- BUILTIN_VSQN_HSDI (UNOP, uqmovn)
15227
+ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
15228
+ BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
15229
/* Implemented by aarch64_s<optab><mode>. */
15230
- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs)
15231
- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg)
15232
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
15233
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
15235
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane)
15236
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane)
15237
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq)
15238
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq)
15239
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2)
15240
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2)
15241
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane)
15242
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane)
15243
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq)
15244
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq)
15245
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n)
15246
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n)
15247
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
15248
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
15249
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0)
15250
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0)
15251
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0)
15252
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0)
15253
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0)
15254
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0)
15255
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0)
15256
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0)
15257
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0)
15258
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0)
15259
/* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */
15260
- BUILTIN_VSD_HSI (TERNOP, sqdmlal)
15261
- BUILTIN_VSD_HSI (TERNOP, sqdmlsl)
15262
+ BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0)
15263
+ BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0)
15264
/* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>. */
15265
- BUILTIN_VD_HSI (TERNOP, sqdmlal_n)
15266
- BUILTIN_VD_HSI (TERNOP, sqdmlsl_n)
15267
+ BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0)
15268
+ BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0)
15270
- BUILTIN_VSD_HSI (BINOP, sqdmull)
15271
- BUILTIN_VSD_HSI (TERNOP, sqdmull_lane)
15272
- BUILTIN_VD_HSI (TERNOP, sqdmull_laneq)
15273
- BUILTIN_VD_HSI (BINOP, sqdmull_n)
15274
- BUILTIN_VQ_HSI (BINOP, sqdmull2)
15275
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane)
15276
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq)
15277
- BUILTIN_VQ_HSI (BINOP, sqdmull2_n)
15278
+ BUILTIN_VSD_HSI (BINOP, sqdmull, 0)
15279
+ BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0)
15280
+ BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0)
15281
+ BUILTIN_VD_HSI (BINOP, sqdmull_n, 0)
15282
+ BUILTIN_VQ_HSI (BINOP, sqdmull2, 0)
15283
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0)
15284
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0)
15285
+ BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0)
15286
/* Implemented by aarch64_sq<r>dmulh<mode>. */
15287
- BUILTIN_VSDQ_HSI (BINOP, sqdmulh)
15288
- BUILTIN_VSDQ_HSI (BINOP, sqrdmulh)
15289
+ BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0)
15290
+ BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0)
15291
/* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */
15292
- BUILTIN_VDQHS (TERNOP, sqdmulh_lane)
15293
- BUILTIN_VDQHS (TERNOP, sqdmulh_laneq)
15294
- BUILTIN_VDQHS (TERNOP, sqrdmulh_lane)
15295
- BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq)
15296
- BUILTIN_SD_HSI (TERNOP, sqdmulh_lane)
15297
- BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane)
15298
+ BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0)
15299
+ BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0)
15300
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0)
15301
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0)
15302
+ BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0)
15303
+ BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0)
15305
- BUILTIN_VSDQ_I_DI (BINOP, sshl_n)
15306
- BUILTIN_VSDQ_I_DI (BINOP, ushl_n)
15307
+ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
15308
/* Implemented by aarch64_<sur>shl<mode>. */
15309
- BUILTIN_VSDQ_I_DI (BINOP, sshl)
15310
- BUILTIN_VSDQ_I_DI (BINOP, ushl)
15311
- BUILTIN_VSDQ_I_DI (BINOP, srshl)
15312
- BUILTIN_VSDQ_I_DI (BINOP, urshl)
15313
+ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
15314
+ BUILTIN_VSDQ_I_DI (BINOP, ushl, 0)
15315
+ BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
15316
+ BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
15318
- BUILTIN_VSDQ_I_DI (SHIFTIMM, sshr_n)
15319
- BUILTIN_VSDQ_I_DI (SHIFTIMM, ushr_n)
15320
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3)
15321
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
15322
/* Implemented by aarch64_<sur>shr_n<mode>. */
15323
- BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n)
15324
- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n)
15325
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
15326
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
15327
/* Implemented by aarch64_<sur>sra_n<mode>. */
15328
- BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n)
15329
- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n)
15330
- BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n)
15331
- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n)
15332
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0)
15333
+ BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0)
15334
+ BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0)
15335
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0)
15336
/* Implemented by aarch64_<sur>shll_n<mode>. */
15337
- BUILTIN_VDW (SHIFTIMM, sshll_n)
15338
- BUILTIN_VDW (SHIFTIMM, ushll_n)
15339
+ BUILTIN_VDW (SHIFTIMM, sshll_n, 0)
15340
+ BUILTIN_VDW (SHIFTIMM, ushll_n, 0)
15341
/* Implemented by aarch64_<sur>shll2_n<mode>. */
15342
- BUILTIN_VQW (SHIFTIMM, sshll2_n)
15343
- BUILTIN_VQW (SHIFTIMM, ushll2_n)
15344
+ BUILTIN_VQW (SHIFTIMM, sshll2_n, 0)
15345
+ BUILTIN_VQW (SHIFTIMM, ushll2_n, 0)
15346
/* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
15347
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n)
15348
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n)
15349
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n)
15350
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n)
15351
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n)
15352
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n)
15353
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0)
15354
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0)
15355
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0)
15356
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0)
15357
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0)
15358
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0)
15359
/* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
15360
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n)
15361
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n)
15362
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n)
15363
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n)
15364
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
15365
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0)
15366
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
15367
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0)
15368
/* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
15369
- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n)
15370
- BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n)
15371
- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n)
15372
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0)
15373
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
15374
+ BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
15376
/* Implemented by aarch64_cm<cmp><mode>. */
15377
- BUILTIN_VSDQ_I_DI (BINOP, cmeq)
15378
- BUILTIN_VSDQ_I_DI (BINOP, cmge)
15379
- BUILTIN_VSDQ_I_DI (BINOP, cmgt)
15380
- BUILTIN_VSDQ_I_DI (BINOP, cmle)
15381
- BUILTIN_VSDQ_I_DI (BINOP, cmlt)
15382
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
15383
+ BUILTIN_VALLDI (BINOP, cmge, 0)
15384
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
15385
+ BUILTIN_VALLDI (BINOP, cmle, 0)
15386
+ BUILTIN_VALLDI (BINOP, cmlt, 0)
15387
/* Implemented by aarch64_cm<cmp><mode>. */
15388
- BUILTIN_VSDQ_I_DI (BINOP, cmhs)
15389
- BUILTIN_VSDQ_I_DI (BINOP, cmhi)
15390
- BUILTIN_VSDQ_I_DI (BINOP, cmtst)
15391
+ BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
15392
+ BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
15393
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
15395
- /* Implemented by aarch64_<fmaxmin><mode>. */
15396
- BUILTIN_VDQF (BINOP, fmax)
15397
- BUILTIN_VDQF (BINOP, fmin)
15398
- /* Implemented by aarch64_<maxmin><mode>. */
15399
- BUILTIN_VDQ_BHSI (BINOP, smax)
15400
- BUILTIN_VDQ_BHSI (BINOP, smin)
15401
- BUILTIN_VDQ_BHSI (BINOP, umax)
15402
- BUILTIN_VDQ_BHSI (BINOP, umin)
15403
+ /* Implemented by reduc_<sur>plus_<mode>. */
15404
+ BUILTIN_VALL (UNOP, reduc_splus_, 10)
15405
+ BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
15407
- /* Implemented by aarch64_frint<frint_suffix><mode>. */
15408
- BUILTIN_VDQF (UNOP, frintz)
15409
- BUILTIN_VDQF (UNOP, frintp)
15410
- BUILTIN_VDQF (UNOP, frintm)
15411
- BUILTIN_VDQF (UNOP, frinti)
15412
- BUILTIN_VDQF (UNOP, frintx)
15413
- BUILTIN_VDQF (UNOP, frinta)
15414
+ /* Implemented by reduc_<maxmin_uns>_<mode>. */
15415
+ BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
15416
+ BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
15417
+ BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10)
15418
+ BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10)
15419
+ BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10)
15420
+ BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10)
15422
- /* Implemented by aarch64_fcvt<frint_suffix><su><mode>. */
15423
- BUILTIN_VDQF (UNOP, fcvtzs)
15424
- BUILTIN_VDQF (UNOP, fcvtzu)
15425
- BUILTIN_VDQF (UNOP, fcvtas)
15426
- BUILTIN_VDQF (UNOP, fcvtau)
15427
- BUILTIN_VDQF (UNOP, fcvtps)
15428
- BUILTIN_VDQF (UNOP, fcvtpu)
15429
- BUILTIN_VDQF (UNOP, fcvtms)
15430
- BUILTIN_VDQF (UNOP, fcvtmu)
15431
+ /* Implemented by <maxmin><mode>3.
15432
+ smax variants map to fmaxnm,
15433
+ smax_nan variants map to fmax. */
15434
+ BUILTIN_VDQIF (BINOP, smax, 3)
15435
+ BUILTIN_VDQIF (BINOP, smin, 3)
15436
+ BUILTIN_VDQ_BHSI (BINOP, umax, 3)
15437
+ BUILTIN_VDQ_BHSI (BINOP, umin, 3)
15438
+ BUILTIN_VDQF (BINOP, smax_nan, 3)
15439
+ BUILTIN_VDQF (BINOP, smin_nan, 3)
15441
+ /* Implemented by <frint_pattern><mode>2. */
15442
+ BUILTIN_VDQF (UNOP, btrunc, 2)
15443
+ BUILTIN_VDQF (UNOP, ceil, 2)
15444
+ BUILTIN_VDQF (UNOP, floor, 2)
15445
+ BUILTIN_VDQF (UNOP, nearbyint, 2)
15446
+ BUILTIN_VDQF (UNOP, rint, 2)
15447
+ BUILTIN_VDQF (UNOP, round, 2)
15448
+ BUILTIN_VDQF (UNOP, frintn, 2)
15450
+ /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
15451
+ VAR1 (UNOP, lbtruncv2sf, 2, v2si)
15452
+ VAR1 (UNOP, lbtruncv4sf, 2, v4si)
15453
+ VAR1 (UNOP, lbtruncv2df, 2, v2di)
15455
+ VAR1 (UNOP, lbtruncuv2sf, 2, v2si)
15456
+ VAR1 (UNOP, lbtruncuv4sf, 2, v4si)
15457
+ VAR1 (UNOP, lbtruncuv2df, 2, v2di)
15459
+ VAR1 (UNOP, lroundv2sf, 2, v2si)
15460
+ VAR1 (UNOP, lroundv4sf, 2, v4si)
15461
+ VAR1 (UNOP, lroundv2df, 2, v2di)
15462
+ /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
15463
+ VAR1 (UNOP, lroundsf, 2, si)
15464
+ VAR1 (UNOP, lrounddf, 2, di)
15466
+ VAR1 (UNOP, lrounduv2sf, 2, v2si)
15467
+ VAR1 (UNOP, lrounduv4sf, 2, v4si)
15468
+ VAR1 (UNOP, lrounduv2df, 2, v2di)
15469
+ VAR1 (UNOP, lroundusf, 2, si)
15470
+ VAR1 (UNOP, lroundudf, 2, di)
15472
+ VAR1 (UNOP, lceilv2sf, 2, v2si)
15473
+ VAR1 (UNOP, lceilv4sf, 2, v4si)
15474
+ VAR1 (UNOP, lceilv2df, 2, v2di)
15476
+ VAR1 (UNOP, lceiluv2sf, 2, v2si)
15477
+ VAR1 (UNOP, lceiluv4sf, 2, v4si)
15478
+ VAR1 (UNOP, lceiluv2df, 2, v2di)
15479
+ VAR1 (UNOP, lceilusf, 2, si)
15480
+ VAR1 (UNOP, lceiludf, 2, di)
15482
+ VAR1 (UNOP, lfloorv2sf, 2, v2si)
15483
+ VAR1 (UNOP, lfloorv4sf, 2, v4si)
15484
+ VAR1 (UNOP, lfloorv2df, 2, v2di)
15486
+ VAR1 (UNOP, lflooruv2sf, 2, v2si)
15487
+ VAR1 (UNOP, lflooruv4sf, 2, v4si)
15488
+ VAR1 (UNOP, lflooruv2df, 2, v2di)
15489
+ VAR1 (UNOP, lfloorusf, 2, si)
15490
+ VAR1 (UNOP, lfloorudf, 2, di)
15492
+ VAR1 (UNOP, lfrintnv2sf, 2, v2si)
15493
+ VAR1 (UNOP, lfrintnv4sf, 2, v4si)
15494
+ VAR1 (UNOP, lfrintnv2df, 2, v2di)
15495
+ VAR1 (UNOP, lfrintnsf, 2, si)
15496
+ VAR1 (UNOP, lfrintndf, 2, di)
15498
+ VAR1 (UNOP, lfrintnuv2sf, 2, v2si)
15499
+ VAR1 (UNOP, lfrintnuv4sf, 2, v4si)
15500
+ VAR1 (UNOP, lfrintnuv2df, 2, v2di)
15501
+ VAR1 (UNOP, lfrintnusf, 2, si)
15502
+ VAR1 (UNOP, lfrintnudf, 2, di)
15504
+ /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
15505
+ VAR1 (UNOP, floatv2si, 2, v2sf)
15506
+ VAR1 (UNOP, floatv4si, 2, v4sf)
15507
+ VAR1 (UNOP, floatv2di, 2, v2df)
15509
+ VAR1 (UNOP, floatunsv2si, 2, v2sf)
15510
+ VAR1 (UNOP, floatunsv4si, 2, v4sf)
15511
+ VAR1 (UNOP, floatunsv2di, 2, v2df)
15514
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
15515
- BUILTIN_VALL (BINOP, zip1)
15516
- BUILTIN_VALL (BINOP, zip2)
15517
- BUILTIN_VALL (BINOP, uzp1)
15518
- BUILTIN_VALL (BINOP, uzp2)
15519
- BUILTIN_VALL (BINOP, trn1)
15520
- BUILTIN_VALL (BINOP, trn2)
15521
+ BUILTIN_VALL (BINOP, zip1, 0)
15522
+ BUILTIN_VALL (BINOP, zip2, 0)
15523
+ BUILTIN_VALL (BINOP, uzp1, 0)
15524
+ BUILTIN_VALL (BINOP, uzp2, 0)
15525
+ BUILTIN_VALL (BINOP, trn1, 0)
15526
+ BUILTIN_VALL (BINOP, trn2, 0)
15528
+ /* Implemented by
15529
+ aarch64_frecp<FRECP:frecp_suffix><mode>. */
15530
+ BUILTIN_GPF (UNOP, frecpe, 0)
15531
+ BUILTIN_GPF (BINOP, frecps, 0)
15532
+ BUILTIN_GPF (UNOP, frecpx, 0)
15534
+ BUILTIN_VDQF (UNOP, frecpe, 0)
15535
+ BUILTIN_VDQF (BINOP, frecps, 0)
15537
+ BUILTIN_VDQF (UNOP, abs, 2)
15539
+ VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
15540
+ VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
15542
+ VAR1 (UNOP, float_extend_lo_, 0, v2df)
15543
+ VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
15544
--- a/src/gcc/config/aarch64/constraints.md
15545
+++ b/src/gcc/config/aarch64/constraints.md
15547
"Integer constant zero."
15548
(match_test "op == const0_rtx"))
15550
-(define_constraint "Usa"
15551
- "A constraint that matches an absolute symbolic address."
15552
- (and (match_code "const,symbol_ref")
15553
- (match_test "aarch64_symbolic_address_p (op)")))
15555
(define_constraint "Ush"
15556
"A constraint that matches an absolute symbolic address high part."
15557
(and (match_code "high")
15558
@@ -148,10 +143,25 @@
15560
A constraint that matches vector of immediates."
15561
(and (match_code "const_vector")
15562
- (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op),
15563
- NULL, NULL, NULL,
15564
- NULL, NULL) != 0")))
15565
+ (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op),
15568
+(define_constraint "Dh"
15570
+ A constraint that matches an immediate operand valid for\
15571
+ AdvSIMD scalar move in HImode."
15572
+ (and (match_code "const_int")
15573
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
15576
+(define_constraint "Dq"
15578
+ A constraint that matches an immediate operand valid for\
15579
+ AdvSIMD scalar move in QImode."
15580
+ (and (match_code "const_int")
15581
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
15584
(define_constraint "Dl"
15586
A constraint that matches vector of immediates for left shifts."
15587
--- a/src/gcc/config/aarch64/aarch64.c
15588
+++ b/src/gcc/config/aarch64/aarch64.c
15590
enum aarch64_symbol_type symbol_type;
15593
+struct simd_immediate_info
15597
+ int element_width;
15601
/* The current code model. */
15602
enum aarch64_code_model aarch64_cmodel;
15604
@@ -103,8 +111,6 @@
15605
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
15606
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
15607
static void aarch64_override_options_after_change (void);
15608
-static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
15609
- int *, unsigned char *, int *, int *);
15610
static bool aarch64_vector_mode_supported_p (enum machine_mode);
15611
static unsigned bit_count (unsigned HOST_WIDE_INT);
15612
static bool aarch64_const_vec_all_same_int_p (rtx,
15613
@@ -524,13 +530,15 @@
15617
+ case SYMBOL_TINY_ABSOLUTE:
15618
+ emit_insn (gen_rtx_SET (Pmode, dest, imm));
15621
case SYMBOL_SMALL_GOT:
15623
rtx tmp_reg = dest;
15624
if (can_create_pseudo_p ())
15626
- tmp_reg = gen_reg_rtx (Pmode);
15628
+ tmp_reg = gen_reg_rtx (Pmode);
15629
emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
15630
emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
15632
@@ -656,6 +664,47 @@
15633
|| ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
15636
+/* Split a complex SIMD move. */
15639
+aarch64_split_simd_move (rtx dst, rtx src)
15641
+ enum machine_mode src_mode = GET_MODE (src);
15642
+ enum machine_mode dst_mode = GET_MODE (dst);
15644
+ gcc_assert (VECTOR_MODE_P (dst_mode));
15646
+ if (REG_P (dst) && REG_P (src))
15648
+ gcc_assert (VECTOR_MODE_P (src_mode));
15650
+ switch (src_mode)
15653
+ emit_insn (gen_aarch64_simd_movv16qi (dst, src));
15656
+ emit_insn (gen_aarch64_simd_movv8hi (dst, src));
15659
+ emit_insn (gen_aarch64_simd_movv4si (dst, src));
15662
+ emit_insn (gen_aarch64_simd_movv2di (dst, src));
15665
+ emit_insn (gen_aarch64_simd_movv4sf (dst, src));
15668
+ emit_insn (gen_aarch64_simd_movv2df (dst, src));
15671
+ gcc_unreachable ();
15678
aarch64_force_temporary (rtx x, rtx value)
15680
@@ -745,6 +794,7 @@
15682
case SYMBOL_SMALL_TPREL:
15683
case SYMBOL_SMALL_ABSOLUTE:
15684
+ case SYMBOL_TINY_ABSOLUTE:
15685
aarch64_load_symref_appropriately (dest, imm, sty);
15688
@@ -3087,7 +3137,8 @@
15689
if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
15691
&& (code == EQ || code == NE || code == LT || code == GE)
15692
- && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
15693
+ && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
15694
+ || GET_CODE (x) == NEG))
15697
/* A compare with a shifted operand. Because of canonicalization,
15698
@@ -3349,7 +3400,7 @@
15699
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
15702
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
15703
+ asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
15707
@@ -3362,18 +3413,17 @@
15708
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
15711
- asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
15712
- REGNO (x) - V0_REGNUM + (code - 'S'));
15713
+ asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
15717
- /* Print integer constant in hex. */
15718
+ /* Print bottom 16 bits of integer constant in hex. */
15719
if (GET_CODE (x) != CONST_INT)
15721
output_operand_lossage ("invalid operand for '%%%c'", code);
15724
- asm_fprintf (f, "0x%wx", UINTVAL (x));
15725
+ asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
15729
@@ -3383,20 +3433,19 @@
15730
if (x == const0_rtx
15731
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
15733
- asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
15734
+ asm_fprintf (f, "%czr", code);
15738
if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
15740
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
15741
- REGNO (x) - R0_REGNUM);
15742
+ asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
15746
if (REG_P (x) && REGNO (x) == SP_REGNUM)
15748
- asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
15749
+ asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
15753
@@ -4956,6 +5005,7 @@
15755
/* Return the method that should be used to access SYMBOL_REF or
15756
LABEL_REF X in context CONTEXT. */
15758
enum aarch64_symbol_type
15759
aarch64_classify_symbol (rtx x,
15760
enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
15761
@@ -4969,6 +5019,8 @@
15763
case AARCH64_CMODEL_TINY_PIC:
15764
case AARCH64_CMODEL_TINY:
15765
+ return SYMBOL_TINY_ABSOLUTE;
15767
case AARCH64_CMODEL_SMALL_PIC:
15768
case AARCH64_CMODEL_SMALL:
15769
return SYMBOL_SMALL_ABSOLUTE;
15770
@@ -4978,48 +5030,42 @@
15774
- gcc_assert (GET_CODE (x) == SYMBOL_REF);
15776
- switch (aarch64_cmodel)
15777
+ if (GET_CODE (x) == SYMBOL_REF)
15779
- case AARCH64_CMODEL_LARGE:
15780
- return SYMBOL_FORCE_TO_MEM;
15782
- case AARCH64_CMODEL_TINY:
15783
- case AARCH64_CMODEL_SMALL:
15785
- /* This is needed to get DFmode, TImode constants to be loaded off
15786
- the constant pool. Is it necessary to dump TImode values into
15787
- the constant pool. We don't handle TImode constant loads properly
15788
- yet and hence need to use the constant pool. */
15789
- if (CONSTANT_POOL_ADDRESS_P (x))
15790
+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE
15791
+ || CONSTANT_POOL_ADDRESS_P (x))
15792
return SYMBOL_FORCE_TO_MEM;
15794
if (aarch64_tls_symbol_p (x))
15795
return aarch64_classify_tls_symbol (x);
15797
- if (SYMBOL_REF_WEAK (x))
15798
- return SYMBOL_FORCE_TO_MEM;
15799
+ switch (aarch64_cmodel)
15801
+ case AARCH64_CMODEL_TINY:
15802
+ if (SYMBOL_REF_WEAK (x))
15803
+ return SYMBOL_FORCE_TO_MEM;
15804
+ return SYMBOL_TINY_ABSOLUTE;
15806
- return SYMBOL_SMALL_ABSOLUTE;
15807
+ case AARCH64_CMODEL_SMALL:
15808
+ if (SYMBOL_REF_WEAK (x))
15809
+ return SYMBOL_FORCE_TO_MEM;
15810
+ return SYMBOL_SMALL_ABSOLUTE;
15812
- case AARCH64_CMODEL_TINY_PIC:
15813
- case AARCH64_CMODEL_SMALL_PIC:
15814
+ case AARCH64_CMODEL_TINY_PIC:
15815
+ if (!aarch64_symbol_binds_local_p (x))
15816
+ return SYMBOL_SMALL_GOT;
15817
+ return SYMBOL_TINY_ABSOLUTE;
15819
- if (CONSTANT_POOL_ADDRESS_P (x))
15820
- return SYMBOL_FORCE_TO_MEM;
15821
+ case AARCH64_CMODEL_SMALL_PIC:
15822
+ if (!aarch64_symbol_binds_local_p (x))
15823
+ return SYMBOL_SMALL_GOT;
15824
+ return SYMBOL_SMALL_ABSOLUTE;
15826
- if (aarch64_tls_symbol_p (x))
15827
- return aarch64_classify_tls_symbol (x);
15829
+ gcc_unreachable ();
15833
- if (!aarch64_symbol_binds_local_p (x))
15834
- return SYMBOL_SMALL_GOT;
15836
- return SYMBOL_SMALL_ABSOLUTE;
15839
- gcc_unreachable ();
15841
/* By default push everything into the constant pool. */
15842
return SYMBOL_FORCE_TO_MEM;
15844
@@ -5092,8 +5138,7 @@
15845
/* This could probably go away because
15846
we now decompose CONST_INTs according to expand_mov_immediate. */
15847
if ((GET_CODE (x) == CONST_VECTOR
15848
- && aarch64_simd_valid_immediate (x, mode, false,
15849
- NULL, NULL, NULL, NULL, NULL) != -1)
15850
+ && aarch64_simd_valid_immediate (x, mode, false, NULL))
15851
|| CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
15852
return !targetm.cannot_force_const_mem (mode, x);
15854
@@ -5924,32 +5969,57 @@
15858
-/* Return quad mode as the preferred SIMD mode. */
15859
+/* Return appropriate SIMD container
15860
+ for MODE within a vector of WIDTH bits. */
15861
static enum machine_mode
15862
-aarch64_preferred_simd_mode (enum machine_mode mode)
15863
+aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
15865
+ gcc_assert (width == 64 || width == 128);
15878
- return V16QImode;
15886
+ if (width == 128)
15898
+ return V16QImode;
15922
+/* Return 128-bit container as the preferred SIMD mode for MODE. */
15923
+static enum machine_mode
15924
+aarch64_preferred_simd_mode (enum machine_mode mode)
15926
+ return aarch64_simd_container_mode (mode, 128);
15929
/* Return the bitmask of possible vector sizes for the vectorizer
15930
to iterate over. */
15931
static unsigned int
15932
@@ -6037,7 +6107,7 @@
15935
/* Return the equivalent letter for size. */
15936
-static unsigned char
15938
sizetochar (int size)
15941
@@ -6084,15 +6154,10 @@
15942
return aarch64_float_const_representable_p (x0);
15945
-/* TODO: This function returns values similar to those
15946
- returned by neon_valid_immediate in gcc/config/arm/arm.c
15947
- but the API here is different enough that these magic numbers
15948
- are not used. It should be sufficient to return true or false. */
15950
-aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
15951
- rtx *modconst, int *elementwidth,
15952
- unsigned char *elementchar,
15953
- int *mvn, int *shift)
15954
+/* Return true for valid and false for invalid. */
15956
+aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
15957
+ struct simd_immediate_info *info)
15959
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
15961
@@ -6103,7 +6168,6 @@
15963
immtype = (CLASS); \
15964
elsize = (ELSIZE); \
15965
- elchar = sizetochar (elsize); \
15966
eshift = (SHIFT); \
15969
@@ -6112,36 +6176,25 @@
15970
unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
15971
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
15972
unsigned char bytes[16];
15973
- unsigned char elchar = 0;
15974
int immtype = -1, matches;
15975
unsigned int invmask = inverse ? 0xff : 0;
15978
if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
15980
- bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
15981
- int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
15982
+ if (! (aarch64_simd_imm_zero_p (op, mode)
15983
+ || aarch64_vect_float_const_representable_p (op)))
15986
- if (!(simd_imm_zero
15987
- || aarch64_vect_float_const_representable_p (op)))
15991
+ info->value = CONST_VECTOR_ELT (op, 0);
15992
+ info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
15993
+ info->mvn = false;
15998
- *modconst = CONST_VECTOR_ELT (op, 0);
16000
- if (elementwidth)
16001
- *elementwidth = elem_width;
16004
- *elementchar = sizetochar (elem_width);
16009
- if (simd_imm_zero)
16016
/* Splat vector constant out into a byte vector. */
16017
@@ -6239,23 +6292,14 @@
16019
|| (immtype >= 12 && immtype <= 15)
16026
+ info->element_width = elsize;
16027
+ info->mvn = emvn != 0;
16028
+ info->shift = eshift;
16030
- if (elementwidth)
16031
- *elementwidth = elsize;
16034
- *elementchar = elchar;
16044
unsigned HOST_WIDE_INT imm = 0;
16046
/* Un-invert bytes of recognized vector, if necessary. */
16047
@@ -6272,68 +6316,27 @@
16048
imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
16049
<< (i * BITS_PER_UNIT);
16051
- *modconst = GEN_INT (imm);
16054
+ info->value = GEN_INT (imm);
16058
- unsigned HOST_WIDE_INT imm = 0;
16060
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
16061
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
16063
- for (i = 0; i < elsize / BITS_PER_UNIT; i++)
16064
- imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
16066
/* Construct 'abcdefgh' because the assembler cannot handle
16067
- generic constants. */
16068
- gcc_assert (shift != NULL && mvn != NULL);
16070
+ generic constants. */
16073
- imm = (imm >> *shift) & 0xff;
16074
- *modconst = GEN_INT (imm);
16076
+ imm = (imm >> info->shift) & 0xff;
16077
+ info->value = GEN_INT (imm);
16086
-/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
16087
- (or, implicitly, MVNI) immediate. Write back width per element
16088
- to *ELEMENTWIDTH, and a modified constant (whatever should be output
16089
- for a MOVI instruction) in *MODCONST. */
16091
-aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
16092
- rtx *modconst, int *elementwidth,
16093
- unsigned char *elementchar,
16094
- int *mvn, int *shift)
16098
- unsigned char tmpwidthc;
16099
- int tmpmvn = 0, tmpshift = 0;
16100
- int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
16101
- &tmpwidth, &tmpwidthc,
16102
- &tmpmvn, &tmpshift);
16104
- if (retval == -1)
16108
- *modconst = tmpconst;
16110
- if (elementwidth)
16111
- *elementwidth = tmpwidth;
16114
- *elementchar = tmpwidthc;
16120
- *shift = tmpshift;
16126
aarch64_const_vec_all_same_int_p (rtx x,
16127
HOST_WIDE_INT minval,
16128
@@ -6395,6 +6398,25 @@
16133
+aarch64_mov_operand_p (rtx x,
16134
+ enum aarch64_symbol_context context,
16135
+ enum machine_mode mode)
16137
+ if (GET_CODE (x) == HIGH
16138
+ && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
16141
+ if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
16144
+ if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
16147
+ return aarch64_classify_symbolic_expression (x, context)
16148
+ == SYMBOL_TINY_ABSOLUTE;
16151
/* Return a const_int vector of VAL. */
16153
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
16154
@@ -6409,6 +6431,19 @@
16155
return gen_rtx_CONST_VECTOR (mode, v);
16158
+/* Check OP is a legal scalar immediate for the MOVI instruction. */
16161
+aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
16163
+ enum machine_mode vmode;
16165
+ gcc_assert (!VECTOR_MODE_P (mode));
16166
+ vmode = aarch64_preferred_simd_mode (mode);
16167
+ rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
16168
+ return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
16171
/* Construct and return a PARALLEL RTX vector. */
16173
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
16174
@@ -6634,8 +6669,7 @@
16175
gcc_unreachable ();
16177
if (const_vec != NULL_RTX
16178
- && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
16179
- NULL, NULL, NULL))
16180
+ && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
16181
/* Load using MOVI/MVNI. */
16183
else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
16184
@@ -7193,49 +7227,78 @@
16188
-aarch64_output_simd_mov_immediate (rtx *const_vector,
16189
+aarch64_output_simd_mov_immediate (rtx const_vector,
16190
enum machine_mode mode,
16194
- unsigned char widthc;
16195
- int lane_width_bits;
16197
static char templ[40];
16198
- int shift = 0, mvn = 0;
16199
const char *mnemonic;
16200
unsigned int lane_count = 0;
16201
+ char element_char;
16204
- aarch64_simd_immediate_valid_for_move (*const_vector, mode,
16205
- const_vector, &lane_width_bits,
16206
- &widthc, &mvn, &shift);
16207
+ struct simd_immediate_info info;
16209
+ /* This will return true to show const_vector is legal for use as either
16210
+ a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
16211
+ also update INFO to show how the immediate should be generated. */
16212
+ is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
16213
gcc_assert (is_valid);
16215
+ element_char = sizetochar (info.element_width);
16216
+ lane_count = width / info.element_width;
16218
mode = GET_MODE_INNER (mode);
16219
if (mode == SFmode || mode == DFmode)
16222
- aarch64_float_const_zero_rtx_p (*const_vector);
16223
- gcc_assert (shift == 0);
16224
- mnemonic = zero_p ? "movi" : "fmov";
16225
+ gcc_assert (info.shift == 0 && ! info.mvn);
16226
+ if (aarch64_float_const_zero_rtx_p (info.value))
16227
+ info.value = GEN_INT (0);
16230
+#define buf_size 20
16231
+ REAL_VALUE_TYPE r;
16232
+ REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
16233
+ char float_buf[buf_size] = {'\0'};
16234
+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
16237
+ if (lane_count == 1)
16238
+ snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
16240
+ snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
16241
+ lane_count, element_char, float_buf);
16246
- mnemonic = mvn ? "mvni" : "movi";
16248
- gcc_assert (lane_width_bits != 0);
16249
- lane_count = width / lane_width_bits;
16250
+ mnemonic = info.mvn ? "mvni" : "movi";
16252
if (lane_count == 1)
16253
- snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
16255
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
16256
- mnemonic, lane_count, widthc, shift);
16257
+ snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
16258
+ mnemonic, UINTVAL (info.value));
16259
+ else if (info.shift)
16260
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
16261
+ ", lsl %d", mnemonic, lane_count, element_char,
16262
+ UINTVAL (info.value), info.shift);
16264
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
16265
- mnemonic, lane_count, widthc);
16266
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
16267
+ mnemonic, lane_count, element_char, UINTVAL (info.value));
16272
+aarch64_output_scalar_simd_mov_immediate (rtx immediate,
16273
+ enum machine_mode mode)
16275
+ enum machine_mode vmode;
16277
+ gcc_assert (!VECTOR_MODE_P (mode));
16278
+ vmode = aarch64_simd_container_mode (mode, 64);
16279
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
16280
+ return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
16283
/* Split operands into moves from op[1] + op[2] into op[0]. */
16286
@@ -7860,6 +7923,9 @@
16287
#undef TARGET_EXPAND_BUILTIN_VA_START
16288
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
16290
+#undef TARGET_FOLD_BUILTIN
16291
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
16293
#undef TARGET_FUNCTION_ARG
16294
#define TARGET_FUNCTION_ARG aarch64_function_arg
16296
@@ -7881,6 +7947,9 @@
16297
#undef TARGET_FRAME_POINTER_REQUIRED
16298
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
16300
+#undef TARGET_GIMPLE_FOLD_BUILTIN
16301
+#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
16303
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
16304
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
16306
--- a/src/gcc/config/aarch64/iterators.md
16307
+++ b/src/gcc/config/aarch64/iterators.md
16309
;; Vector Float modes.
16310
(define_mode_iterator VDQF [V2SF V4SF V2DF])
16312
+;; Modes suitable to use as the return type of a vcond expression.
16313
+(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
16315
+;; All Float modes.
16316
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
16318
;; Vector Float modes with 2 elements.
16319
(define_mode_iterator V2F [V2SF V2DF])
16321
@@ -160,10 +166,15 @@
16323
UNSPEC_ASHIFT_SIGNED ; Used in aarch-simd.md.
16324
UNSPEC_ASHIFT_UNSIGNED ; Used in aarch64-simd.md.
16325
+ UNSPEC_FMAX ; Used in aarch64-simd.md.
16326
+ UNSPEC_FMAXNMV ; Used in aarch64-simd.md.
16327
UNSPEC_FMAXV ; Used in aarch64-simd.md.
16328
+ UNSPEC_FMIN ; Used in aarch64-simd.md.
16329
+ UNSPEC_FMINNMV ; Used in aarch64-simd.md.
16330
UNSPEC_FMINV ; Used in aarch64-simd.md.
16331
UNSPEC_FADDV ; Used in aarch64-simd.md.
16332
- UNSPEC_ADDV ; Used in aarch64-simd.md.
16333
+ UNSPEC_SADDV ; Used in aarch64-simd.md.
16334
+ UNSPEC_UADDV ; Used in aarch64-simd.md.
16335
UNSPEC_SMAXV ; Used in aarch64-simd.md.
16336
UNSPEC_SMINV ; Used in aarch64-simd.md.
16337
UNSPEC_UMAXV ; Used in aarch64-simd.md.
16338
@@ -213,13 +224,6 @@
16339
UNSPEC_URSHL ; Used in aarch64-simd.md.
16340
UNSPEC_SQRSHL ; Used in aarch64-simd.md.
16341
UNSPEC_UQRSHL ; Used in aarch64-simd.md.
16342
- UNSPEC_CMEQ ; Used in aarch64-simd.md.
16343
- UNSPEC_CMLE ; Used in aarch64-simd.md.
16344
- UNSPEC_CMLT ; Used in aarch64-simd.md.
16345
- UNSPEC_CMGE ; Used in aarch64-simd.md.
16346
- UNSPEC_CMGT ; Used in aarch64-simd.md.
16347
- UNSPEC_CMHS ; Used in aarch64-simd.md.
16348
- UNSPEC_CMHI ; Used in aarch64-simd.md.
16349
UNSPEC_SSLI ; Used in aarch64-simd.md.
16350
UNSPEC_USLI ; Used in aarch64-simd.md.
16351
UNSPEC_SSRI ; Used in aarch64-simd.md.
16352
@@ -227,10 +231,6 @@
16353
UNSPEC_SSHLL ; Used in aarch64-simd.md.
16354
UNSPEC_USHLL ; Used in aarch64-simd.md.
16355
UNSPEC_ADDP ; Used in aarch64-simd.md.
16356
- UNSPEC_CMTST ; Used in aarch64-simd.md.
16357
- UNSPEC_FMAX ; Used in aarch64-simd.md.
16358
- UNSPEC_FMIN ; Used in aarch64-simd.md.
16359
- UNSPEC_BSL ; Used in aarch64-simd.md.
16360
UNSPEC_TBL ; Used in vector permute patterns.
16361
UNSPEC_CONCAT ; Used in vector permute patterns.
16362
UNSPEC_ZIP1 ; Used in vector permute patterns.
16363
@@ -249,8 +249,12 @@
16364
;; 32-bit version and "%x0" in the 64-bit version.
16365
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
16367
+;; For constraints used in scalar immediate vector moves
16368
+(define_mode_attr hq [(HI "h") (QI "q")])
16370
;; For scalar usage of vector/FP registers
16371
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
16372
+ (SF "s") (DF "d")
16373
(V8QI "") (V16QI "")
16374
(V4HI "") (V8HI "")
16375
(V2SI "") (V4SI "")
16376
@@ -305,7 +309,8 @@
16377
(V4SF ".4s") (V2DF ".2d")
16384
;; Register suffix narrowed modes for VQN.
16385
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
16386
@@ -444,7 +449,8 @@
16387
(V2SI "V2SI") (V4SI "V4SI")
16388
(DI "DI") (V2DI "V2DI")
16389
(V2SF "V2SI") (V4SF "V4SI")
16391
+ (V2DF "V2DI") (DF "DI")
16394
;; Lower case mode of results of comparison operations.
16395
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
16396
@@ -452,7 +458,8 @@
16397
(V2SI "v2si") (V4SI "v4si")
16398
(DI "di") (V2DI "v2di")
16399
(V2SF "v2si") (V4SF "v4si")
16401
+ (V2DF "v2di") (DF "di")
16404
;; Vm for lane instructions is restricted to FP_LO_REGS.
16405
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
16406
@@ -528,9 +535,14 @@
16407
;; Iterator for integer conversions
16408
(define_code_iterator FIXUORS [fix unsigned_fix])
16410
+;; Iterator for float conversions
16411
+(define_code_iterator FLOATUORS [float unsigned_float])
16413
;; Code iterator for variants of vector max and min.
16414
(define_code_iterator MAXMIN [smax smin umax umin])
16416
+(define_code_iterator FMAXMIN [smax smin])
16418
;; Code iterator for variants of vector max and min.
16419
(define_code_iterator ADDSUB [plus minus])
16421
@@ -543,6 +555,15 @@
16422
;; Code iterator for signed variants of vector saturating binary ops.
16423
(define_code_iterator SBINQOPS [ss_plus ss_minus])
16425
+;; Comparison operators for <F>CM.
16426
+(define_code_iterator COMPARISONS [lt le eq ge gt])
16428
+;; Unsigned comparison operators.
16429
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
16431
+;; Unsigned comparison operators.
16432
+(define_code_iterator FAC_COMPARISONS [lt le ge gt])
16434
;; -------------------------------------------------------------------
16436
;; -------------------------------------------------------------------
16437
@@ -555,6 +576,10 @@
16438
(zero_extend "zero_extend")
16439
(sign_extract "extv")
16440
(zero_extract "extzv")
16442
+ (unsigned_fix "fixuns")
16444
+ (unsigned_float "floatuns")
16448
@@ -571,12 +596,37 @@
16461
+;; For comparison operators we use the FCM* and CM* instructions.
16462
+;; As there are no CMLE or CMLT instructions which act on 3 vector
16463
+;; operands, we must use CMGE or CMGT and swap the order of the
16464
+;; source operands.
16466
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
16467
+ (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
16468
+(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
16469
+ (ltu "2") (leu "2") (geu "1") (gtu "1")])
16470
+(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
16471
+ (ltu "1") (leu "1") (geu "2") (gtu "2")])
16473
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
16474
+ (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
16476
+(define_code_attr fix_trunc_optab [(fix "fix_trunc")
16477
+ (unsigned_fix "fixuns_trunc")])
16479
;; Optab prefix for sign/zero-extending operations
16480
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
16481
(div "") (udiv "u")
16482
(fix "") (unsigned_fix "u")
16483
+ (float "s") (unsigned_float "u")
16484
(ss_plus "s") (us_plus "u")
16485
(ss_minus "s") (us_minus "u")])
16487
@@ -601,7 +651,9 @@
16488
(define_code_attr su [(sign_extend "s") (zero_extend "u")
16489
(sign_extract "s") (zero_extract "u")
16490
(fix "s") (unsigned_fix "u")
16491
- (div "s") (udiv "u")])
16492
+ (div "s") (udiv "u")
16493
+ (smax "s") (umax "u")
16494
+ (smin "s") (umin "u")])
16496
;; Emit cbz/cbnz depending on comparison type.
16497
(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])
16498
@@ -610,10 +662,10 @@
16499
(define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")])
16501
;; Max/min attributes.
16502
-(define_code_attr maxmin [(smax "smax")
16506
+(define_code_attr maxmin [(smax "max")
16511
;; MLA/MLS attributes.
16512
(define_code_attr as [(ss_plus "a") (ss_minus "s")])
16513
@@ -635,8 +687,11 @@
16514
(define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV
16515
UNSPEC_SMAXV UNSPEC_SMINV])
16517
-(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV])
16518
+(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
16519
+ UNSPEC_FMAXNMV UNSPEC_FMINNMV])
16521
+(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
16523
(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
16524
UNSPEC_SRHADD UNSPEC_URHADD
16525
UNSPEC_SHSUB UNSPEC_UHSUB
16526
@@ -649,7 +704,7 @@
16527
(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
16528
UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
16530
-(define_int_iterator FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
16531
+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
16533
(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
16535
@@ -680,35 +735,44 @@
16536
UNSPEC_SQSHRN UNSPEC_UQSHRN
16537
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
16539
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
16540
- UNSPEC_CMLE UNSPEC_CMLT])
16542
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
16544
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
16545
UNSPEC_TRN1 UNSPEC_TRN2
16546
UNSPEC_UZP1 UNSPEC_UZP2])
16548
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
16549
- UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
16550
+ UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
16553
(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
16555
+ UNSPEC_FRINTA UNSPEC_FRINTN])
16557
+(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
16559
;; -------------------------------------------------------------------
16560
;; Int Iterators Attributes.
16561
;; -------------------------------------------------------------------
16562
-(define_int_attr maxminv [(UNSPEC_UMAXV "umax")
16563
- (UNSPEC_UMINV "umin")
16564
- (UNSPEC_SMAXV "smax")
16565
- (UNSPEC_SMINV "smin")])
16566
+(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
16567
+ (UNSPEC_UMINV "umin")
16568
+ (UNSPEC_SMAXV "smax")
16569
+ (UNSPEC_SMINV "smin")
16570
+ (UNSPEC_FMAX "smax_nan")
16571
+ (UNSPEC_FMAXNMV "smax")
16572
+ (UNSPEC_FMAXV "smax_nan")
16573
+ (UNSPEC_FMIN "smin_nan")
16574
+ (UNSPEC_FMINNMV "smin")
16575
+ (UNSPEC_FMINV "smin_nan")])
16577
-(define_int_attr fmaxminv [(UNSPEC_FMAXV "max")
16578
- (UNSPEC_FMINV "min")])
16579
+(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
16580
+ (UNSPEC_UMINV "umin")
16581
+ (UNSPEC_SMAXV "smax")
16582
+ (UNSPEC_SMINV "smin")
16583
+ (UNSPEC_FMAX "fmax")
16584
+ (UNSPEC_FMAXNMV "fmaxnm")
16585
+ (UNSPEC_FMAXV "fmax")
16586
+ (UNSPEC_FMIN "fmin")
16587
+ (UNSPEC_FMINNMV "fminnm")
16588
+ (UNSPEC_FMINV "fmin")])
16590
-(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax")
16591
- (UNSPEC_FMIN "fmin")])
16593
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
16594
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
16595
(UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u")
16596
@@ -719,6 +783,7 @@
16597
(UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
16598
(UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
16599
(UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
16600
+ (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
16601
(UNSPEC_SSLI "s") (UNSPEC_USLI "u")
16602
(UNSPEC_SSRI "s") (UNSPEC_USRI "u")
16603
(UNSPEC_USRA "u") (UNSPEC_SSRA "s")
16604
@@ -768,12 +833,6 @@
16605
(UNSPEC_RADDHN2 "add")
16606
(UNSPEC_RSUBHN2 "sub")])
16608
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
16609
- (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
16610
- (UNSPEC_CMEQ "eq")
16611
- (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
16612
- (UNSPEC_CMTST "tst")])
16614
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
16615
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
16617
@@ -783,15 +842,18 @@
16618
(UNSPEC_FRINTM "floor")
16619
(UNSPEC_FRINTI "nearbyint")
16620
(UNSPEC_FRINTX "rint")
16621
- (UNSPEC_FRINTA "round")])
16622
+ (UNSPEC_FRINTA "round")
16623
+ (UNSPEC_FRINTN "frintn")])
16625
;; frint suffix for floating-point rounding instructions.
16626
(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p")
16627
(UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i")
16628
- (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")])
16629
+ (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")
16630
+ (UNSPEC_FRINTN "n")])
16632
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
16633
- (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
16634
+ (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")
16635
+ (UNSPEC_FRINTN "frintn")])
16637
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
16638
(UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
16639
@@ -800,3 +862,5 @@
16640
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
16641
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
16642
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
16644
+(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
16645
--- a/src/gcc/config/aarch64/aarch64.h
16646
+++ b/src/gcc/config/aarch64/aarch64.h
16647
@@ -709,6 +709,8 @@
16649
#define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y)
16651
+#define REVERSIBLE_CC_MODE(MODE) 1
16653
#define REVERSE_CONDITION(CODE, MODE) \
16654
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
16655
? reverse_condition_maybe_unordered (CODE) \
16656
--- a/src/gcc/config/arm/arm-tables.opt
16657
+++ b/src/gcc/config/arm/arm-tables.opt
16658
@@ -250,6 +250,9 @@
16659
Enum(processor_type) String(cortex-a15) Value(cortexa15)
16662
+Enum(processor_type) String(cortex-a53) Value(cortexa53)
16665
Enum(processor_type) String(cortex-r4) Value(cortexr4)
16668
@@ -259,6 +262,9 @@
16669
Enum(processor_type) String(cortex-r5) Value(cortexr5)
16672
+Enum(processor_type) String(cortex-r7) Value(cortexr7)
16675
Enum(processor_type) String(cortex-m4) Value(cortexm4)
16678
--- a/src/gcc/config/arm/arm.c
16679
+++ b/src/gcc/config/arm/arm.c
16680
@@ -173,6 +173,7 @@
16681
static tree arm_builtin_decl (unsigned, bool);
16682
static void emit_constant_insn (rtx cond, rtx pattern);
16683
static rtx emit_set_insn (rtx, rtx);
16684
+static rtx emit_multi_reg_push (unsigned long);
16685
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
16687
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
16688
@@ -620,6 +621,13 @@
16689
#undef TARGET_CLASS_LIKELY_SPILLED_P
16690
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
16692
+#undef TARGET_VECTORIZE_BUILTINS
16693
+#define TARGET_VECTORIZE_BUILTINS
16695
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
16696
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
16697
+ arm_builtin_vectorized_function
16699
#undef TARGET_VECTOR_ALIGNMENT
16700
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
16702
@@ -839,6 +847,10 @@
16703
int arm_arch_arm_hwdiv;
16704
int arm_arch_thumb_hwdiv;
16706
+/* Nonzero if we should use Neon to handle 64-bits operations rather
16707
+ than core registers. */
16708
+int prefer_neon_for_64bits = 0;
16710
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
16711
we must report the mode of the memory reference from
16712
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
16713
@@ -936,6 +948,7 @@
16714
false, /* Prefer LDRD/STRD. */
16715
{true, true}, /* Prefer non short circuit. */
16716
&arm_default_vec_cost, /* Vectorizer costs. */
16717
+ false /* Prefer Neon for 64-bits bitops. */
16720
const struct tune_params arm_fastmul_tune =
16721
@@ -950,6 +963,7 @@
16722
false, /* Prefer LDRD/STRD. */
16723
{true, true}, /* Prefer non short circuit. */
16724
&arm_default_vec_cost, /* Vectorizer costs. */
16725
+ false /* Prefer Neon for 64-bits bitops. */
16728
/* StrongARM has early execution of branches, so a sequence that is worth
16729
@@ -967,6 +981,7 @@
16730
false, /* Prefer LDRD/STRD. */
16731
{true, true}, /* Prefer non short circuit. */
16732
&arm_default_vec_cost, /* Vectorizer costs. */
16733
+ false /* Prefer Neon for 64-bits bitops. */
16736
const struct tune_params arm_xscale_tune =
16737
@@ -981,6 +996,7 @@
16738
false, /* Prefer LDRD/STRD. */
16739
{true, true}, /* Prefer non short circuit. */
16740
&arm_default_vec_cost, /* Vectorizer costs. */
16741
+ false /* Prefer Neon for 64-bits bitops. */
16744
const struct tune_params arm_9e_tune =
16745
@@ -995,6 +1011,7 @@
16746
false, /* Prefer LDRD/STRD. */
16747
{true, true}, /* Prefer non short circuit. */
16748
&arm_default_vec_cost, /* Vectorizer costs. */
16749
+ false /* Prefer Neon for 64-bits bitops. */
16752
const struct tune_params arm_v6t2_tune =
16753
@@ -1009,6 +1026,7 @@
16754
false, /* Prefer LDRD/STRD. */
16755
{true, true}, /* Prefer non short circuit. */
16756
&arm_default_vec_cost, /* Vectorizer costs. */
16757
+ false /* Prefer Neon for 64-bits bitops. */
16760
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
16761
@@ -1024,6 +1042,7 @@
16762
false, /* Prefer LDRD/STRD. */
16763
{true, true}, /* Prefer non short circuit. */
16764
&arm_default_vec_cost, /* Vectorizer costs. */
16765
+ false /* Prefer Neon for 64-bits bitops. */
16768
const struct tune_params arm_cortex_a15_tune =
16769
@@ -1038,6 +1057,7 @@
16770
true, /* Prefer LDRD/STRD. */
16771
{true, true}, /* Prefer non short circuit. */
16772
&arm_default_vec_cost, /* Vectorizer costs. */
16773
+ false /* Prefer Neon for 64-bits bitops. */
16776
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
16777
@@ -1055,6 +1075,7 @@
16778
false, /* Prefer LDRD/STRD. */
16779
{false, false}, /* Prefer non short circuit. */
16780
&arm_default_vec_cost, /* Vectorizer costs. */
16781
+ false /* Prefer Neon for 64-bits bitops. */
16784
const struct tune_params arm_cortex_a9_tune =
16785
@@ -1069,6 +1090,7 @@
16786
false, /* Prefer LDRD/STRD. */
16787
{true, true}, /* Prefer non short circuit. */
16788
&arm_default_vec_cost, /* Vectorizer costs. */
16789
+ false /* Prefer Neon for 64-bits bitops. */
16792
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
16793
@@ -1085,6 +1107,7 @@
16794
false, /* Prefer LDRD/STRD. */
16795
{false, false}, /* Prefer non short circuit. */
16796
&arm_default_vec_cost, /* Vectorizer costs. */
16797
+ false /* Prefer Neon for 64-bits bitops. */
16800
const struct tune_params arm_fa726te_tune =
16801
@@ -1099,6 +1122,7 @@
16802
false, /* Prefer LDRD/STRD. */
16803
{true, true}, /* Prefer non short circuit. */
16804
&arm_default_vec_cost, /* Vectorizer costs. */
16805
+ false /* Prefer Neon for 64-bits bitops. */
16809
@@ -2129,11 +2153,25 @@
16810
global_options.x_param_values,
16811
global_options_set.x_param_values);
16813
+ /* Use Neon to perform 64-bits operations rather than core
16815
+ prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
16816
+ if (use_neon_for_64bits == 1)
16817
+ prefer_neon_for_64bits = true;
16819
/* Use the alternative scheduling-pressure algorithm by default. */
16820
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
16821
global_options.x_param_values,
16822
global_options_set.x_param_values);
16824
+ /* Disable shrink-wrap when optimizing function for size, since it tends to
16825
+ generate additional returns. */
16826
+ if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
16827
+ flag_shrink_wrap = false;
16828
+ /* TBD: Dwarf info for apcs frame is not handled yet. */
16829
+ if (TARGET_APCS_FRAME)
16830
+ flag_shrink_wrap = false;
16832
/* Register global variables with the garbage collector. */
16833
arm_add_gc_roots ();
16835
@@ -2382,6 +2420,10 @@
16836
if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
16839
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
16840
+ && !optimize_function_for_size_p (cfun))
16843
offsets = arm_get_frame_offsets ();
16844
stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16846
@@ -2479,6 +2521,18 @@
16850
+/* Return TRUE if we should try to use a simple_return insn, i.e. perform
16851
+ shrink-wrapping if possible. This is the case if we need to emit a
16852
+ prologue, which we can test by looking at the offsets. */
16854
+use_simple_return_p (void)
16856
+ arm_stack_offsets *offsets;
16858
+ offsets = arm_get_frame_offsets ();
16859
+ return offsets->outgoing_args != 0;
16862
/* Return TRUE if int I is a valid immediate ARM constant. */
16865
@@ -2617,6 +2671,9 @@
16870
+ return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
16871
+ && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
16873
return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
16875
@@ -5335,9 +5392,8 @@
16876
if (cfun->machine->sibcall_blocked)
16879
- /* Never tailcall something for which we have no decl, or if we
16880
- are generating code for Thumb-1. */
16881
- if (decl == NULL || TARGET_THUMB1)
16882
+ /* Never tailcall something if we are generating code for Thumb-1. */
16883
+ if (TARGET_THUMB1)
16886
/* The PIC register is live on entry to VxWorks PLT entries, so we
16887
@@ -5347,13 +5403,14 @@
16889
/* Cannot tail-call to long calls, since these are out of range of
16890
a branch instruction. */
16891
- if (arm_is_long_call_p (decl))
16892
+ if (decl && arm_is_long_call_p (decl))
16895
/* If we are interworking and the function is not declared static
16896
then we can't tail-call it unless we know that it exists in this
16897
compilation unit (since it might be a Thumb routine). */
16898
- if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
16899
+ if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
16900
+ && !TREE_ASM_WRITTEN (decl))
16903
func_type = arm_current_func_type ();
16904
@@ -5385,6 +5442,7 @@
16906
if (TARGET_AAPCS_BASED
16907
&& arm_abi == ARM_ABI_AAPCS
16909
&& DECL_WEAK (decl))
16912
@@ -11803,6 +11861,134 @@
16916
+/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
16919
+next_consecutive_mem (rtx mem)
16921
+ enum machine_mode mode = GET_MODE (mem);
16922
+ HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
16923
+ rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
16925
+ return adjust_automodify_address (mem, mode, addr, offset);
16928
+/* Copy using LDRD/STRD instructions whenever possible.
16929
+ Returns true upon success. */
16931
+gen_movmem_ldrd_strd (rtx *operands)
16933
+ unsigned HOST_WIDE_INT len;
16934
+ HOST_WIDE_INT align;
16935
+ rtx src, dst, base;
16937
+ bool src_aligned, dst_aligned;
16938
+ bool src_volatile, dst_volatile;
16940
+ gcc_assert (CONST_INT_P (operands[2]));
16941
+ gcc_assert (CONST_INT_P (operands[3]));
16943
+ len = UINTVAL (operands[2]);
16947
+ /* Maximum alignment we can assume for both src and dst buffers. */
16948
+ align = INTVAL (operands[3]);
16950
+ if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
16953
+ /* Place src and dst addresses in registers
16954
+ and update the corresponding mem rtx. */
16955
+ dst = operands[0];
16956
+ dst_volatile = MEM_VOLATILE_P (dst);
16957
+ dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
16958
+ base = copy_to_mode_reg (SImode, XEXP (dst, 0));
16959
+ dst = adjust_automodify_address (dst, VOIDmode, base, 0);
16961
+ src = operands[1];
16962
+ src_volatile = MEM_VOLATILE_P (src);
16963
+ src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
16964
+ base = copy_to_mode_reg (SImode, XEXP (src, 0));
16965
+ src = adjust_automodify_address (src, VOIDmode, base, 0);
16967
+ if (!unaligned_access && !(src_aligned && dst_aligned))
16970
+ if (src_volatile || dst_volatile)
16973
+ /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
16974
+ if (!(dst_aligned || src_aligned))
16975
+ return arm_gen_movmemqi (operands);
16977
+ src = adjust_address (src, DImode, 0);
16978
+ dst = adjust_address (dst, DImode, 0);
16982
+ reg0 = gen_reg_rtx (DImode);
16984
+ emit_move_insn (reg0, src);
16986
+ emit_insn (gen_unaligned_loaddi (reg0, src));
16989
+ emit_move_insn (dst, reg0);
16991
+ emit_insn (gen_unaligned_storedi (dst, reg0));
16993
+ src = next_consecutive_mem (src);
16994
+ dst = next_consecutive_mem (dst);
16997
+ gcc_assert (len < 8);
17000
+ /* More than a word but less than a double-word to copy. Copy a word. */
17001
+ reg0 = gen_reg_rtx (SImode);
17002
+ src = adjust_address (src, SImode, 0);
17003
+ dst = adjust_address (dst, SImode, 0);
17005
+ emit_move_insn (reg0, src);
17007
+ emit_insn (gen_unaligned_loadsi (reg0, src));
17010
+ emit_move_insn (dst, reg0);
17012
+ emit_insn (gen_unaligned_storesi (dst, reg0));
17014
+ src = next_consecutive_mem (src);
17015
+ dst = next_consecutive_mem (dst);
17022
+ /* Copy the remaining bytes. */
17025
+ dst = adjust_address (dst, HImode, 0);
17026
+ src = adjust_address (src, HImode, 0);
17027
+ reg0 = gen_reg_rtx (SImode);
17028
+ emit_insn (gen_unaligned_loadhiu (reg0, src));
17029
+ emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
17030
+ src = next_consecutive_mem (src);
17031
+ dst = next_consecutive_mem (dst);
17036
+ dst = adjust_address (dst, QImode, 0);
17037
+ src = adjust_address (src, QImode, 0);
17038
+ reg0 = gen_reg_rtx (QImode);
17039
+ emit_move_insn (reg0, src);
17040
+ emit_move_insn (dst, reg0);
17044
/* Select a dominance comparison mode if possible for a test of the general
17045
form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
17046
COND_OR == DOM_CC_X_AND_Y => (X && Y)
17047
@@ -12603,6 +12789,277 @@
17051
+/* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17052
+ operand ADDR is an immediate offset from the base register and is
17053
+ not volatile, in which case it sets BASE and OFFSET
17056
+mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
17058
+ /* TODO: Handle more general memory operand patterns, such as
17059
+ PRE_DEC and PRE_INC. */
17061
+ /* Convert a subreg of mem into mem itself. */
17062
+ if (GET_CODE (addr) == SUBREG)
17063
+ addr = alter_subreg (&addr, true);
17065
+ gcc_assert (MEM_P (addr));
17067
+ /* Don't modify volatile memory accesses. */
17068
+ if (MEM_VOLATILE_P (addr))
17071
+ *offset = const0_rtx;
17073
+ addr = XEXP (addr, 0);
17074
+ if (REG_P (addr))
17079
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
17081
+ *base = XEXP (addr, 0);
17082
+ *offset = XEXP (addr, 1);
17083
+ return (REG_P (*base) && CONST_INT_P (*offset));
17089
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
17091
+/* Called from a peephole2 to replace two word-size accesses with a
17092
+ single LDRD/STRD instruction. Returns true iff we can generate a
17093
+ new instruction sequence. That is, both accesses use the same base
17094
+ register and the gap between constant offsets is 4. This function
17095
+ may reorder its operands to match ldrd/strd RTL templates.
17096
+ OPERANDS are the operands found by the peephole matcher;
17097
+ OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17098
+ corresponding memory operands. LOAD indicaates whether the access
17099
+ is load or store. CONST_STORE indicates a store of constant
17100
+ integer values held in OPERANDS[4,5] and assumes that the pattern
17101
+ is of length 4 insn, for the purpose of checking dead registers.
17102
+ COMMUTE indicates that register operands may be reordered. */
17104
+gen_operands_ldrd_strd (rtx *operands, bool load,
17105
+ bool const_store, bool commute)
17108
+ HOST_WIDE_INT offsets[2], offset;
17109
+ rtx base = NULL_RTX;
17110
+ rtx cur_base, cur_offset, tmp;
17112
+ HARD_REG_SET regset;
17114
+ gcc_assert (!const_store || !load);
17115
+ /* Check that the memory references are immediate offsets from the
17116
+ same base register. Extract the base register, the destination
17117
+ registers, and the corresponding memory offsets. */
17118
+ for (i = 0; i < nops; i++)
17120
+ if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
17125
+ else if (REGNO (base) != REGNO (cur_base))
17128
+ offsets[i] = INTVAL (cur_offset);
17129
+ if (GET_CODE (operands[i]) == SUBREG)
17131
+ tmp = SUBREG_REG (operands[i]);
17132
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17133
+ operands[i] = tmp;
17137
+ /* Make sure there is no dependency between the individual loads. */
17138
+ if (load && REGNO (operands[0]) == REGNO (base))
17139
+ return false; /* RAW */
17141
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
17142
+ return false; /* WAW */
17144
+ /* If the same input register is used in both stores
17145
+ when storing different constants, try to find a free register.
17146
+ For example, the code
17151
+ can be transformed into
17153
+ strd r1, r0, [r2]
17154
+ in Thumb mode assuming that r1 is free. */
17156
+ && REGNO (operands[0]) == REGNO (operands[1])
17157
+ && INTVAL (operands[4]) != INTVAL (operands[5]))
17159
+ if (TARGET_THUMB2)
17161
+ CLEAR_HARD_REG_SET (regset);
17162
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
17163
+ if (tmp == NULL_RTX)
17166
+ /* Use the new register in the first load to ensure that
17167
+ if the original input register is not dead after peephole,
17168
+ then it will have the correct constant value. */
17169
+ operands[0] = tmp;
17171
+ else if (TARGET_ARM)
17174
+ int regno = REGNO (operands[0]);
17175
+ if (!peep2_reg_dead_p (4, operands[0]))
17177
+ /* When the input register is even and is not dead after the
17178
+ pattern, it has to hold the second constant but we cannot
17179
+ form a legal STRD in ARM mode with this register as the second
17181
+ if (regno % 2 == 0)
17184
+ /* Is regno-1 free? */
17185
+ SET_HARD_REG_SET (regset);
17186
+ CLEAR_HARD_REG_BIT(regset, regno - 1);
17187
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
17188
+ if (tmp == NULL_RTX)
17191
+ operands[0] = tmp;
17195
+ /* Find a DImode register. */
17196
+ CLEAR_HARD_REG_SET (regset);
17197
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
17198
+ if (tmp != NULL_RTX)
17200
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17201
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17205
+ /* Can we use the input register to form a DI register? */
17206
+ SET_HARD_REG_SET (regset);
17207
+ CLEAR_HARD_REG_BIT(regset,
17208
+ regno % 2 == 0 ? regno + 1 : regno - 1);
17209
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
17210
+ if (tmp == NULL_RTX)
17212
+ operands[regno % 2 == 1 ? 0 : 1] = tmp;
17216
+ gcc_assert (operands[0] != NULL_RTX);
17217
+ gcc_assert (operands[1] != NULL_RTX);
17218
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
17219
+ gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17223
+ /* Make sure the instructions are ordered with lower memory access first. */
17224
+ if (offsets[0] > offsets[1])
17226
+ gap = offsets[0] - offsets[1];
17227
+ offset = offsets[1];
17229
+ /* Swap the instructions such that lower memory is accessed first. */
17230
+ SWAP_RTX (operands[0], operands[1]);
17231
+ SWAP_RTX (operands[2], operands[3]);
17233
+ SWAP_RTX (operands[4], operands[5]);
17237
+ gap = offsets[1] - offsets[0];
17238
+ offset = offsets[0];
17241
+ /* Make sure accesses are to consecutive memory locations. */
17245
+ /* Make sure we generate legal instructions. */
17246
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17250
+ /* In Thumb state, where registers are almost unconstrained, there
17251
+ is little hope to fix it. */
17252
+ if (TARGET_THUMB2)
17255
+ if (load && commute)
17257
+ /* Try reordering registers. */
17258
+ SWAP_RTX (operands[0], operands[1]);
17259
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17266
+ /* If input registers are dead after this pattern, they can be
17267
+ reordered or replaced by other registers that are free in the
17268
+ current pattern. */
17269
+ if (!peep2_reg_dead_p (4, operands[0])
17270
+ || !peep2_reg_dead_p (4, operands[1]))
17273
+ /* Try to reorder the input registers. */
17274
+ /* For example, the code
17279
+ can be transformed into
17284
+ if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17287
+ SWAP_RTX (operands[0], operands[1]);
17291
+ /* Try to find a free DI register. */
17292
+ CLEAR_HARD_REG_SET (regset);
17293
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[0]));
17294
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[1]));
17297
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
17298
+ if (tmp == NULL_RTX)
17301
+ /* DREG must be an even-numbered register in DImode.
17302
+ Split it into SI registers. */
17303
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17304
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17305
+ gcc_assert (operands[0] != NULL_RTX);
17306
+ gcc_assert (operands[1] != NULL_RTX);
17307
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
17308
+ gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17310
+ return (operands_ok_ldrd_strd (operands[0], operands[1],
17323
/* Print a symbolic form of X to the debug file, F. */
17325
@@ -14794,7 +15251,8 @@
17327
/* Constraints should ensure this. */
17328
gcc_assert (code0 == MEM && code1 == REG);
17329
- gcc_assert (REGNO (operands[1]) != IP_REGNUM);
17330
+ gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17331
+ || (TARGET_ARM && TARGET_LDRD));
17333
switch (GET_CODE (XEXP (operands[0], 0)))
17335
@@ -16387,6 +16845,148 @@
17339
+/* STRD in ARM mode requires consecutive registers. This function emits STRD
17340
+ whenever possible, otherwise it emits single-word stores. The first store
17341
+ also allocates stack space for all saved registers, using writeback with
17342
+ post-addressing mode. All other stores use offset addressing. If no STRD
17343
+ can be emitted, this function emits a sequence of single-word stores,
17344
+ and not an STM as before, because single-word stores provide more freedom
17345
+ scheduling and can be turned into an STM by peephole optimizations. */
17347
+arm_emit_strd_push (unsigned long saved_regs_mask)
17349
+ int num_regs = 0;
17350
+ int i, j, dwarf_index = 0;
17352
+ rtx dwarf = NULL_RTX;
17353
+ rtx insn = NULL_RTX;
17356
+ /* TODO: A more efficient code can be emitted by changing the
17357
+ layout, e.g., first push all pairs that can use STRD to keep the
17358
+ stack aligned, and then push all other registers. */
17359
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
17360
+ if (saved_regs_mask & (1 << i))
17363
+ gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
17364
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
17365
+ gcc_assert (num_regs > 0);
17367
+ /* Create sequence for DWARF info. */
17368
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
17370
+ /* For dwarf info, we generate explicit stack update. */
17371
+ tmp = gen_rtx_SET (VOIDmode,
17372
+ stack_pointer_rtx,
17373
+ plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
17374
+ RTX_FRAME_RELATED_P (tmp) = 1;
17375
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17377
+ /* Save registers. */
17378
+ offset = - 4 * num_regs;
17380
+ while (j <= LAST_ARM_REGNUM)
17381
+ if (saved_regs_mask & (1 << j))
17384
+ && (saved_regs_mask & (1 << (j + 1))))
17386
+ /* Current register and previous register form register pair for
17387
+ which STRD can be generated. */
17390
+ /* Allocate stack space for all saved registers. */
17391
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
17392
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
17393
+ mem = gen_frame_mem (DImode, tmp);
17396
+ else if (offset > 0)
17397
+ mem = gen_frame_mem (DImode,
17398
+ plus_constant (Pmode,
17399
+ stack_pointer_rtx,
17402
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
17404
+ tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
17405
+ RTX_FRAME_RELATED_P (tmp) = 1;
17406
+ tmp = emit_insn (tmp);
17408
+ /* Record the first store insn. */
17409
+ if (dwarf_index == 1)
17412
+ /* Generate dwarf info. */
17413
+ mem = gen_frame_mem (SImode,
17414
+ plus_constant (Pmode,
17415
+ stack_pointer_rtx,
17417
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17418
+ RTX_FRAME_RELATED_P (tmp) = 1;
17419
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17421
+ mem = gen_frame_mem (SImode,
17422
+ plus_constant (Pmode,
17423
+ stack_pointer_rtx,
17425
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
17426
+ RTX_FRAME_RELATED_P (tmp) = 1;
17427
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17434
+ /* Emit a single word store. */
17437
+ /* Allocate stack space for all saved registers. */
17438
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
17439
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
17440
+ mem = gen_frame_mem (SImode, tmp);
17443
+ else if (offset > 0)
17444
+ mem = gen_frame_mem (SImode,
17445
+ plus_constant (Pmode,
17446
+ stack_pointer_rtx,
17449
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
17451
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17452
+ RTX_FRAME_RELATED_P (tmp) = 1;
17453
+ tmp = emit_insn (tmp);
17455
+ /* Record the first store insn. */
17456
+ if (dwarf_index == 1)
17459
+ /* Generate dwarf info. */
17460
+ mem = gen_frame_mem (SImode,
17461
+ plus_constant(Pmode,
17462
+ stack_pointer_rtx,
17464
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17465
+ RTX_FRAME_RELATED_P (tmp) = 1;
17466
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17475
+ /* Attach dwarf info to the first insn we generate. */
17476
+ gcc_assert (insn != NULL_RTX);
17477
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17478
+ RTX_FRAME_RELATED_P (insn) = 1;
17481
/* Generate and emit an insn that we will recognize as a push_multi.
17482
Unfortunately, since this insn does not reflect very well the actual
17483
semantics of the operation, we need to annotate the insn for the benefit
17484
@@ -16529,6 +17129,19 @@
17488
+/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
17489
+ SIZE is the offset to be adjusted.
17490
+ DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
17492
+arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
17496
+ RTX_FRAME_RELATED_P (insn) = 1;
17497
+ dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
17498
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
17501
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
17502
SAVED_REGS_MASK shows which registers need to be restored.
17504
@@ -16586,6 +17199,17 @@
17505
if (saved_regs_mask & (1 << i))
17507
reg = gen_rtx_REG (SImode, i);
17508
+ if ((num_regs == 1) && emit_update && !return_in_pc)
17510
+ /* Emit single load with writeback. */
17511
+ tmp = gen_frame_mem (SImode,
17512
+ gen_rtx_POST_INC (Pmode,
17513
+ stack_pointer_rtx));
17514
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
17515
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17519
tmp = gen_rtx_SET (VOIDmode,
17522
@@ -16608,6 +17232,9 @@
17523
par = emit_insn (par);
17525
REG_NOTES (par) = dwarf;
17526
+ if (!return_in_pc)
17527
+ arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
17528
+ stack_pointer_rtx, stack_pointer_rtx);
17531
/* Generate and emit an insn pattern that we will recognize as a pop_multi
17532
@@ -16678,6 +17305,9 @@
17534
par = emit_insn (par);
17535
REG_NOTES (par) = dwarf;
17537
+ arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
17538
+ base_reg, base_reg);
17541
/* Generate and emit a pattern that will be recognized as LDRD pattern. If even
17542
@@ -16753,6 +17383,7 @@
17543
pattern can be emitted now. */
17544
par = emit_insn (par);
17545
REG_NOTES (par) = dwarf;
17546
+ RTX_FRAME_RELATED_P (par) = 1;
17550
@@ -16769,7 +17400,12 @@
17552
plus_constant (Pmode, stack_pointer_rtx, 4 * i));
17553
RTX_FRAME_RELATED_P (tmp) = 1;
17555
+ tmp = emit_insn (tmp);
17556
+ if (!return_in_pc)
17558
+ arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
17559
+ stack_pointer_rtx, stack_pointer_rtx);
17564
@@ -16803,9 +17439,11 @@
17567
par = emit_insn (tmp);
17568
+ REG_NOTES (par) = dwarf;
17569
+ arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
17570
+ stack_pointer_rtx, stack_pointer_rtx);
17573
- REG_NOTES (par) = dwarf;
17575
else if ((num_regs % 2) == 1 && return_in_pc)
17577
@@ -16817,6 +17455,129 @@
17581
+/* LDRD in ARM mode needs consecutive registers as operands. This function
17582
+ emits LDRD whenever possible, otherwise it emits single-word loads. It uses
17583
+ offset addressing and then generates one separate stack udpate. This provides
17584
+ more scheduling freedom, compared to writeback on every load. However,
17585
+ if the function returns using load into PC directly
17586
+ (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
17587
+ before the last load. TODO: Add a peephole optimization to recognize
17588
+ the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
17589
+ peephole optimization to merge the load at stack-offset zero
17590
+ with the stack update instruction using load with writeback
17591
+ in post-index addressing mode. */
17593
+arm_emit_ldrd_pop (unsigned long saved_regs_mask)
17597
+ rtx par = NULL_RTX;
17598
+ rtx dwarf = NULL_RTX;
17601
+ /* Restore saved registers. */
17602
+ gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
17604
+ while (j <= LAST_ARM_REGNUM)
17605
+ if (saved_regs_mask & (1 << j))
17608
+ && (saved_regs_mask & (1 << (j + 1)))
17609
+ && (j + 1) != PC_REGNUM)
17611
+ /* Current register and next register form register pair for which
17612
+ LDRD can be generated. PC is always the last register popped, and
17613
+ we handle it separately. */
17615
+ mem = gen_frame_mem (DImode,
17616
+ plus_constant (Pmode,
17617
+ stack_pointer_rtx,
17620
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
17622
+ tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
17623
+ RTX_FRAME_RELATED_P (tmp) = 1;
17624
+ tmp = emit_insn (tmp);
17626
+ /* Generate dwarf info. */
17628
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
17629
+ gen_rtx_REG (SImode, j),
17631
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
17632
+ gen_rtx_REG (SImode, j + 1),
17635
+ REG_NOTES (tmp) = dwarf;
17640
+ else if (j != PC_REGNUM)
17642
+ /* Emit a single word load. */
17644
+ mem = gen_frame_mem (SImode,
17645
+ plus_constant (Pmode,
17646
+ stack_pointer_rtx,
17649
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
17651
+ tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
17652
+ RTX_FRAME_RELATED_P (tmp) = 1;
17653
+ tmp = emit_insn (tmp);
17655
+ /* Generate dwarf info. */
17656
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
17657
+ gen_rtx_REG (SImode, j),
17663
+ else /* j == PC_REGNUM */
17669
+ /* Update the stack. */
17672
+ tmp = gen_rtx_SET (Pmode,
17673
+ stack_pointer_rtx,
17674
+ plus_constant (Pmode,
17675
+ stack_pointer_rtx,
17677
+ RTX_FRAME_RELATED_P (tmp) = 1;
17682
+ if (saved_regs_mask & (1 << PC_REGNUM))
17684
+ /* Only PC is to be popped. */
17685
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17686
+ XVECEXP (par, 0, 0) = ret_rtx;
17687
+ tmp = gen_rtx_SET (SImode,
17688
+ gen_rtx_REG (SImode, PC_REGNUM),
17689
+ gen_frame_mem (SImode,
17690
+ gen_rtx_POST_INC (SImode,
17691
+ stack_pointer_rtx)));
17692
+ RTX_FRAME_RELATED_P (tmp) = 1;
17693
+ XVECEXP (par, 0, 1) = tmp;
17694
+ par = emit_jump_insn (par);
17696
+ /* Generate dwarf info. */
17697
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
17698
+ gen_rtx_REG (SImode, PC_REGNUM),
17700
+ REG_NOTES (par) = dwarf;
17704
/* Calculate the size of the return value that is passed in registers. */
17706
arm_size_return_regs (void)
17707
@@ -16841,11 +17602,27 @@
17708
|| df_regs_ever_live_p (LR_REGNUM));
17711
+/* We do not know if r3 will be available because
17712
+ we do have an indirect tailcall happening in this
17713
+ particular case. */
17715
+is_indirect_tailcall_p (rtx call)
17717
+ rtx pat = PATTERN (call);
17719
+ /* Indirect tail call. */
17720
+ pat = XVECEXP (pat, 0, 0);
17721
+ if (GET_CODE (pat) == SET)
17722
+ pat = SET_SRC (pat);
17724
+ pat = XEXP (XEXP (pat, 0), 0);
17725
+ return REG_P (pat);
17728
/* Return true if r3 is used by any of the tail call insns in the
17729
current function. */
17731
-any_sibcall_uses_r3 (void)
17732
+any_sibcall_could_use_r3 (void)
17736
@@ -16859,7 +17636,8 @@
17737
if (!CALL_P (call))
17738
call = prev_nonnote_nondebug_insn (call);
17739
gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
17740
- if (find_regno_fusage (call, USE, 3))
17741
+ if (find_regno_fusage (call, USE, 3)
17742
+ || is_indirect_tailcall_p (call))
17746
@@ -17026,9 +17804,10 @@
17747
/* If it is safe to use r3, then do so. This sometimes
17748
generates better code on Thumb-2 by avoiding the need to
17749
use 32-bit push/pop instructions. */
17750
- if (! any_sibcall_uses_r3 ()
17751
+ if (! any_sibcall_could_use_r3 ()
17752
&& arm_size_return_regs () <= 12
17753
- && (offsets->saved_regs_mask & (1 << 3)) == 0)
17754
+ && (offsets->saved_regs_mask & (1 << 3)) == 0
17755
+ && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
17759
@@ -17460,6 +18239,12 @@
17761
thumb2_emit_strd_push (live_regs_mask);
17763
+ else if (TARGET_ARM
17764
+ && !TARGET_APCS_FRAME
17765
+ && !IS_INTERRUPT (func_type))
17767
+ arm_emit_strd_push (live_regs_mask);
17771
insn = emit_multi_reg_push (live_regs_mask);
17772
@@ -19339,6 +20124,7 @@
17780
@@ -19356,14 +20142,15 @@
17781
#define TYPE_MODE_BIT(X) (1 << (X))
17783
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17784
- | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17785
- | TYPE_MODE_BIT (T_DI))
17786
+ | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
17787
+ | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
17788
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17789
| TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17790
| TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17792
#define v8qi_UP T_V8QI
17793
#define v4hi_UP T_V4HI
17794
+#define v4hf_UP T_V4HF
17795
#define v2si_UP T_V2SI
17796
#define v2sf_UP T_V2SF
17798
@@ -19399,6 +20186,8 @@
17802
+ NEON_FLOAT_WIDEN,
17803
+ NEON_FLOAT_NARROW,
17807
@@ -19459,7 +20248,8 @@
17808
VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17809
{#N, NEON_##T, UP (J), CF (N, J), 0}
17811
-/* The mode entries in the following table correspond to the "key" type of the
17812
+/* The NEON builtin data can be found in arm_neon_builtins.def.
17813
+ The mode entries in the following table correspond to the "key" type of the
17814
instruction variant, i.e. equivalent to that which would be specified after
17815
the assembler mnemonic, which usually refers to the last vector operand.
17816
(Signed/unsigned/polynomial types are not differentiated between though, and
17817
@@ -19469,196 +20259,7 @@
17819
static neon_builtin_datum neon_builtin_data[] =
17821
- VAR10 (BINOP, vadd,
17822
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17823
- VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
17824
- VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
17825
- VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17826
- VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17827
- VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
17828
- VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17829
- VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17830
- VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
17831
- VAR2 (TERNOP, vfma, v2sf, v4sf),
17832
- VAR2 (TERNOP, vfms, v2sf, v4sf),
17833
- VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17834
- VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
17835
- VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
17836
- VAR2 (TERNOP, vqdmlal, v4hi, v2si),
17837
- VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
17838
- VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
17839
- VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
17840
- VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
17841
- VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
17842
- VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
17843
- VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
17844
- VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
17845
- VAR2 (BINOP, vqdmull, v4hi, v2si),
17846
- VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17847
- VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17848
- VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17849
- VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
17850
- VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
17851
- VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
17852
- VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17853
- VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17854
- VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17855
- VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
17856
- VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17857
- VAR10 (BINOP, vsub,
17858
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17859
- VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
17860
- VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
17861
- VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17862
- VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17863
- VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
17864
- VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17865
- VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17866
- VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17867
- VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17868
- VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17869
- VAR2 (BINOP, vcage, v2sf, v4sf),
17870
- VAR2 (BINOP, vcagt, v2sf, v4sf),
17871
- VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17872
- VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17873
- VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
17874
- VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17875
- VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
17876
- VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17877
- VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17878
- VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
17879
- VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17880
- VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17881
- VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
17882
- VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
17883
- VAR2 (BINOP, vrecps, v2sf, v4sf),
17884
- VAR2 (BINOP, vrsqrts, v2sf, v4sf),
17885
- VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17886
- VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17887
- VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17888
- VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17889
- VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17890
- VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17891
- VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17892
- VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17893
- VAR2 (UNOP, vcnt, v8qi, v16qi),
17894
- VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
17895
- VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
17896
- VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17897
- /* FIXME: vget_lane supports more variants than this! */
17898
- VAR10 (GETLANE, vget_lane,
17899
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17900
- VAR10 (SETLANE, vset_lane,
17901
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17902
- VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
17903
- VAR10 (DUP, vdup_n,
17904
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17905
- VAR10 (DUPLANE, vdup_lane,
17906
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17907
- VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
17908
- VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
17909
- VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
17910
- VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
17911
- VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
17912
- VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
17913
- VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
17914
- VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17915
- VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17916
- VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
17917
- VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
17918
- VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17919
- VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
17920
- VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
17921
- VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17922
- VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17923
- VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
17924
- VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
17925
- VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17926
- VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
17927
- VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
17928
- VAR10 (BINOP, vext,
17929
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17930
- VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17931
- VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
17932
- VAR2 (UNOP, vrev16, v8qi, v16qi),
17933
- VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
17934
- VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
17935
- VAR10 (SELECT, vbsl,
17936
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17937
- VAR2 (RINT, vrintn, v2sf, v4sf),
17938
- VAR2 (RINT, vrinta, v2sf, v4sf),
17939
- VAR2 (RINT, vrintp, v2sf, v4sf),
17940
- VAR2 (RINT, vrintm, v2sf, v4sf),
17941
- VAR2 (RINT, vrintz, v2sf, v4sf),
17942
- VAR2 (RINT, vrintx, v2sf, v4sf),
17943
- VAR1 (VTBL, vtbl1, v8qi),
17944
- VAR1 (VTBL, vtbl2, v8qi),
17945
- VAR1 (VTBL, vtbl3, v8qi),
17946
- VAR1 (VTBL, vtbl4, v8qi),
17947
- VAR1 (VTBX, vtbx1, v8qi),
17948
- VAR1 (VTBX, vtbx2, v8qi),
17949
- VAR1 (VTBX, vtbx3, v8qi),
17950
- VAR1 (VTBX, vtbx4, v8qi),
17951
- VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17952
- VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17953
- VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17954
- VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
17955
- VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
17956
- VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
17957
- VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
17958
- VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
17959
- VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
17960
- VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
17961
- VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
17962
- VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
17963
- VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
17964
- VAR10 (LOAD1, vld1,
17965
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17966
- VAR10 (LOAD1LANE, vld1_lane,
17967
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17968
- VAR10 (LOAD1, vld1_dup,
17969
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17970
- VAR10 (STORE1, vst1,
17971
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17972
- VAR10 (STORE1LANE, vst1_lane,
17973
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17974
- VAR9 (LOADSTRUCT,
17975
- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17976
- VAR7 (LOADSTRUCTLANE, vld2_lane,
17977
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17978
- VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
17979
- VAR9 (STORESTRUCT, vst2,
17980
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17981
- VAR7 (STORESTRUCTLANE, vst2_lane,
17982
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17983
- VAR9 (LOADSTRUCT,
17984
- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17985
- VAR7 (LOADSTRUCTLANE, vld3_lane,
17986
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17987
- VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
17988
- VAR9 (STORESTRUCT, vst3,
17989
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17990
- VAR7 (STORESTRUCTLANE, vst3_lane,
17991
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17992
- VAR9 (LOADSTRUCT, vld4,
17993
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17994
- VAR7 (LOADSTRUCTLANE, vld4_lane,
17995
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17996
- VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
17997
- VAR9 (STORESTRUCT, vst4,
17998
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17999
- VAR7 (STORESTRUCTLANE, vst4_lane,
18000
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18001
- VAR10 (LOGICBINOP, vand,
18002
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18003
- VAR10 (LOGICBINOP, vorr,
18004
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18005
- VAR10 (BINOP, veor,
18006
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18007
- VAR10 (LOGICBINOP, vbic,
18008
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18009
- VAR10 (LOGICBINOP, vorn,
18010
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18011
+#include "arm_neon_builtins.def"
18015
@@ -19673,9 +20274,36 @@
18019
-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18020
- symbolic names defined here (which would require too much duplication).
18022
+#define CF(N,X) ARM_BUILTIN_NEON_##N##X
18023
+#define VAR1(T, N, A) \
18025
+#define VAR2(T, N, A, B) \
18026
+ VAR1 (T, N, A), \
18028
+#define VAR3(T, N, A, B, C) \
18029
+ VAR2 (T, N, A, B), \
18031
+#define VAR4(T, N, A, B, C, D) \
18032
+ VAR3 (T, N, A, B, C), \
18034
+#define VAR5(T, N, A, B, C, D, E) \
18035
+ VAR4 (T, N, A, B, C, D), \
18037
+#define VAR6(T, N, A, B, C, D, E, F) \
18038
+ VAR5 (T, N, A, B, C, D, E), \
18040
+#define VAR7(T, N, A, B, C, D, E, F, G) \
18041
+ VAR6 (T, N, A, B, C, D, E, F), \
18043
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
18044
+ VAR7 (T, N, A, B, C, D, E, F, G), \
18046
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18047
+ VAR8 (T, N, A, B, C, D, E, F, G, H), \
18049
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18050
+ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18054
ARM_BUILTIN_GETWCGR0,
18055
@@ -19924,11 +20552,25 @@
18057
ARM_BUILTIN_WMERGE,
18059
- ARM_BUILTIN_NEON_BASE,
18060
+#include "arm_neon_builtins.def"
18062
- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18066
+#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
18080
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18083
@@ -19939,6 +20581,7 @@
18085
tree neon_intQI_type_node;
18086
tree neon_intHI_type_node;
18087
+ tree neon_floatHF_type_node;
18088
tree neon_polyQI_type_node;
18089
tree neon_polyHI_type_node;
18090
tree neon_intSI_type_node;
18091
@@ -19965,6 +20608,7 @@
18093
tree V8QI_type_node;
18094
tree V4HI_type_node;
18095
+ tree V4HF_type_node;
18096
tree V2SI_type_node;
18097
tree V2SF_type_node;
18098
tree V16QI_type_node;
18099
@@ -20019,6 +20663,9 @@
18100
neon_float_type_node = make_node (REAL_TYPE);
18101
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18102
layout_type (neon_float_type_node);
18103
+ neon_floatHF_type_node = make_node (REAL_TYPE);
18104
+ TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
18105
+ layout_type (neon_floatHF_type_node);
18107
/* Define typedefs which exactly correspond to the modes we are basing vector
18108
types on. If you change these names you'll need to change
18109
@@ -20027,6 +20674,8 @@
18110
"__builtin_neon_qi");
18111
(*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18112
"__builtin_neon_hi");
18113
+ (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
18114
+ "__builtin_neon_hf");
18115
(*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18116
"__builtin_neon_si");
18117
(*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18118
@@ -20068,6 +20717,8 @@
18119
build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18121
build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18123
+ build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
18125
build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18127
@@ -20190,7 +20841,7 @@
18128
neon_builtin_datum *d = &neon_builtin_data[i];
18130
const char* const modenames[] = {
18131
- "v8qi", "v4hi", "v2si", "v2sf", "di",
18132
+ "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
18133
"v16qi", "v8hi", "v4si", "v4sf", "v2di",
18136
@@ -20393,8 +21044,9 @@
18137
case NEON_REINTERP:
18139
/* We iterate over 5 doubleword types, then 5 quadword
18141
- int rhs = d->mode % 5;
18142
+ types. V4HF is not a type used in reinterpret, so we translate
18143
+ d->mode to the correct index in reinterp_ftype_dreg. */
18144
+ int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
18145
switch (insn_data[d->code].operand[0].mode)
18147
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18148
@@ -20411,7 +21063,38 @@
18152
+ case NEON_FLOAT_WIDEN:
18154
+ tree eltype = NULL_TREE;
18155
+ tree return_type = NULL_TREE;
18157
+ switch (insn_data[d->code].operand[1].mode)
18160
+ eltype = V4HF_type_node;
18161
+ return_type = V4SF_type_node;
18163
+ default: gcc_unreachable ();
18165
+ ftype = build_function_type_list (return_type, eltype, NULL);
18168
+ case NEON_FLOAT_NARROW:
18170
+ tree eltype = NULL_TREE;
18171
+ tree return_type = NULL_TREE;
18173
+ switch (insn_data[d->code].operand[1].mode)
18176
+ eltype = V4SF_type_node;
18177
+ return_type = V4HF_type_node;
18179
+ default: gcc_unreachable ();
18181
+ ftype = build_function_type_list (return_type, eltype, NULL);
18185
gcc_unreachable ();
18187
@@ -21408,6 +22091,8 @@
18191
+ case NEON_FLOAT_WIDEN:
18192
+ case NEON_FLOAT_NARROW:
18193
case NEON_REINTERP:
18194
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
18195
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18196
@@ -21605,7 +22290,7 @@
18200
- int fcode = DECL_FUNCTION_CODE (fndecl);
18201
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18203
enum machine_mode tmode;
18204
enum machine_mode mode0;
18205
@@ -23322,7 +24007,7 @@
18206
all we really need to check here is if single register is to be
18207
returned, or multiple register return. */
18209
-thumb2_expand_return (void)
18210
+thumb2_expand_return (bool simple_return)
18213
unsigned long saved_regs_mask;
18214
@@ -23335,7 +24020,7 @@
18215
if (saved_regs_mask & (1 << i))
18218
- if (saved_regs_mask)
18219
+ if (!simple_return && saved_regs_mask)
18223
@@ -23613,6 +24298,7 @@
18225
if (frame_pointer_needed)
18228
/* Restore stack pointer if necessary. */
18231
@@ -23623,9 +24309,12 @@
18232
/* Force out any pending memory operations that reference stacked data
18233
before stack de-allocation occurs. */
18234
emit_insn (gen_blockage ());
18235
- emit_insn (gen_addsi3 (stack_pointer_rtx,
18236
- hard_frame_pointer_rtx,
18237
- GEN_INT (amount)));
18238
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18239
+ hard_frame_pointer_rtx,
18240
+ GEN_INT (amount)));
18241
+ arm_add_cfa_adjust_cfa_note (insn, amount,
18242
+ stack_pointer_rtx,
18243
+ hard_frame_pointer_rtx);
18245
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
18247
@@ -23635,16 +24324,25 @@
18249
/* In Thumb-2 mode, the frame pointer points to the last saved
18251
- amount = offsets->locals_base - offsets->saved_regs;
18253
- emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18254
- hard_frame_pointer_rtx,
18255
- GEN_INT (amount)));
18256
+ amount = offsets->locals_base - offsets->saved_regs;
18259
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18260
+ hard_frame_pointer_rtx,
18261
+ GEN_INT (amount)));
18262
+ arm_add_cfa_adjust_cfa_note (insn, amount,
18263
+ hard_frame_pointer_rtx,
18264
+ hard_frame_pointer_rtx);
18267
/* Force out any pending memory operations that reference stacked data
18268
before stack de-allocation occurs. */
18269
emit_insn (gen_blockage ());
18270
- emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
18271
+ insn = emit_insn (gen_movsi (stack_pointer_rtx,
18272
+ hard_frame_pointer_rtx));
18273
+ arm_add_cfa_adjust_cfa_note (insn, 0,
18274
+ stack_pointer_rtx,
18275
+ hard_frame_pointer_rtx);
18276
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
18278
emit_insn (gen_force_register_use (stack_pointer_rtx));
18279
@@ -23657,12 +24355,15 @@
18280
amount = offsets->outgoing_args - offsets->saved_regs;
18284
/* Force out any pending memory operations that reference stacked data
18285
before stack de-allocation occurs. */
18286
emit_insn (gen_blockage ());
18287
- emit_insn (gen_addsi3 (stack_pointer_rtx,
18288
- stack_pointer_rtx,
18289
- GEN_INT (amount)));
18290
+ tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
18291
+ stack_pointer_rtx,
18292
+ GEN_INT (amount)));
18293
+ arm_add_cfa_adjust_cfa_note (tmp, amount,
18294
+ stack_pointer_rtx, stack_pointer_rtx);
18295
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
18297
emit_insn (gen_force_register_use (stack_pointer_rtx));
18298
@@ -23715,6 +24416,8 @@
18299
REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
18300
gen_rtx_REG (V2SImode, i),
18302
+ arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
18303
+ stack_pointer_rtx, stack_pointer_rtx);
18306
if (saved_regs_mask)
18307
@@ -23762,6 +24465,9 @@
18308
REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
18309
gen_rtx_REG (SImode, i),
18311
+ arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
18312
+ stack_pointer_rtx,
18313
+ stack_pointer_rtx);
18317
@@ -23772,6 +24478,8 @@
18320
thumb2_emit_ldrd_pop (saved_regs_mask);
18321
+ else if (TARGET_ARM && !IS_INTERRUPT (func_type))
18322
+ arm_emit_ldrd_pop (saved_regs_mask);
18324
arm_emit_multi_reg_pop (saved_regs_mask);
18326
@@ -23784,10 +24492,34 @@
18329
if (crtl->args.pretend_args_size)
18330
- emit_insn (gen_addsi3 (stack_pointer_rtx,
18331
- stack_pointer_rtx,
18332
- GEN_INT (crtl->args.pretend_args_size)));
18335
+ rtx dwarf = NULL_RTX;
18336
+ rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
18337
+ stack_pointer_rtx,
18338
+ GEN_INT (crtl->args.pretend_args_size)));
18340
+ RTX_FRAME_RELATED_P (tmp) = 1;
18342
+ if (cfun->machine->uses_anonymous_args)
18344
+ /* Restore pretend args. Refer arm_expand_prologue on how to save
18345
+ pretend_args in stack. */
18346
+ int num_regs = crtl->args.pretend_args_size / 4;
18347
+ saved_regs_mask = (0xf0 >> num_regs) & 0xf;
18348
+ for (j = 0, i = 0; j < num_regs; i++)
18349
+ if (saved_regs_mask & (1 << i))
18351
+ rtx reg = gen_rtx_REG (SImode, i);
18352
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
18355
+ REG_NOTES (tmp) = dwarf;
18357
+ arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
18358
+ stack_pointer_rtx, stack_pointer_rtx);
18361
if (!really_return)
18364
@@ -25040,7 +25772,7 @@
18366
/* Neon also supports V2SImode, etc. listed in the clause below. */
18367
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18368
- || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18369
+ || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18372
if ((TARGET_NEON || TARGET_IWMMXT)
18373
@@ -25203,9 +25935,8 @@
18375
nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
18376
p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
18377
- regno = (regno - FIRST_VFP_REGNUM) / 2;
18378
for (i = 0; i < nregs; i++)
18379
- XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
18380
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
18384
@@ -25455,9 +26186,17 @@
18385
handled_one = true;
18388
+ /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
18389
+ to get correct dwarf information for shrink-wrap. We should not
18390
+ emit unwind information for it because these are used either for
18391
+ pretend arguments or notes to adjust sp and restore registers from
18393
+ case REG_CFA_ADJUST_CFA:
18394
+ case REG_CFA_RESTORE:
18397
case REG_CFA_DEF_CFA:
18398
case REG_CFA_EXPRESSION:
18399
- case REG_CFA_ADJUST_CFA:
18400
case REG_CFA_OFFSET:
18401
/* ??? Only handling here what we actually emit. */
18402
gcc_unreachable ();
18403
@@ -25855,6 +26594,7 @@
18411
@@ -25883,6 +26623,7 @@
18412
{ V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18413
{ V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18414
{ V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18415
+ { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
18416
{ V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18417
{ V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18418
{ V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18419
@@ -25981,6 +26722,60 @@
18420
return !TARGET_THUMB1;
18424
+arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
18426
+ enum machine_mode in_mode, out_mode;
18429
+ if (TREE_CODE (type_out) != VECTOR_TYPE
18430
+ || TREE_CODE (type_in) != VECTOR_TYPE
18431
+ || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
18432
+ return NULL_TREE;
18434
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
18435
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
18436
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
18437
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
18439
+/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
18440
+ decl of the vectorized builtin for the appropriate vector mode.
18441
+ NULL_TREE is returned if no such builtin is available. */
18442
+#undef ARM_CHECK_BUILTIN_MODE
18443
+#define ARM_CHECK_BUILTIN_MODE(C) \
18444
+ (out_mode == SFmode && out_n == C \
18445
+ && in_mode == SFmode && in_n == C)
18447
+#undef ARM_FIND_VRINT_VARIANT
18448
+#define ARM_FIND_VRINT_VARIANT(N) \
18449
+ (ARM_CHECK_BUILTIN_MODE (2) \
18450
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
18451
+ : (ARM_CHECK_BUILTIN_MODE (4) \
18452
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
18455
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
18457
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
18460
+ case BUILT_IN_FLOORF:
18461
+ return ARM_FIND_VRINT_VARIANT (vrintm);
18462
+ case BUILT_IN_CEILF:
18463
+ return ARM_FIND_VRINT_VARIANT (vrintp);
18464
+ case BUILT_IN_TRUNCF:
18465
+ return ARM_FIND_VRINT_VARIANT (vrintz);
18466
+ case BUILT_IN_ROUNDF:
18467
+ return ARM_FIND_VRINT_VARIANT (vrinta);
18469
+ return NULL_TREE;
18472
+ return NULL_TREE;
18474
+#undef ARM_CHECK_BUILTIN_MODE
18475
+#undef ARM_FIND_VRINT_VARIANT
18477
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
18478
static HOST_WIDE_INT
18479
arm_vector_alignment (const_tree type)
18480
@@ -26211,40 +27006,72 @@
18481
emit_insn (gen_memory_barrier ());
18484
-/* Emit the load-exclusive and store-exclusive instructions. */
18485
+/* Emit the load-exclusive and store-exclusive instructions.
18486
+ Use acquire and release versions if necessary. */
18489
-arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
18490
+arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
18492
rtx (*gen) (rtx, rtx);
18497
- case QImode: gen = gen_arm_load_exclusiveqi; break;
18498
- case HImode: gen = gen_arm_load_exclusivehi; break;
18499
- case SImode: gen = gen_arm_load_exclusivesi; break;
18500
- case DImode: gen = gen_arm_load_exclusivedi; break;
18502
- gcc_unreachable ();
18505
+ case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
18506
+ case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
18507
+ case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
18508
+ case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
18510
+ gcc_unreachable ();
18517
+ case QImode: gen = gen_arm_load_exclusiveqi; break;
18518
+ case HImode: gen = gen_arm_load_exclusivehi; break;
18519
+ case SImode: gen = gen_arm_load_exclusivesi; break;
18520
+ case DImode: gen = gen_arm_load_exclusivedi; break;
18522
+ gcc_unreachable ();
18526
emit_insn (gen (rval, mem));
18530
-arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
18531
+arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
18532
+ rtx mem, bool rel)
18534
rtx (*gen) (rtx, rtx, rtx);
18539
- case QImode: gen = gen_arm_store_exclusiveqi; break;
18540
- case HImode: gen = gen_arm_store_exclusivehi; break;
18541
- case SImode: gen = gen_arm_store_exclusivesi; break;
18542
- case DImode: gen = gen_arm_store_exclusivedi; break;
18544
- gcc_unreachable ();
18547
+ case QImode: gen = gen_arm_store_release_exclusiveqi; break;
18548
+ case HImode: gen = gen_arm_store_release_exclusivehi; break;
18549
+ case SImode: gen = gen_arm_store_release_exclusivesi; break;
18550
+ case DImode: gen = gen_arm_store_release_exclusivedi; break;
18552
+ gcc_unreachable ();
18559
+ case QImode: gen = gen_arm_store_exclusiveqi; break;
18560
+ case HImode: gen = gen_arm_store_exclusivehi; break;
18561
+ case SImode: gen = gen_arm_store_exclusivesi; break;
18562
+ case DImode: gen = gen_arm_store_exclusivedi; break;
18564
+ gcc_unreachable ();
18568
emit_insn (gen (bval, rval, mem));
18570
@@ -26279,6 +27106,15 @@
18571
mod_f = operands[7];
18572
mode = GET_MODE (mem);
18574
+ /* Normally the succ memory model must be stronger than fail, but in the
18575
+ unlikely event of fail being ACQUIRE and succ being RELEASE we need to
18576
+ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
18578
+ if (TARGET_HAVE_LDACQ
18579
+ && INTVAL (mod_f) == MEMMODEL_ACQUIRE
18580
+ && INTVAL (mod_s) == MEMMODEL_RELEASE)
18581
+ mod_s = GEN_INT (MEMMODEL_ACQ_REL);
18586
@@ -26353,8 +27189,20 @@
18587
scratch = operands[7];
18588
mode = GET_MODE (mem);
18590
- arm_pre_atomic_barrier (mod_s);
18591
+ bool use_acquire = TARGET_HAVE_LDACQ
18592
+ && !(mod_s == MEMMODEL_RELAXED
18593
+ || mod_s == MEMMODEL_CONSUME
18594
+ || mod_s == MEMMODEL_RELEASE);
18596
+ bool use_release = TARGET_HAVE_LDACQ
18597
+ && !(mod_s == MEMMODEL_RELAXED
18598
+ || mod_s == MEMMODEL_CONSUME
18599
+ || mod_s == MEMMODEL_ACQUIRE);
18601
+ /* Checks whether a barrier is needed and emits one accordingly. */
18602
+ if (!(use_acquire || use_release))
18603
+ arm_pre_atomic_barrier (mod_s);
18608
@@ -26363,7 +27211,7 @@
18610
label2 = gen_label_rtx ();
18612
- arm_emit_load_exclusive (mode, rval, mem);
18613
+ arm_emit_load_exclusive (mode, rval, mem, use_acquire);
18615
cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
18616
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18617
@@ -26371,7 +27219,7 @@
18618
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
18619
emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
18621
- arm_emit_store_exclusive (mode, scratch, mem, newval);
18622
+ arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
18624
/* Weak or strong, we want EQ to be true for success, so that we
18625
match the flags that we got from the compare above. */
18626
@@ -26390,7 +27238,9 @@
18627
if (mod_f != MEMMODEL_RELAXED)
18628
emit_label (label2);
18630
- arm_post_atomic_barrier (mod_s);
18631
+ /* Checks whether a barrier is needed and emits one accordingly. */
18632
+ if (!(use_acquire || use_release))
18633
+ arm_post_atomic_barrier (mod_s);
18635
if (mod_f == MEMMODEL_RELAXED)
18636
emit_label (label2);
18637
@@ -26405,8 +27255,20 @@
18638
enum machine_mode wmode = (mode == DImode ? DImode : SImode);
18641
- arm_pre_atomic_barrier (model);
18642
+ bool use_acquire = TARGET_HAVE_LDACQ
18643
+ && !(model == MEMMODEL_RELAXED
18644
+ || model == MEMMODEL_CONSUME
18645
+ || model == MEMMODEL_RELEASE);
18647
+ bool use_release = TARGET_HAVE_LDACQ
18648
+ && !(model == MEMMODEL_RELAXED
18649
+ || model == MEMMODEL_CONSUME
18650
+ || model == MEMMODEL_ACQUIRE);
18652
+ /* Checks whether a barrier is needed and emits one accordingly. */
18653
+ if (!(use_acquire || use_release))
18654
+ arm_pre_atomic_barrier (model);
18656
label = gen_label_rtx ();
18657
emit_label (label);
18659
@@ -26418,7 +27280,7 @@
18661
value = simplify_gen_subreg (wmode, value, mode, 0);
18663
- arm_emit_load_exclusive (mode, old_out, mem);
18664
+ arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
18668
@@ -26466,12 +27328,15 @@
18672
- arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
18673
+ arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
18676
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18677
emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
18679
- arm_post_atomic_barrier (model);
18680
+ /* Checks whether a barrier is needed and emits one accordingly. */
18681
+ if (!(use_acquire || use_release))
18682
+ arm_post_atomic_barrier (model);
18685
#define MAX_VECT_LEN 16
18686
--- a/src/gcc/config/arm/arm.h
18687
+++ b/src/gcc/config/arm/arm.h
18688
@@ -350,10 +350,16 @@
18689
#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \
18692
+/* Nonzero if this chip supports load-acquire and store-release. */
18693
+#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8)
18695
/* Nonzero if integer division instructions supported. */
18696
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
18697
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
18699
+/* Should NEON be used for 64-bits bitops. */
18700
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
18702
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
18703
then TARGET_AAPCS_BASED must be true -- but the converse does not
18704
hold. TARGET_BPABI implies the use of the BPABI runtime library,
18705
@@ -539,6 +545,10 @@
18706
/* Nonzero if chip supports integer division instruction in Thumb mode. */
18707
extern int arm_arch_thumb_hwdiv;
18709
+/* Nonzero if we should use Neon to handle 64-bits operations rather
18710
+ than core registers. */
18711
+extern int prefer_neon_for_64bits;
18713
#ifndef TARGET_DEFAULT
18714
#define TARGET_DEFAULT (MASK_APCS_FRAME)
18716
@@ -1040,7 +1050,7 @@
18717
/* Modes valid for Neon D registers. */
18718
#define VALID_NEON_DREG_MODE(MODE) \
18719
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
18720
- || (MODE) == V2SFmode || (MODE) == DImode)
18721
+ || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
18723
/* Modes valid for Neon Q registers. */
18724
#define VALID_NEON_QREG_MODE(MODE) \
18725
@@ -1130,6 +1140,7 @@
18729
+ CALLER_SAVE_REGS,
18733
@@ -1156,6 +1167,7 @@
18737
+ "CALLER_SAVE_REGS", \
18740
"VFP_D0_D7_REGS", \
18741
@@ -1181,6 +1193,7 @@
18742
{ 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
18743
{ 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
18744
{ 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
18745
+ { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
18746
{ 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
18747
{ 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
18748
{ 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \
18749
@@ -1639,7 +1652,7 @@
18751
#define EXIT_IGNORE_STACK 1
18753
-#define EPILOGUE_USES(REGNO) ((REGNO) == LR_REGNUM)
18754
+#define EPILOGUE_USES(REGNO) (epilogue_completed && (REGNO) == LR_REGNUM)
18756
/* Determine if the epilogue should be output as RTL.
18757
You should override this if you define FUNCTION_EXTRA_EPILOGUE. */
18758
--- a/src/gcc/config/arm/unspecs.md
18759
+++ b/src/gcc/config/arm/unspecs.md
18761
; FPSCR rounding mode and signal inexactness.
18762
UNSPEC_VRINTA ; Represent a float to integral float rounding
18763
; towards nearest, ties away from zero.
18764
+ UNSPEC_RRX ; Rotate Right with Extend shifts register right
18765
+ ; by one place, with Carry flag shifted into bit[31].
18768
(define_c_enum "unspec" [
18769
@@ -139,6 +141,10 @@
18770
VUNSPEC_ATOMIC_OP ; Represent an atomic operation.
18771
VUNSPEC_LL ; Represent a load-register-exclusive.
18772
VUNSPEC_SC ; Represent a store-register-exclusive.
18773
+ VUNSPEC_LAX ; Represent a load-register-acquire-exclusive.
18774
+ VUNSPEC_SLX ; Represent a store-register-release-exclusive.
18775
+ VUNSPEC_LDA ; Represent a store-register-acquire.
18776
+ VUNSPEC_STL ; Represent a store-register-release.
18779
;; Enumerators for NEON unspecs.
18780
--- a/src/gcc/config/arm/arm-cores.def
18781
+++ b/src/gcc/config/arm/arm-cores.def
18782
@@ -129,9 +129,11 @@
18783
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
18784
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
18785
ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
18786
+ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex_a5)
18787
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
18788
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
18789
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
18790
+ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
18791
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
18792
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
18793
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
18794
--- a/src/gcc/config/arm/arm-tune.md
18795
+++ b/src/gcc/config/arm/arm-tune.md
18797
;; -*- buffer-read-only: t -*-
18798
;; Generated automatically by gentune.sh from arm-cores.def
18799
(define_attr "tune"
18800
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
18801
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexr4,cortexr4f,cortexr5,cortexr7,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
18802
(const (symbol_ref "((enum attr_tune) arm_tune)")))
18803
--- a/src/gcc/config/arm/arm-protos.h
18804
+++ b/src/gcc/config/arm/arm-protos.h
18805
@@ -24,12 +24,13 @@
18807
extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
18808
extern int use_return_insn (int, rtx);
18809
+extern bool use_simple_return_p (void);
18810
extern enum reg_class arm_regno_class (int);
18811
extern void arm_load_pic_register (unsigned long);
18812
extern int arm_volatile_func (void);
18813
extern void arm_expand_prologue (void);
18814
extern void arm_expand_epilogue (bool);
18815
-extern void thumb2_expand_return (void);
18816
+extern void thumb2_expand_return (bool);
18817
extern const char *arm_strip_name_encoding (const char *);
18818
extern void arm_asm_output_labelref (FILE *, const char *);
18819
extern void thumb2_asm_output_opcode (FILE *);
18821
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
18822
rtx (*) (rtx, rtx, rtx));
18823
extern rtx neon_make_constant (rtx);
18824
+extern tree arm_builtin_vectorized_function (tree, tree, tree);
18825
extern void neon_expand_vector_init (rtx, rtx);
18826
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
18827
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
18828
@@ -117,7 +119,9 @@
18829
extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
18830
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
18831
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
18832
+extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
18833
extern int arm_gen_movmemqi (rtx *);
18834
+extern bool gen_movmem_ldrd_strd (rtx *);
18835
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
18836
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
18838
@@ -269,6 +273,8 @@
18839
bool logical_op_non_short_circuit[2];
18840
/* Vectorizer costs. */
18841
const struct cpu_vec_costs* vec_costs;
18842
+ /* Prefer Neon for 64-bit bitops. */
18843
+ bool prefer_neon_for_64bits;
18846
extern const struct tune_params *current_tune;
18847
--- a/src/gcc/config/arm/vfp.md
18848
+++ b/src/gcc/config/arm/vfp.md
18849
@@ -132,8 +132,8 @@
18852
(define_insn "*movdi_vfp"
18853
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,r,w,w, Uv")
18854
- (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
18855
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
18856
+ (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
18857
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8
18858
&& ( register_operand (operands[0], DImode)
18859
|| register_operand (operands[1], DImode))
18860
--- a/src/gcc/config/arm/neon.md
18861
+++ b/src/gcc/config/arm/neon.md
18862
@@ -487,7 +487,7 @@
18863
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
18864
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
18865
(set_attr "length" "*,8,8,*,8,8,8")
18866
- (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
18867
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
18870
(define_insn "*sub<mode>3_neon"
18871
@@ -524,7 +524,7 @@
18872
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
18873
(set_attr "conds" "*,clob,clob,clob,*")
18874
(set_attr "length" "*,8,8,8,*")
18875
- (set_attr "arch" "nota8,*,*,*,onlya8")]
18876
+ (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
18879
(define_insn "*mul<mode>3_neon"
18880
@@ -699,7 +699,7 @@
18882
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
18883
(set_attr "length" "*,*,8,8,*,*")
18884
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
18885
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
18888
;; The concrete forms of the Neon immediate-logic instructions are vbic and
18889
@@ -724,29 +724,6 @@
18890
[(set_attr "neon_type" "neon_int_1")]
18893
-(define_insn "anddi3_neon"
18894
- [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
18895
- (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
18896
- (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r,w,DL")))]
18899
- switch (which_alternative)
18901
- case 0: /* fall through */
18902
- case 4: return "vand\t%P0, %P1, %P2";
18903
- case 1: /* fall through */
18904
- case 5: return neon_output_logic_immediate ("vand", &operands[2],
18905
- DImode, 1, VALID_NEON_QREG_MODE (DImode));
18906
- case 2: return "#";
18907
- case 3: return "#";
18908
- default: gcc_unreachable ();
18911
- [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
18912
- (set_attr "length" "*,*,8,8,*,*")
18913
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
18916
(define_insn "orn<mode>3_neon"
18917
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
18918
(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
18919
@@ -840,7 +817,7 @@
18920
veor\t%P0, %P1, %P2"
18921
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
18922
(set_attr "length" "*,8,8,*")
18923
- (set_attr "arch" "nota8,*,*,onlya8")]
18924
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
18927
(define_insn "one_cmpl<mode>2"
18928
@@ -1162,7 +1139,7 @@
18932
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
18933
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
18934
(set_attr "opt" "*,*,speed,speed,*,*")]
18937
@@ -1263,7 +1240,7 @@
18941
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
18942
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
18943
(set_attr "opt" "*,*,speed,speed,*,*")]
18946
@@ -3281,6 +3258,24 @@
18947
(const_string "neon_fp_vadd_qqq_vabs_qq")))]
18950
+(define_insn "neon_vcvtv4sfv4hf"
18951
+ [(set (match_operand:V4SF 0 "s_register_operand" "=w")
18952
+ (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
18954
+ "TARGET_NEON && TARGET_FP16"
18955
+ "vcvt.f32.f16\t%q0, %P1"
18956
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
18959
+(define_insn "neon_vcvtv4hfv4sf"
18960
+ [(set (match_operand:V4HF 0 "s_register_operand" "=w")
18961
+ (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
18963
+ "TARGET_NEON && TARGET_FP16"
18964
+ "vcvt.f16.f32\t%P0, %q1"
18965
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
18968
(define_insn "neon_vcvt_n<mode>"
18969
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
18970
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
18971
@@ -5611,7 +5606,7 @@
18972
(match_operand:SI 3 "immediate_operand" "")]
18975
- emit_insn (gen_and<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
18976
+ emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
18980
--- a/src/gcc/config/arm/arm_neon_builtins.def
18981
+++ b/src/gcc/config/arm/arm_neon_builtins.def
18983
+/* NEON builtin definitions for ARM.
18984
+ Copyright (C) 2013
18985
+ Free Software Foundation, Inc.
18986
+ Contributed by ARM Ltd.
18988
+ This file is part of GCC.
18990
+ GCC is free software; you can redistribute it and/or modify it
18991
+ under the terms of the GNU General Public License as published
18992
+ by the Free Software Foundation; either version 3, or (at your
18993
+ option) any later version.
18995
+ GCC is distributed in the hope that it will be useful, but WITHOUT
18996
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18997
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18998
+ License for more details.
19000
+ You should have received a copy of the GNU General Public License
19001
+ along with GCC; see the file COPYING3. If not see
19002
+ <http://www.gnu.org/licenses/>. */
19004
+VAR10 (BINOP, vadd,
19005
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19006
+VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19007
+VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19008
+VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19009
+VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19010
+VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19011
+VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19012
+VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19013
+VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19014
+VAR2 (TERNOP, vfma, v2sf, v4sf),
19015
+VAR2 (TERNOP, vfms, v2sf, v4sf),
19016
+VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19017
+VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19018
+VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19019
+VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19020
+VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19021
+VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19022
+VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19023
+VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19024
+VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19025
+VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19026
+VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19027
+VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19028
+VAR2 (BINOP, vqdmull, v4hi, v2si),
19029
+VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19030
+VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19031
+VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19032
+VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19033
+VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19034
+VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19035
+VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19036
+VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19037
+VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19038
+VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19039
+VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19040
+VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19041
+VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19042
+VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19043
+VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19044
+VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19045
+VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19046
+VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19047
+VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19048
+VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19049
+VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19050
+VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19051
+VAR2 (BINOP, vcage, v2sf, v4sf),
19052
+VAR2 (BINOP, vcagt, v2sf, v4sf),
19053
+VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19054
+VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19055
+VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19056
+VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19057
+VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19058
+VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19059
+VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19060
+VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19061
+VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19062
+VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19063
+VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19064
+VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19065
+VAR2 (BINOP, vrecps, v2sf, v4sf),
19066
+VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19067
+VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19068
+VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19069
+VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19070
+VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19071
+VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19072
+VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19073
+VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19074
+VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19075
+VAR2 (UNOP, vcnt, v8qi, v16qi),
19076
+VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19077
+VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19078
+VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19079
+ /* FIXME: vget_lane supports more variants than this! */
19080
+VAR10 (GETLANE, vget_lane,
19081
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19082
+VAR10 (SETLANE, vset_lane,
19083
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19084
+VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19085
+VAR10 (DUP, vdup_n,
19086
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19087
+VAR10 (DUPLANE, vdup_lane,
19088
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19089
+VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19090
+VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19091
+VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19092
+VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19093
+VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19094
+VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19095
+VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19096
+VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19097
+VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19098
+VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19099
+VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19100
+VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19101
+VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19102
+VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19103
+VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19104
+VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19105
+VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19106
+VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19107
+VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19108
+VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19109
+VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19110
+VAR10 (BINOP, vext,
19111
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19112
+VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19113
+VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19114
+VAR2 (UNOP, vrev16, v8qi, v16qi),
19115
+VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19116
+VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19117
+VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
19118
+VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
19119
+VAR10 (SELECT, vbsl,
19120
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19121
+VAR2 (RINT, vrintn, v2sf, v4sf),
19122
+VAR2 (RINT, vrinta, v2sf, v4sf),
19123
+VAR2 (RINT, vrintp, v2sf, v4sf),
19124
+VAR2 (RINT, vrintm, v2sf, v4sf),
19125
+VAR2 (RINT, vrintz, v2sf, v4sf),
19126
+VAR2 (RINT, vrintx, v2sf, v4sf),
19127
+VAR1 (VTBL, vtbl1, v8qi),
19128
+VAR1 (VTBL, vtbl2, v8qi),
19129
+VAR1 (VTBL, vtbl3, v8qi),
19130
+VAR1 (VTBL, vtbl4, v8qi),
19131
+VAR1 (VTBX, vtbx1, v8qi),
19132
+VAR1 (VTBX, vtbx2, v8qi),
19133
+VAR1 (VTBX, vtbx3, v8qi),
19134
+VAR1 (VTBX, vtbx4, v8qi),
19135
+VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19136
+VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19137
+VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19138
+VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19139
+VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19140
+VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19141
+VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19142
+VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19143
+VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19144
+VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19145
+VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19146
+VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19147
+VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19148
+VAR10 (LOAD1, vld1,
19149
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19150
+VAR10 (LOAD1LANE, vld1_lane,
19151
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19152
+VAR10 (LOAD1, vld1_dup,
19153
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19154
+VAR10 (STORE1, vst1,
19155
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19156
+VAR10 (STORE1LANE, vst1_lane,
19157
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19159
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19160
+VAR7 (LOADSTRUCTLANE, vld2_lane,
19161
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19162
+VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19163
+VAR9 (STORESTRUCT, vst2,
19164
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19165
+VAR7 (STORESTRUCTLANE, vst2_lane,
19166
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19168
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19169
+VAR7 (LOADSTRUCTLANE, vld3_lane,
19170
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19171
+VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19172
+VAR9 (STORESTRUCT, vst3,
19173
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19174
+VAR7 (STORESTRUCTLANE, vst3_lane,
19175
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19176
+VAR9 (LOADSTRUCT, vld4,
19177
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19178
+VAR7 (LOADSTRUCTLANE, vld4_lane,
19179
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19180
+VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19181
+VAR9 (STORESTRUCT, vst4,
19182
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19183
+VAR7 (STORESTRUCTLANE, vst4_lane,
19184
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19185
+VAR10 (LOGICBINOP, vand,
19186
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19187
+VAR10 (LOGICBINOP, vorr,
19188
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19189
+VAR10 (BINOP, veor,
19190
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19191
+VAR10 (LOGICBINOP, vbic,
19192
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19193
+VAR10 (LOGICBINOP, vorn,
19194
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19195
--- a/src/gcc/config/arm/neon.ml
19196
+++ b/src/gcc/config/arm/neon.ml
19198
<http://www.gnu.org/licenses/>. *)
19200
(* Shorthand types for vector elements. *)
19201
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
19202
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
19203
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
19204
| Cast of elts * elts | NoElts
19207
| T_uint16x4 | T_uint16x8
19208
| T_uint32x2 | T_uint32x4
19209
| T_uint64x1 | T_uint64x2
19211
| T_float32x2 | T_float32x4
19212
| T_poly8x8 | T_poly8x16
19213
| T_poly16x4 | T_poly16x8
19214
@@ -46,11 +47,13 @@
19215
| T_uint8 | T_uint16
19216
| T_uint32 | T_uint64
19217
| T_poly8 | T_poly16
19218
- | T_float32 | T_arrayof of int * vectype
19219
+ | T_float16 | T_float32
19220
+ | T_arrayof of int * vectype
19221
| T_ptrto of vectype | T_const of vectype
19223
| T_intHI | T_intSI
19224
- | T_intDI | T_floatSF
19225
+ | T_intDI | T_floatHF
19228
(* The meanings of the following are:
19229
TImode : "Tetra", two registers (four words).
19231
| Arity3 of vectype * vectype * vectype * vectype
19232
| Arity4 of vectype * vectype * vectype * vectype * vectype
19234
-type vecmode = V8QI | V4HI | V2SI | V2SF | DI
19235
+type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
19236
| V16QI | V8HI | V4SI | V4SF | V2DI
19237
| QI | HI | SI | SF
19239
@@ -284,18 +287,22 @@
19241
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
19242
| Requires_feature of string
19243
+ (* Mark that the intrinsic requires a particular architecture version. *)
19244
| Requires_arch of int
19245
+ (* Mark that the intrinsic requires a particular bit in __ARM_FP to
19247
+ | Requires_FP_bit of int
19249
exception MixedMode of elts * elts
19251
let rec elt_width = function
19252
S8 | U8 | P8 | I8 | B8 -> 8
19253
- | S16 | U16 | P16 | I16 | B16 -> 16
19254
+ | S16 | U16 | P16 | I16 | B16 | F16 -> 16
19255
| S32 | F32 | U32 | I32 | B32 -> 32
19256
| S64 | U64 | I64 | B64 -> 64
19258
let wa = elt_width a and wb = elt_width b in
19259
- if wa = wb then wa else failwith "element width?"
19260
+ if wa = wb then wa else raise (MixedMode (a, b))
19261
| Cast (a, b) -> raise (MixedMode (a, b))
19262
| NoElts -> failwith "No elts"
19264
@@ -303,7 +310,7 @@
19265
S8 | S16 | S32 | S64 -> Signed
19266
| U8 | U16 | U32 | U64 -> Unsigned
19269
+ | F16 | F32 -> Float
19270
| I8 | I16 | I32 | I64 -> Int
19271
| B8 | B16 | B32 | B64 -> Bits
19272
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
19273
@@ -315,6 +322,7 @@
19274
| Signed, 16 -> S16
19275
| Signed, 32 -> S32
19276
| Signed, 64 -> S64
19277
+ | Float, 16 -> F16
19279
| Unsigned, 8 -> U8
19280
| Unsigned, 16 -> U16
19281
@@ -384,7 +392,12 @@
19283
scan ((Array.length operands) - 1)
19285
-let rec mode_of_elt elt shape =
19286
+(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
19287
+ SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
19288
+ for the given argument position, else determine which argument to return a
19289
+ mode for automatically. *)
19291
+let rec mode_of_elt ?argpos elt shape =
19292
let flt = match elt_class elt with
19293
Float | ConvClass(_, Float) -> true | _ -> false in
19295
@@ -394,7 +407,10 @@
19296
in match shape with
19297
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
19298
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
19299
- [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
19301
+ [| V8QI; V4HF; V2SF; DI |].(idx)
19303
+ [| V8QI; V4HI; V2SI; DI |].(idx)
19304
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
19305
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
19306
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
19307
@@ -404,7 +420,11 @@
19309
[| V8QI; V4HI; V2SI; DI |].(idx)
19310
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
19311
- | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
19312
+ | Use_operands ops ->
19313
+ begin match argpos with
19314
+ None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
19315
+ | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
19317
| _ -> failwith "invalid shape"
19319
(* Modify an element type dependent on the shape of the instruction and the
19320
@@ -454,10 +474,11 @@
19321
| U16 -> T_uint16x4
19322
| U32 -> T_uint32x2
19323
| U64 -> T_uint64x1
19324
+ | F16 -> T_float16x4
19325
| F32 -> T_float32x2
19327
| P16 -> T_poly16x4
19328
- | _ -> failwith "Bad elt type"
19329
+ | _ -> failwith "Bad elt type for Dreg"
19332
begin match elt with
19333
@@ -472,7 +493,7 @@
19334
| F32 -> T_float32x4
19336
| P16 -> T_poly16x8
19337
- | _ -> failwith "Bad elt type"
19338
+ | _ -> failwith "Bad elt type for Qreg"
19341
begin match elt with
19342
@@ -487,7 +508,7 @@
19346
- | _ -> failwith "Bad elt type"
19347
+ | _ -> failwith "Bad elt type for Corereg"
19351
@@ -506,7 +527,7 @@
19352
let vectype_size = function
19353
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
19354
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
19355
- | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
19356
+ | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
19357
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
19358
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
19359
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
19360
@@ -1217,6 +1238,10 @@
19361
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
19362
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
19363
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
19364
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
19365
+ Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
19366
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
19367
+ Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
19368
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
19369
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
19370
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
19371
@@ -1782,7 +1807,7 @@
19372
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
19373
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
19374
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
19375
- | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
19376
+ | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
19377
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
19378
| NoElts -> failwith "No elts"
19380
@@ -1809,6 +1834,7 @@
19381
| T_uint32x4 -> affix "uint32x4"
19382
| T_uint64x1 -> affix "uint64x1"
19383
| T_uint64x2 -> affix "uint64x2"
19384
+ | T_float16x4 -> affix "float16x4"
19385
| T_float32x2 -> affix "float32x2"
19386
| T_float32x4 -> affix "float32x4"
19387
| T_poly8x8 -> affix "poly8x8"
19388
@@ -1825,6 +1851,7 @@
19389
| T_uint64 -> affix "uint64"
19390
| T_poly8 -> affix "poly8"
19391
| T_poly16 -> affix "poly16"
19392
+ | T_float16 -> affix "float16"
19393
| T_float32 -> affix "float32"
19394
| T_immediate _ -> "const int"
19396
@@ -1832,6 +1859,7 @@
19397
| T_intHI -> "__builtin_neon_hi"
19398
| T_intSI -> "__builtin_neon_si"
19399
| T_intDI -> "__builtin_neon_di"
19400
+ | T_floatHF -> "__builtin_neon_hf"
19401
| T_floatSF -> "__builtin_neon_sf"
19402
| T_arrayof (num, base) ->
19403
let basename = name (fun x -> x) base in
19404
@@ -1853,10 +1881,10 @@
19405
| B_XImode -> "__builtin_neon_xi"
19407
let string_of_mode = function
19408
- V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
19409
- | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
19410
- | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
19412
+ V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
19413
+ | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
19414
+ | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
19415
+ | HI -> "hi" | SI -> "si" | SF -> "sf"
19417
(* Use uppercase chars for letters which form part of the intrinsic name, but
19418
should be omitted from the builtin name (the info is passed in an extra
19419
--- a/src/gcc/config/arm/constraints.md
19420
+++ b/src/gcc/config/arm/constraints.md
19422
;; The following register constraints have been used:
19423
;; - in ARM/Thumb-2 state: t, w, x, y, z
19424
;; - in Thumb state: h, b
19425
-;; - in both states: l, c, k
19426
+;; - in both states: l, c, k, q
19427
;; In ARM state, 'l' is an alias for 'r'
19428
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
19431
(define_register_constraint "k" "STACK_REG"
19432
"@internal The stack register.")
19434
+(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS"
19435
+ "@internal In ARM state with LDRD support, core registers, otherwise general registers.")
19437
(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS"
19439
Thumb only. The union of the low registers and the stack register.")
19441
(define_register_constraint "c" "CC_REG"
19442
"@internal The condition code register.")
19444
+(define_register_constraint "Cs" "CALLER_SAVE_REGS"
19445
+ "@internal The caller save registers. Useful for sibcalls.")
19447
(define_constraint "I"
19448
"In ARM/Thumb-2 state a constant that can be used as an immediate value in a
19449
Data Processing instruction. In Thumb-1 state a constant in the range
19450
@@ -248,6 +254,12 @@
19451
(and (match_code "const_int")
19452
(match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
19454
+(define_constraint "De"
19456
+ In ARM/Thumb-2 state a const_int that can be used by insn anddi."
19457
+ (and (match_code "const_int")
19458
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
19460
(define_constraint "Di"
19462
In ARM/Thumb-2 state a const_int or const_double where both the high
19463
@@ -391,3 +403,9 @@
19464
;; Additionally, we used to have a Q constraint in Thumb state, but
19465
;; this wasn't really a valid memory constraint. Again, all uses of
19466
;; this now seem to have been removed.
19468
+(define_constraint "Ss"
19470
+ Ss is a symbol reference."
19471
+ (match_code "symbol_ref")
19473
--- a/src/gcc/config/arm/arm-arches.def
19474
+++ b/src/gcc/config/arm/arm-arches.def
19476
ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R)
19477
ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M)
19478
ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM)
19479
-ARM_ARCH("armv8-a", cortexa15, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
19480
+ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
19481
ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)
19482
ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
19483
--- a/src/gcc/config/arm/t-arm
19484
+++ b/src/gcc/config/arm/t-arm
19486
$(srcdir)/config/arm/cortex-a8-neon.md \
19487
$(srcdir)/config/arm/cortex-a9.md \
19488
$(srcdir)/config/arm/cortex-a9-neon.md \
19489
+ $(srcdir)/config/arm/cortex-a53.md \
19490
$(srcdir)/config/arm/cortex-m4-fpu.md \
19491
$(srcdir)/config/arm/cortex-m4.md \
19492
$(srcdir)/config/arm/cortex-r4f.md \
19494
$(srcdir)/config/arm/iwmmxt.md \
19495
$(srcdir)/config/arm/iwmmxt2.md \
19496
$(srcdir)/config/arm/ldmstm.md \
19497
+ $(srcdir)/config/arm/ldrdstrd.md \
19498
$(srcdir)/config/arm/marvell-f-iwmmxt.md \
19499
$(srcdir)/config/arm/neon.md \
19500
$(srcdir)/config/arm/predicates.md \
19502
$(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
19503
$(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
19504
intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \
19505
- $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def
19506
+ $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \
19507
+ $(srcdir)/config/arm/arm_neon_builtins.def
19509
arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \
19510
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H)
19511
--- a/src/gcc/config/arm/arm.opt
19512
+++ b/src/gcc/config/arm/arm.opt
19513
@@ -247,3 +247,7 @@
19515
Target Report Var(unaligned_access) Init(2)
19516
Enable unaligned word and halfword accesses to packed data.
19519
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
19520
+Use Neon to perform 64-bits operations rather than core registers.
19521
--- a/src/gcc/config/arm/ldrdstrd.md
19522
+++ b/src/gcc/config/arm/ldrdstrd.md
19524
+;; ARM ldrd/strd peephole optimizations.
19526
+;; Copyright (C) 2013 Free Software Foundation, Inc.
19528
+;; Written by Greta Yorsh <greta.yorsh@arm.com>
19530
+;; This file is part of GCC.
19532
+;; GCC is free software; you can redistribute it and/or modify it
19533
+;; under the terms of the GNU General Public License as published by
19534
+;; the Free Software Foundation; either version 3, or (at your option)
19535
+;; any later version.
19537
+;; GCC is distributed in the hope that it will be useful, but
19538
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
19539
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19540
+;; General Public License for more details.
19542
+;; You should have received a copy of the GNU General Public License
19543
+;; along with GCC; see the file COPYING3. If not see
19544
+;; <http://www.gnu.org/licenses/>.
19546
+;; The following peephole optimizations identify consecutive memory
19547
+;; accesses, and try to rearrange the operands to enable generation of
19550
+(define_peephole2 ; ldrd
19551
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
19552
+ (match_operand:SI 2 "memory_operand" ""))
19553
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
19554
+ (match_operand:SI 3 "memory_operand" ""))]
19556
+ && current_tune->prefer_ldrd_strd
19557
+ && !optimize_function_for_size_p (cfun)"
19560
+ if (!gen_operands_ldrd_strd (operands, true, false, false))
19562
+ else if (TARGET_ARM)
19564
+ /* In ARM state, the destination registers of LDRD/STRD must be
19565
+ consecutive. We emit DImode access. */
19566
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
19567
+ operands[2] = adjust_address (operands[2], DImode, 0);
19568
+ /* Emit [(set (match_dup 0) (match_dup 2))] */
19569
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
19572
+ else if (TARGET_THUMB2)
19574
+ /* Emit the pattern:
19575
+ [(parallel [(set (match_dup 0) (match_dup 2))
19576
+ (set (match_dup 1) (match_dup 3))])] */
19577
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]);
19578
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]);
19579
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
19584
+(define_peephole2 ; strd
19585
+ [(set (match_operand:SI 2 "memory_operand" "")
19586
+ (match_operand:SI 0 "arm_general_register_operand" ""))
19587
+ (set (match_operand:SI 3 "memory_operand" "")
19588
+ (match_operand:SI 1 "arm_general_register_operand" ""))]
19590
+ && current_tune->prefer_ldrd_strd
19591
+ && !optimize_function_for_size_p (cfun)"
19594
+ if (!gen_operands_ldrd_strd (operands, false, false, false))
19596
+ else if (TARGET_ARM)
19598
+ /* In ARM state, the destination registers of LDRD/STRD must be
19599
+ consecutive. We emit DImode access. */
19600
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
19601
+ operands[2] = adjust_address (operands[2], DImode, 0);
19602
+ /* Emit [(set (match_dup 2) (match_dup 0))] */
19603
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0]));
19606
+ else if (TARGET_THUMB2)
19608
+ /* Emit the pattern:
19609
+ [(parallel [(set (match_dup 2) (match_dup 0))
19610
+ (set (match_dup 3) (match_dup 1))])] */
19611
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
19612
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
19613
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
19618
+;; The following peepholes reorder registers to enable LDRD/STRD.
19619
+(define_peephole2 ; strd of constants
19620
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
19621
+ (match_operand:SI 4 "const_int_operand" ""))
19622
+ (set (match_operand:SI 2 "memory_operand" "")
19624
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
19625
+ (match_operand:SI 5 "const_int_operand" ""))
19626
+ (set (match_operand:SI 3 "memory_operand" "")
19629
+ && current_tune->prefer_ldrd_strd
19630
+ && !optimize_function_for_size_p (cfun)"
19633
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
19635
+ else if (TARGET_ARM)
19637
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
19638
+ operands[2] = adjust_address (operands[2], DImode, 0);
19639
+ /* Emit the pattern:
19640
+ [(set (match_dup 0) (match_dup 4))
19641
+ (set (match_dup 1) (match_dup 5))
19642
+ (set (match_dup 2) tmp)] */
19643
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
19644
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
19645
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
19648
+ else if (TARGET_THUMB2)
19650
+ /* Emit the pattern:
19651
+ [(set (match_dup 0) (match_dup 4))
19652
+ (set (match_dup 1) (match_dup 5))
19653
+ (parallel [(set (match_dup 2) (match_dup 0))
19654
+ (set (match_dup 3) (match_dup 1))])] */
19655
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
19656
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
19657
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
19658
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
19659
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
19664
+(define_peephole2 ; strd of constants
19665
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
19666
+ (match_operand:SI 4 "const_int_operand" ""))
19667
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
19668
+ (match_operand:SI 5 "const_int_operand" ""))
19669
+ (set (match_operand:SI 2 "memory_operand" "")
19671
+ (set (match_operand:SI 3 "memory_operand" "")
19674
+ && current_tune->prefer_ldrd_strd
19675
+ && !optimize_function_for_size_p (cfun)"
19678
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
19680
+ else if (TARGET_ARM)
19682
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
19683
+ operands[2] = adjust_address (operands[2], DImode, 0);
19684
+ /* Emit the pattern
19685
+ [(set (match_dup 0) (match_dup 4))
19686
+ (set (match_dup 1) (match_dup 5))
19687
+ (set (match_dup 2) tmp)] */
19688
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
19689
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
19690
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
19693
+ else if (TARGET_THUMB2)
19695
+ /* Emit the pattern:
19696
+ [(set (match_dup 0) (match_dup 4))
19697
+ (set (match_dup 1) (match_dup 5))
19698
+ (parallel [(set (match_dup 2) (match_dup 0))
19699
+ (set (match_dup 3) (match_dup 1))])] */
19700
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
19701
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
19702
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
19703
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
19704
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
19709
+;; The following two peephole optimizations are only relevant for ARM
19710
+;; mode where LDRD/STRD require consecutive registers.
19712
+(define_peephole2 ; swap the destination registers of two loads
19713
+ ; before a commutative operation.
19714
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
19715
+ (match_operand:SI 2 "memory_operand" ""))
19716
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
19717
+ (match_operand:SI 3 "memory_operand" ""))
19718
+ (set (match_operand:SI 4 "arm_general_register_operand" "")
19719
+ (match_operator:SI 5 "commutative_binary_operator"
19720
+ [(match_operand 6 "arm_general_register_operand" "")
19721
+ (match_operand 7 "arm_general_register_operand" "") ]))]
19722
+ "TARGET_LDRD && TARGET_ARM
19723
+ && current_tune->prefer_ldrd_strd
19724
+ && !optimize_function_for_size_p (cfun)
19725
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
19726
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
19727
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
19728
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
19729
+ [(set (match_dup 0) (match_dup 2))
19730
+ (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
19732
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
19738
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
19739
+ operands[2] = adjust_address (operands[2], DImode, 0);
19744
+(define_peephole2 ; swap the destination registers of two loads
19745
+ ; before a commutative operation that sets the flags.
19746
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
19747
+ (match_operand:SI 2 "memory_operand" ""))
19748
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
19749
+ (match_operand:SI 3 "memory_operand" ""))
19751
+ [(set (match_operand:SI 4 "arm_general_register_operand" "")
19752
+ (match_operator:SI 5 "commutative_binary_operator"
19753
+ [(match_operand 6 "arm_general_register_operand" "")
19754
+ (match_operand 7 "arm_general_register_operand" "") ]))
19755
+ (clobber (reg:CC CC_REGNUM))])]
19756
+ "TARGET_LDRD && TARGET_ARM
19757
+ && current_tune->prefer_ldrd_strd
19758
+ && !optimize_function_for_size_p (cfun)
19759
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
19760
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
19761
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
19762
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
19763
+ [(set (match_dup 0) (match_dup 2))
19765
+ [(set (match_dup 4)
19766
+ (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
19767
+ (clobber (reg:CC CC_REGNUM))])]
19769
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
19775
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
19776
+ operands[2] = adjust_address (operands[2], DImode, 0);
19781
+;; TODO: Handle LDRD/STRD with writeback:
19782
+;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
19783
+;; (b) Patterns may be followed by an update of the base address.
19784
--- a/src/gcc/config/arm/predicates.md
19785
+++ b/src/gcc/config/arm/predicates.md
19787
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
19790
+(define_predicate "imm_for_neon_inv_logic_operand"
19791
+ (match_code "const_vector")
19793
+ return (TARGET_NEON
19794
+ && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
19797
+(define_predicate "neon_inv_logic_op2"
19798
+ (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
19799
+ (match_operand 0 "s_register_operand")))
19801
;; Any hard register.
19802
(define_predicate "arm_hard_register_operand"
19804
@@ -145,6 +156,12 @@
19805
(ior (match_operand 0 "arm_rhs_operand")
19806
(match_operand 0 "arm_neg_immediate_operand")))
19808
+(define_predicate "arm_anddi_operand_neon"
19809
+ (ior (match_operand 0 "s_register_operand")
19810
+ (and (match_code "const_int")
19811
+ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
19812
+ (match_operand 0 "neon_inv_logic_op2")))
19814
(define_predicate "arm_adddi_operand"
19815
(ior (match_operand 0 "s_register_operand")
19816
(and (match_code "const_int")
19817
@@ -270,6 +287,18 @@
19818
(define_special_predicate "lt_ge_comparison_operator"
19819
(match_code "lt,ge"))
19821
+;; The vsel instruction only accepts the ARM condition codes listed below.
19822
+(define_special_predicate "arm_vsel_comparison_operator"
19823
+ (and (match_operand 0 "expandable_comparison_operator")
19824
+ (match_test "maybe_get_arm_condition_code (op) == ARM_GE
19825
+ || maybe_get_arm_condition_code (op) == ARM_GT
19826
+ || maybe_get_arm_condition_code (op) == ARM_EQ
19827
+ || maybe_get_arm_condition_code (op) == ARM_VS
19828
+ || maybe_get_arm_condition_code (op) == ARM_LT
19829
+ || maybe_get_arm_condition_code (op) == ARM_LE
19830
+ || maybe_get_arm_condition_code (op) == ARM_NE
19831
+ || maybe_get_arm_condition_code (op) == ARM_VC")))
19833
(define_special_predicate "noov_comparison_operator"
19834
(match_code "lt,ge,eq,ne"))
19836
@@ -513,21 +542,10 @@
19837
&& neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
19840
-(define_predicate "imm_for_neon_inv_logic_operand"
19841
- (match_code "const_vector")
19843
- return (TARGET_NEON
19844
- && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
19847
(define_predicate "neon_logic_op2"
19848
(ior (match_operand 0 "imm_for_neon_logic_operand")
19849
(match_operand 0 "s_register_operand")))
19851
-(define_predicate "neon_inv_logic_op2"
19852
- (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
19853
- (match_operand 0 "s_register_operand")))
19855
;; Predicates for named expanders that overlap multiple ISAs.
19857
(define_predicate "cmpdi_operand"
19858
@@ -617,3 +635,7 @@
19859
(define_predicate "mem_noofs_operand"
19860
(and (match_code "mem")
19861
(match_code "reg" "0")))
19863
+(define_predicate "call_insn_operand"
19864
+ (ior (match_code "symbol_ref")
19865
+ (match_operand 0 "s_register_operand")))
19866
--- a/src/gcc/config/arm/arm_neon.h
19867
+++ b/src/gcc/config/arm/arm_neon.h
19869
typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
19870
typedef __builtin_neon_di int64x1_t;
19871
typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
19872
+typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8)));
19873
typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
19874
typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
19875
typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
19876
@@ -6016,6 +6017,22 @@
19877
return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
19880
+#if ((__ARM_FP & 0x2) != 0)
19881
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
19882
+vcvt_f16_f32 (float32x4_t __a)
19884
+ return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
19888
+#if ((__ARM_FP & 0x2) != 0)
19889
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19890
+vcvt_f32_f16 (float16x4_t __a)
19892
+ return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
19896
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19897
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
19899
--- a/src/gcc/config/arm/cortex-a53.md
19900
+++ b/src/gcc/config/arm/cortex-a53.md
19902
+;; ARM Cortex-A53 pipeline description
19903
+;; Copyright (C) 2013 Free Software Foundation, Inc.
19905
+;; Contributed by ARM Ltd.
19907
+;; This file is part of GCC.
19909
+;; GCC is free software; you can redistribute it and/or modify it
19910
+;; under the terms of the GNU General Public License as published by
19911
+;; the Free Software Foundation; either version 3, or (at your option)
19912
+;; any later version.
19914
+;; GCC is distributed in the hope that it will be useful, but
19915
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
19916
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19917
+;; General Public License for more details.
19919
+;; You should have received a copy of the GNU General Public License
19920
+;; along with GCC; see the file COPYING3. If not see
19921
+;; <http://www.gnu.org/licenses/>.
19923
+(define_automaton "cortex_a53")
19925
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19926
+;; Functional units.
19927
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19929
+;; There are two main integer execution pipelines, described as
19930
+;; slot 0 and issue slot 1.
19932
+(define_cpu_unit "cortex_a53_slot0" "cortex_a53")
19933
+(define_cpu_unit "cortex_a53_slot1" "cortex_a53")
19935
+(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1")
19936
+(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1")
19938
+;; The load/store pipeline. Load/store instructions can dual-issue from
19939
+;; either pipeline, but two load/stores cannot simultaneously issue.
19941
+(define_cpu_unit "cortex_a53_ls" "cortex_a53")
19943
+;; The store pipeline. Shared between both execution pipelines.
19945
+(define_cpu_unit "cortex_a53_store" "cortex_a53")
19947
+;; The branch pipeline. Branches can dual-issue with other instructions
19948
+;; (except when those instructions take multiple cycles to issue).
19950
+(define_cpu_unit "cortex_a53_branch" "cortex_a53")
19952
+;; The integer divider.
19954
+(define_cpu_unit "cortex_a53_idiv" "cortex_a53")
19956
+;; The floating-point add pipeline used to model the usage
19957
+;; of the add pipeline by fmac instructions.
19959
+(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53")
19961
+;; Floating-point div/sqrt (long latency, out-of-order completion).
19963
+(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
19965
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19966
+;; ALU instructions.
19967
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19969
+(define_insn_reservation "cortex_a53_alu" 2
19970
+ (and (eq_attr "tune" "cortexa53")
19971
+ (eq_attr "type" "alu_reg,simple_alu_imm"))
19972
+ "cortex_a53_slot_any")
19974
+(define_insn_reservation "cortex_a53_alu_shift" 2
19975
+ (and (eq_attr "tune" "cortexa53")
19976
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
19977
+ "cortex_a53_slot_any")
19979
+;; Forwarding path for unshifted operands.
19981
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
19982
+ "cortex_a53_alu")
19984
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
19985
+ "cortex_a53_alu_shift"
19986
+ "arm_no_early_alu_shift_dep")
19988
+;; The multiplier pipeline can forward results so there's no need to specify
19989
+;; bypasses. Multiplies can only single-issue currently.
19991
+(define_insn_reservation "cortex_a53_mul" 3
19992
+ (and (eq_attr "tune" "cortexa53")
19993
+ (eq_attr "type" "mult"))
19994
+ "cortex_a53_single_issue")
19996
+;; A multiply with a single-register result or an MLA, followed by an
19997
+;; MLA with an accumulator dependency, has its result forwarded so two
19998
+;; such instructions can issue back-to-back.
20000
+(define_bypass 1 "cortex_a53_mul"
20002
+ "arm_mac_accumulator_is_mul_result")
20004
+;; Punt with a high enough latency for divides.
20005
+(define_insn_reservation "cortex_a53_udiv" 8
20006
+ (and (eq_attr "tune" "cortexa53")
20007
+ (eq_attr "insn" "udiv"))
20008
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")
20010
+(define_insn_reservation "cortex_a53_sdiv" 9
20011
+ (and (eq_attr "tune" "cortexa53")
20012
+ (eq_attr "insn" "sdiv"))
20013
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
20016
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
20017
+ "cortex_a53_alu")
20018
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
20019
+ "cortex_a53_alu_shift"
20020
+ "arm_no_early_alu_shift_dep")
20022
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20023
+;; Load/store instructions.
20024
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20026
+;; Address-generation happens in the issue stage.
20028
+(define_insn_reservation "cortex_a53_load1" 3
20029
+ (and (eq_attr "tune" "cortexa53")
20030
+ (eq_attr "type" "load_byte,load1"))
20031
+ "cortex_a53_slot_any+cortex_a53_ls")
20033
+(define_insn_reservation "cortex_a53_store1" 2
20034
+ (and (eq_attr "tune" "cortexa53")
20035
+ (eq_attr "type" "store1"))
20036
+ "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store")
20038
+(define_insn_reservation "cortex_a53_load2" 3
20039
+ (and (eq_attr "tune" "cortexa53")
20040
+ (eq_attr "type" "load2"))
20041
+ "cortex_a53_single_issue+cortex_a53_ls")
20043
+(define_insn_reservation "cortex_a53_store2" 2
20044
+ (and (eq_attr "tune" "cortexa53")
20045
+ (eq_attr "type" "store2"))
20046
+ "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store")
20048
+(define_insn_reservation "cortex_a53_load3plus" 4
20049
+ (and (eq_attr "tune" "cortexa53")
20050
+ (eq_attr "type" "load3,load4"))
20051
+ "(cortex_a53_single_issue+cortex_a53_ls)*2")
20053
+(define_insn_reservation "cortex_a53_store3plus" 3
20054
+ (and (eq_attr "tune" "cortexa53")
20055
+ (eq_attr "type" "store3,store4"))
20056
+ "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2")
20058
+;; Load/store addresses are required early in Issue.
20059
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
20060
+ "cortex_a53_load*"
20061
+ "arm_early_load_addr_dep")
20062
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
20063
+ "cortex_a53_store*"
20064
+ "arm_early_store_addr_dep")
20066
+;; Load data can forward in the ALU pipeline
20067
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
20068
+ "cortex_a53_alu")
20069
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
20070
+ "cortex_a53_alu_shift"
20071
+ "arm_no_early_alu_shift_dep")
20073
+;; ALU ops can forward to stores.
20074
+(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift"
20075
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
20076
+ "arm_no_early_store_addr_dep")
20078
+(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus"
20079
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
20080
+ "arm_no_early_store_addr_dep")
20082
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20084
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20086
+;; Currently models all branches as dual-issuable from either execution
20087
+;; slot, which isn't true for all cases. We still need to model indirect
20090
+(define_insn_reservation "cortex_a53_branch" 0
20091
+ (and (eq_attr "tune" "cortexa53")
20092
+ (eq_attr "type" "branch,call"))
20093
+ "cortex_a53_slot_any+cortex_a53_branch")
20095
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20096
+;; Floating-point arithmetic.
20097
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20099
+(define_insn_reservation "cortex_a53_fpalu" 4
20100
+ (and (eq_attr "tune" "cortexa53")
20101
+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
20103
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
20105
+(define_insn_reservation "cortex_a53_fconst" 2
20106
+ (and (eq_attr "tune" "cortexa53")
20107
+ (eq_attr "type" "fconsts,fconstd"))
20108
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
20110
+(define_insn_reservation "cortex_a53_fpmul" 4
20111
+ (and (eq_attr "tune" "cortexa53")
20112
+ (eq_attr "type" "fmuls,fmuld"))
20113
+ "cortex_a53_slot0")
20115
+;; For single-precision multiply-accumulate, the add (accumulate) is issued after
20116
+;; the multiply completes. Model that accordingly.
20118
+(define_insn_reservation "cortex_a53_fpmac" 8
20119
+ (and (eq_attr "tune" "cortexa53")
20120
+ (eq_attr "type" "fmacs,fmacd,ffmas,ffmad"))
20121
+ "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe")
20123
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20124
+;; Floating-point divide/square root instructions.
20125
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20126
+;; fsqrt really takes one cycle less, but that is not modelled.
20128
+(define_insn_reservation "cortex_a53_fdivs" 14
20129
+ (and (eq_attr "tune" "cortexa53")
20130
+ (eq_attr "type" "fdivs"))
20131
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13")
20133
+(define_insn_reservation "cortex_a53_fdivd" 29
20134
+ (and (eq_attr "tune" "cortexa53")
20135
+ (eq_attr "type" "fdivd"))
20136
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
20138
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20139
+;; VFP to/from core transfers.
20140
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20142
+(define_insn_reservation "cortex_a53_r2f" 4
20143
+ (and (eq_attr "tune" "cortexa53")
20144
+ (eq_attr "type" "r_2_f"))
20145
+ "cortex_a53_slot0")
20147
+(define_insn_reservation "cortex_a53_f2r" 2
20148
+ (and (eq_attr "tune" "cortexa53")
20149
+ (eq_attr "type" "f_2_r"))
20150
+ "cortex_a53_slot0")
20152
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20153
+;; VFP flag transfer.
20154
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20156
+(define_insn_reservation "cortex_a53_f_flags" 4
20157
+ (and (eq_attr "tune" "cortexa53")
20158
+ (eq_attr "type" "f_flag"))
20159
+ "cortex_a53_slot0")
20161
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20162
+;; VFP load/store.
20163
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20165
+(define_insn_reservation "cortex_a53_f_loads" 4
20166
+ (and (eq_attr "tune" "cortexa53")
20167
+ (eq_attr "type" "f_loads"))
20168
+ "cortex_a53_slot0")
20170
+(define_insn_reservation "cortex_a53_f_loadd" 5
20171
+ (and (eq_attr "tune" "cortexa53")
20172
+ (eq_attr "type" "f_loadd"))
20173
+ "cortex_a53_slot0")
20175
+(define_insn_reservation "cortex_a53_f_stores" 0
20176
+ (and (eq_attr "tune" "cortexa53")
20177
+ (eq_attr "type" "f_stores"))
20178
+ "cortex_a53_slot0")
20180
+(define_insn_reservation "cortex_a53_f_stored" 0
20181
+ (and (eq_attr "tune" "cortexa53")
20182
+ (eq_attr "type" "f_stored"))
20183
+ "cortex_a53_slot0")
20185
+;; Load-to-use for floating-point values has a penalty of one cycle,
20186
+;; i.e. a latency of two.
20188
+(define_bypass 2 "cortex_a53_f_loads"
20189
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
20190
+ cortex_a53_fdivs, cortex_a53_fdivd,\
20193
+(define_bypass 2 "cortex_a53_f_loadd"
20194
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
20195
+ cortex_a53_fdivs, cortex_a53_fdivd,\
20198
--- a/src/gcc/config/arm/bpabi.h
20199
+++ b/src/gcc/config/arm/bpabi.h
20202
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
20203
|mcpu=marvell-pj4 \
20204
+ |mcpu=cortex-a53 \
20205
|mcpu=generic-armv7-a \
20206
|march=armv7-m|mcpu=cortex-m3 \
20207
|march=armv7e-m|mcpu=cortex-m4 \
20209
" %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \
20211
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
20212
+ |mcpu=cortex-a53 \
20213
|mcpu=marvell-pj4 \
20214
|mcpu=generic-armv7-a \
20215
|march=armv7-m|mcpu=cortex-m3 \
20216
--- a/src/gcc/config/arm/iterators.md
20217
+++ b/src/gcc/config/arm/iterators.md
20218
@@ -496,3 +496,11 @@
20219
(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p")
20220
(UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m")
20221
(UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")])
20222
+;; Both kinds of return insn.
20223
+(define_code_iterator returns [return simple_return])
20224
+(define_code_attr return_str [(return "") (simple_return "simple_")])
20225
+(define_code_attr return_simple_p [(return "false") (simple_return "true")])
20226
+(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)")
20227
+ (simple_return " && use_simple_return_p ()")])
20228
+(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)")
20229
+ (simple_return " && use_simple_return_p ()")])
20230
--- a/src/gcc/config/arm/sync.md
20231
+++ b/src/gcc/config/arm/sync.md
20233
(set_attr "conds" "unconditional")
20234
(set_attr "predicable" "no")])
20236
+(define_insn "atomic_load<mode>"
20237
+ [(set (match_operand:QHSI 0 "register_operand" "=r")
20238
+ (unspec_volatile:QHSI
20239
+ [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q")
20240
+ (match_operand:SI 2 "const_int_operand")] ;; model
20242
+ "TARGET_HAVE_LDACQ"
20244
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
20245
+ if (model == MEMMODEL_RELAXED
20246
+ || model == MEMMODEL_CONSUME
20247
+ || model == MEMMODEL_RELEASE)
20248
+ return \"ldr<sync_sfx>\\t%0, %1\";
20250
+ return \"lda<sync_sfx>\\t%0, %1\";
20254
+(define_insn "atomic_store<mode>"
20255
+ [(set (match_operand:QHSI 0 "memory_operand" "=Q")
20256
+ (unspec_volatile:QHSI
20257
+ [(match_operand:QHSI 1 "general_operand" "r")
20258
+ (match_operand:SI 2 "const_int_operand")] ;; model
20260
+ "TARGET_HAVE_LDACQ"
20262
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
20263
+ if (model == MEMMODEL_RELAXED
20264
+ || model == MEMMODEL_CONSUME
20265
+ || model == MEMMODEL_ACQUIRE)
20266
+ return \"str<sync_sfx>\t%1, %0\";
20268
+ return \"stl<sync_sfx>\t%1, %0\";
20272
;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic,
20273
;; even for a 64-bit aligned address. Instead we use a ldrexd unparied
20275
@@ -327,6 +363,16 @@
20276
"ldrex<sync_sfx>%?\t%0, %C1"
20277
[(set_attr "predicable" "yes")])
20279
+(define_insn "arm_load_acquire_exclusive<mode>"
20280
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
20282
+ (unspec_volatile:NARROW
20283
+ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
20285
+ "TARGET_HAVE_LDACQ"
20286
+ "ldaex<sync_sfx>%?\\t%0, %C1"
20287
+ [(set_attr "predicable" "yes")])
20289
(define_insn "arm_load_exclusivesi"
20290
[(set (match_operand:SI 0 "s_register_operand" "=r")
20291
(unspec_volatile:SI
20292
@@ -336,6 +382,15 @@
20294
[(set_attr "predicable" "yes")])
20296
+(define_insn "arm_load_acquire_exclusivesi"
20297
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
20298
+ (unspec_volatile:SI
20299
+ [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
20301
+ "TARGET_HAVE_LDACQ"
20302
+ "ldaex%?\t%0, %C1"
20303
+ [(set_attr "predicable" "yes")])
20305
(define_insn "arm_load_exclusivedi"
20306
[(set (match_operand:DI 0 "s_register_operand" "=r")
20307
(unspec_volatile:DI
20308
@@ -345,6 +400,15 @@
20309
"ldrexd%?\t%0, %H0, %C1"
20310
[(set_attr "predicable" "yes")])
20312
+(define_insn "arm_load_acquire_exclusivedi"
20313
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
20314
+ (unspec_volatile:DI
20315
+ [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
20317
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
20318
+ "ldaexd%?\t%0, %H0, %C1"
20319
+ [(set_attr "predicable" "yes")])
20321
(define_insn "arm_store_exclusive<mode>"
20322
[(set (match_operand:SI 0 "s_register_operand" "=&r")
20323
(unspec_volatile:SI [(const_int 0)] VUNSPEC_SC))
20324
@@ -368,3 +432,31 @@
20325
return "strex<sync_sfx>%?\t%0, %2, %C1";
20327
[(set_attr "predicable" "yes")])
20329
+(define_insn "arm_store_release_exclusivedi"
20330
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
20331
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
20332
+ (set (match_operand:DI 1 "mem_noofs_operand" "=Ua")
20333
+ (unspec_volatile:DI
20334
+ [(match_operand:DI 2 "s_register_operand" "r")]
20336
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
20338
+ rtx value = operands[2];
20339
+ /* See comment in arm_store_exclusive<mode> above. */
20340
+ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
20341
+ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
20342
+ return "stlexd%?\t%0, %2, %3, %C1";
20344
+ [(set_attr "predicable" "yes")])
20346
+(define_insn "arm_store_release_exclusive<mode>"
20347
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
20348
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
20349
+ (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua")
20350
+ (unspec_volatile:QHSI
20351
+ [(match_operand:QHSI 2 "s_register_operand" "r")]
20353
+ "TARGET_HAVE_LDACQ"
20354
+ "stlex<sync_sfx>%?\t%0, %2, %C1"
20355
+ [(set_attr "predicable" "yes")])
20356
--- a/src/gcc/config/arm/neon-testgen.ml
20357
+++ b/src/gcc/config/arm/neon-testgen.ml
20358
@@ -163,10 +163,12 @@
20359
match List.find (fun feature ->
20360
match feature with Requires_feature _ -> true
20361
| Requires_arch _ -> true
20362
+ | Requires_FP_bit 1 -> true
20365
Requires_feature "FMA" -> "arm_neonv2"
20366
| Requires_arch 8 -> "arm_v8_neon"
20367
+ | Requires_FP_bit 1 -> "arm_neon_fp16"
20368
| _ -> assert false
20369
with Not_found -> "arm_neon"
20371
--- a/src/gcc/config/arm/arm.md
20372
+++ b/src/gcc/config/arm/arm.md
20374
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
20375
; arm_arch6. This attribute is used to compute attribute "enabled",
20376
; use type "any" to enable an alternative in all cases.
20377
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
20378
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
20379
(const_string "any"))
20381
(define_attr "arch_enabled" "no,yes"
20382
@@ -129,24 +129,16 @@
20383
(match_test "TARGET_32BIT && !arm_arch6"))
20384
(const_string "yes")
20386
- (and (eq_attr "arch" "onlya8")
20387
- (eq_attr "tune" "cortexa8"))
20388
+ (and (eq_attr "arch" "avoid_neon_for_64bits")
20389
+ (match_test "TARGET_NEON")
20390
+ (not (match_test "TARGET_PREFER_NEON_64BITS")))
20391
(const_string "yes")
20393
- (and (eq_attr "arch" "neon_onlya8")
20394
- (eq_attr "tune" "cortexa8")
20395
- (match_test "TARGET_NEON"))
20396
+ (and (eq_attr "arch" "neon_for_64bits")
20397
+ (match_test "TARGET_NEON")
20398
+ (match_test "TARGET_PREFER_NEON_64BITS"))
20399
(const_string "yes")
20401
- (and (eq_attr "arch" "nota8")
20402
- (not (eq_attr "tune" "cortexa8")))
20403
- (const_string "yes")
20405
- (and (eq_attr "arch" "neon_nota8")
20406
- (not (eq_attr "tune" "cortexa8"))
20407
- (match_test "TARGET_NEON"))
20408
- (const_string "yes")
20410
(and (eq_attr "arch" "iwmmxt2")
20411
(match_test "TARGET_REALLY_IWMMXT2"))
20412
(const_string "yes")]
20413
@@ -296,6 +288,8 @@
20422
@@ -502,7 +496,7 @@
20424
(define_attr "generic_sched" "yes,no"
20425
(const (if_then_else
20426
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4,marvell_pj4")
20427
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexm4,marvell_pj4")
20428
(eq_attr "tune_cortexr4" "yes"))
20429
(const_string "no")
20430
(const_string "yes"))))
20431
@@ -510,7 +504,7 @@
20432
(define_attr "generic_vfp" "yes,no"
20433
(const (if_then_else
20434
(and (eq_attr "fpu" "vfp")
20435
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4,marvell_pj4")
20436
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4")
20437
(eq_attr "tune_cortexr4" "no"))
20438
(const_string "yes")
20439
(const_string "no"))))
20440
@@ -531,6 +525,7 @@
20441
(include "cortex-a8.md")
20442
(include "cortex-a9.md")
20443
(include "cortex-a15.md")
20444
+(include "cortex-a53.md")
20445
(include "cortex-r4.md")
20446
(include "cortex-r4f.md")
20447
(include "cortex-m4.md")
20448
@@ -844,7 +839,7 @@
20450
;; This is the canonicalization of addsi3_compare0_for_combiner when the
20451
;; addend is a constant.
20452
-(define_insn "*cmpsi2_addneg"
20453
+(define_insn "cmpsi2_addneg"
20454
[(set (reg:CC CC_REGNUM)
20456
(match_operand:SI 1 "s_register_operand" "r,r")
20457
@@ -975,7 +970,8 @@
20460
sbc%?\\t%0, %1, #%B2"
20461
- [(set_attr "conds" "use")]
20462
+ [(set_attr "conds" "use")
20463
+ (set_attr "predicable" "yes")]
20466
(define_insn "*addsi3_carryin_alt2_<optab>"
20467
@@ -987,7 +983,8 @@
20470
sbc%?\\t%0, %1, #%B2"
20471
- [(set_attr "conds" "use")]
20472
+ [(set_attr "conds" "use")
20473
+ (set_attr "predicable" "yes")]
20476
(define_insn "*addsi3_carryin_shift_<optab>"
20477
@@ -1001,6 +998,7 @@
20479
"adc%?\\t%0, %1, %3%S2"
20480
[(set_attr "conds" "use")
20481
+ (set_attr "predicable" "yes")
20482
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
20483
(const_string "alu_shift")
20484
(const_string "alu_shift_reg")))]
20485
@@ -1017,26 +1015,88 @@
20486
[(set_attr "conds" "set")]
20489
-(define_expand "incscc"
20490
+(define_insn "*subsi3_carryin"
20491
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
20492
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
20493
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
20494
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
20495
+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I")
20496
+ (match_operand:SI 2 "s_register_operand" "r,r"))
20497
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20501
+ sbc%?\\t%0, %1, %2
20502
+ rsc%?\\t%0, %2, %1"
20503
+ [(set_attr "conds" "use")
20504
+ (set_attr "arch" "*,a")
20505
+ (set_attr "predicable" "yes")]
20508
-(define_insn "*arm_incscc"
20509
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
20510
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
20511
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
20512
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
20513
+(define_insn "*subsi3_carryin_const"
20514
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
20515
+ (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r")
20516
+ (match_operand:SI 2 "arm_not_operand" "K"))
20517
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20519
+ "sbc\\t%0, %1, #%B2"
20520
+ [(set_attr "conds" "use")]
20523
+(define_insn "*subsi3_carryin_compare"
20524
+ [(set (reg:CC CC_REGNUM)
20525
+ (compare:CC (match_operand:SI 1 "s_register_operand" "r")
20526
+ (match_operand:SI 2 "s_register_operand" "r")))
20527
+ (set (match_operand:SI 0 "s_register_operand" "=r")
20528
+ (minus:SI (minus:SI (match_dup 1)
20530
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20532
+ "sbcs\\t%0, %1, %2"
20533
+ [(set_attr "conds" "set")]
20536
+(define_insn "*subsi3_carryin_compare_const"
20537
+ [(set (reg:CC CC_REGNUM)
20538
+ (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r")
20539
+ (match_operand:SI 2 "arm_not_operand" "K")))
20540
+ (set (match_operand:SI 0 "s_register_operand" "=r")
20541
+ (minus:SI (plus:SI (match_dup 1)
20543
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20545
+ "sbcs\\t%0, %1, #%B2"
20546
+ [(set_attr "conds" "set")]
20549
+(define_insn "*subsi3_carryin_shift"
20550
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
20551
+ (minus:SI (minus:SI
20552
+ (match_operand:SI 1 "s_register_operand" "r")
20553
+ (match_operator:SI 2 "shift_operator"
20554
+ [(match_operand:SI 3 "s_register_operand" "r")
20555
+ (match_operand:SI 4 "reg_or_int_operand" "rM")]))
20556
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20558
+ "sbc%?\\t%0, %1, %3%S2"
20559
+ [(set_attr "conds" "use")
20560
+ (set_attr "predicable" "yes")
20561
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
20562
+ (const_string "alu_shift")
20563
+ (const_string "alu_shift_reg")))]
20566
+(define_insn "*rsbsi3_carryin_shift"
20567
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
20568
+ (minus:SI (minus:SI
20569
+ (match_operator:SI 2 "shift_operator"
20570
+ [(match_operand:SI 3 "s_register_operand" "r")
20571
+ (match_operand:SI 4 "reg_or_int_operand" "rM")])
20572
+ (match_operand:SI 1 "s_register_operand" "r"))
20573
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20576
- add%d2\\t%0, %1, #1
20577
- mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1"
20578
+ "rsc%?\\t%0, %1, %3%S2"
20579
[(set_attr "conds" "use")
20580
- (set_attr "length" "4,8")]
20581
+ (set_attr "predicable" "yes")
20582
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
20583
+ (const_string "alu_shift")
20584
+ (const_string "alu_shift_reg")))]
20587
; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant.
20588
@@ -1087,13 +1147,27 @@
20592
-(define_insn "*arm_subdi3"
20593
+(define_insn_and_split "*arm_subdi3"
20594
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r")
20595
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
20596
(match_operand:DI 2 "s_register_operand" "r,0,0")))
20597
(clobber (reg:CC CC_REGNUM))]
20598
"TARGET_32BIT && !TARGET_NEON"
20599
- "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
20600
+ "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
20601
+ "&& reload_completed"
20602
+ [(parallel [(set (reg:CC CC_REGNUM)
20603
+ (compare:CC (match_dup 1) (match_dup 2)))
20604
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
20605
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
20606
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20608
+ operands[3] = gen_highpart (SImode, operands[0]);
20609
+ operands[0] = gen_lowpart (SImode, operands[0]);
20610
+ operands[4] = gen_highpart (SImode, operands[1]);
20611
+ operands[1] = gen_lowpart (SImode, operands[1]);
20612
+ operands[5] = gen_highpart (SImode, operands[2]);
20613
+ operands[2] = gen_lowpart (SImode, operands[2]);
20615
[(set_attr "conds" "clob")
20616
(set_attr "length" "8")]
20618
@@ -1108,55 +1182,113 @@
20619
[(set_attr "length" "4")]
20622
-(define_insn "*subdi_di_zesidi"
20623
+(define_insn_and_split "*subdi_di_zesidi"
20624
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
20625
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
20627
(match_operand:SI 2 "s_register_operand" "r,r"))))
20628
(clobber (reg:CC CC_REGNUM))]
20630
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
20631
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
20632
+ "&& reload_completed"
20633
+ [(parallel [(set (reg:CC CC_REGNUM)
20634
+ (compare:CC (match_dup 1) (match_dup 2)))
20635
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
20636
+ (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5))
20637
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20639
+ operands[3] = gen_highpart (SImode, operands[0]);
20640
+ operands[0] = gen_lowpart (SImode, operands[0]);
20641
+ operands[4] = gen_highpart (SImode, operands[1]);
20642
+ operands[1] = gen_lowpart (SImode, operands[1]);
20643
+ operands[5] = GEN_INT (~0);
20645
[(set_attr "conds" "clob")
20646
(set_attr "length" "8")]
20649
-(define_insn "*subdi_di_sesidi"
20650
+(define_insn_and_split "*subdi_di_sesidi"
20651
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
20652
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
20654
(match_operand:SI 2 "s_register_operand" "r,r"))))
20655
(clobber (reg:CC CC_REGNUM))]
20657
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
20658
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
20659
+ "&& reload_completed"
20660
+ [(parallel [(set (reg:CC CC_REGNUM)
20661
+ (compare:CC (match_dup 1) (match_dup 2)))
20662
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
20663
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4)
20664
+ (ashiftrt:SI (match_dup 2)
20666
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20668
+ operands[3] = gen_highpart (SImode, operands[0]);
20669
+ operands[0] = gen_lowpart (SImode, operands[0]);
20670
+ operands[4] = gen_highpart (SImode, operands[1]);
20671
+ operands[1] = gen_lowpart (SImode, operands[1]);
20673
[(set_attr "conds" "clob")
20674
(set_attr "length" "8")]
20677
-(define_insn "*subdi_zesidi_di"
20678
+(define_insn_and_split "*subdi_zesidi_di"
20679
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
20680
(minus:DI (zero_extend:DI
20681
(match_operand:SI 2 "s_register_operand" "r,r"))
20682
(match_operand:DI 1 "s_register_operand" "0,r")))
20683
(clobber (reg:CC CC_REGNUM))]
20685
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
20686
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
20687
+ ; is equivalent to:
20688
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0"
20689
+ "&& reload_completed"
20690
+ [(parallel [(set (reg:CC CC_REGNUM)
20691
+ (compare:CC (match_dup 2) (match_dup 1)))
20692
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
20693
+ (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4))
20694
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20696
+ operands[3] = gen_highpart (SImode, operands[0]);
20697
+ operands[0] = gen_lowpart (SImode, operands[0]);
20698
+ operands[4] = gen_highpart (SImode, operands[1]);
20699
+ operands[1] = gen_lowpart (SImode, operands[1]);
20701
[(set_attr "conds" "clob")
20702
(set_attr "length" "8")]
20705
-(define_insn "*subdi_sesidi_di"
20706
+(define_insn_and_split "*subdi_sesidi_di"
20707
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
20708
(minus:DI (sign_extend:DI
20709
(match_operand:SI 2 "s_register_operand" "r,r"))
20710
(match_operand:DI 1 "s_register_operand" "0,r")))
20711
(clobber (reg:CC CC_REGNUM))]
20713
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
20714
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
20715
+ ; is equivalent to:
20716
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31"
20717
+ "&& reload_completed"
20718
+ [(parallel [(set (reg:CC CC_REGNUM)
20719
+ (compare:CC (match_dup 2) (match_dup 1)))
20720
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
20721
+ (set (match_dup 3) (minus:SI (minus:SI
20722
+ (ashiftrt:SI (match_dup 2)
20725
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20727
+ operands[3] = gen_highpart (SImode, operands[0]);
20728
+ operands[0] = gen_lowpart (SImode, operands[0]);
20729
+ operands[4] = gen_highpart (SImode, operands[1]);
20730
+ operands[1] = gen_lowpart (SImode, operands[1]);
20732
[(set_attr "conds" "clob")
20733
(set_attr "length" "8")]
20736
-(define_insn "*subdi_zesidi_zesidi"
20737
+(define_insn_and_split "*subdi_zesidi_zesidi"
20738
[(set (match_operand:DI 0 "s_register_operand" "=r")
20739
(minus:DI (zero_extend:DI
20740
(match_operand:SI 1 "s_register_operand" "r"))
20741
@@ -1164,7 +1296,17 @@
20742
(match_operand:SI 2 "s_register_operand" "r"))))
20743
(clobber (reg:CC CC_REGNUM))]
20745
- "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
20746
+ "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
20747
+ "&& reload_completed"
20748
+ [(parallel [(set (reg:CC CC_REGNUM)
20749
+ (compare:CC (match_dup 1) (match_dup 2)))
20750
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
20751
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1))
20752
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
20754
+ operands[3] = gen_highpart (SImode, operands[0]);
20755
+ operands[0] = gen_lowpart (SImode, operands[0]);
20757
[(set_attr "conds" "clob")
20758
(set_attr "length" "8")]
20760
@@ -1254,7 +1396,7 @@
20761
(set_attr "type" "simple_alu_imm,*,*")]
20764
-(define_insn "*subsi3_compare"
20765
+(define_insn "subsi3_compare"
20766
[(set (reg:CC CC_REGNUM)
20767
(compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
20768
(match_operand:SI 2 "arm_rhs_operand" "I,r,r")))
20769
@@ -1269,29 +1411,6 @@
20770
(set_attr "type" "simple_alu_imm,*,*")]
20773
-(define_expand "decscc"
20774
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
20775
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
20776
- (match_operator:SI 2 "arm_comparison_operator"
20777
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
20782
-(define_insn "*arm_decscc"
20783
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
20784
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
20785
- (match_operator:SI 2 "arm_comparison_operator"
20786
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
20789
- sub%d2\\t%0, %1, #1
20790
- mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1"
20791
- [(set_attr "conds" "use")
20792
- (set_attr "length" "*,8")
20793
- (set_attr "type" "simple_alu_imm,*")]
20796
(define_expand "subsf3"
20797
[(set (match_operand:SF 0 "s_register_operand" "")
20798
(minus:SF (match_operand:SF 1 "s_register_operand" "")
20799
@@ -2024,13 +2143,58 @@
20803
-(define_insn "*anddi3_insn"
20804
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
20805
- (and:DI (match_operand:DI 1 "s_register_operand" "%0,r")
20806
- (match_operand:DI 2 "s_register_operand" "r,r")))]
20807
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
20809
- [(set_attr "length" "8")]
20810
+(define_insn_and_split "*anddi3_insn"
20811
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w")
20812
+ (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0")
20813
+ (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))]
20814
+ "TARGET_32BIT && !TARGET_IWMMXT"
20816
+ switch (which_alternative)
20821
+ case 3: /* fall through */
20823
+ case 4: /* fall through */
20824
+ case 8: return "vand\t%P0, %P1, %P2";
20825
+ case 5: /* fall through */
20826
+ case 9: return neon_output_logic_immediate ("vand", &operands[2],
20827
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
20828
+ case 6: return "#";
20829
+ case 7: return "#";
20830
+ default: gcc_unreachable ();
20833
+ "TARGET_32BIT && !TARGET_IWMMXT"
20834
+ [(set (match_dup 3) (match_dup 4))
20835
+ (set (match_dup 5) (match_dup 6))]
20838
+ operands[3] = gen_lowpart (SImode, operands[0]);
20839
+ operands[5] = gen_highpart (SImode, operands[0]);
20841
+ operands[4] = simplify_gen_binary (AND, SImode,
20842
+ gen_lowpart (SImode, operands[1]),
20843
+ gen_lowpart (SImode, operands[2]));
20844
+ operands[6] = simplify_gen_binary (AND, SImode,
20845
+ gen_highpart (SImode, operands[1]),
20846
+ gen_highpart_mode (SImode, DImode, operands[2]));
20849
+ [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
20850
+ (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*,
20851
+ avoid_neon_for_64bits,avoid_neon_for_64bits")
20852
+ (set_attr "length" "8,8,8,8,*,*,8,8,*,*")
20853
+ (set (attr "insn_enabled") (if_then_else
20854
+ (lt (symbol_ref "which_alternative")
20856
+ (if_then_else (match_test "!TARGET_NEON")
20857
+ (const_string "yes")
20858
+ (const_string "no"))
20859
+ (if_then_else (match_test "TARGET_NEON")
20860
+ (const_string "yes")
20861
+ (const_string "no"))))]
20864
(define_insn_and_split "*anddi_zesidi_di"
20865
@@ -3096,13 +3260,17 @@
20869
-(define_insn "*andsi_iorsi3_notsi"
20870
+(define_insn_and_split "*andsi_iorsi3_notsi"
20871
[(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
20872
(and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
20873
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))
20874
(not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))]
20876
- "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
20877
+ "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
20878
+ "&& reload_completed"
20879
+ [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2)))
20880
+ (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))]
20882
[(set_attr "length" "8")
20883
(set_attr "ce_count" "2")
20884
(set_attr "predicable" "yes")]
20885
@@ -3253,15 +3421,23 @@
20886
[(set_attr "predicable" "yes")]
20889
-(define_insn "*arm_smax_insn"
20890
+(define_insn_and_split "*arm_smax_insn"
20891
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
20892
(smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
20893
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
20894
(clobber (reg:CC CC_REGNUM))]
20897
- cmp\\t%1, %2\;movlt\\t%0, %2
20898
- cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
20900
+ ; cmp\\t%1, %2\;movlt\\t%0, %2
20901
+ ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
20903
+ [(set (reg:CC CC_REGNUM)
20904
+ (compare:CC (match_dup 1) (match_dup 2)))
20905
+ (set (match_dup 0)
20906
+ (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0))
20910
[(set_attr "conds" "clob")
20911
(set_attr "length" "8,12")]
20913
@@ -3293,15 +3469,23 @@
20914
[(set_attr "predicable" "yes")]
20917
-(define_insn "*arm_smin_insn"
20918
+(define_insn_and_split "*arm_smin_insn"
20919
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
20920
(smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
20921
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
20922
(clobber (reg:CC CC_REGNUM))]
20925
- cmp\\t%1, %2\;movge\\t%0, %2
20926
- cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
20928
+ ; cmp\\t%1, %2\;movge\\t%0, %2
20929
+ ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
20931
+ [(set (reg:CC CC_REGNUM)
20932
+ (compare:CC (match_dup 1) (match_dup 2)))
20933
+ (set (match_dup 0)
20934
+ (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0))
20938
[(set_attr "conds" "clob")
20939
(set_attr "length" "8,12")]
20941
@@ -3316,16 +3500,24 @@
20945
-(define_insn "*arm_umaxsi3"
20946
+(define_insn_and_split "*arm_umaxsi3"
20947
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
20948
(umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
20949
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
20950
(clobber (reg:CC CC_REGNUM))]
20953
- cmp\\t%1, %2\;movcc\\t%0, %2
20954
- cmp\\t%1, %2\;movcs\\t%0, %1
20955
- cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
20957
+ ; cmp\\t%1, %2\;movcc\\t%0, %2
20958
+ ; cmp\\t%1, %2\;movcs\\t%0, %1
20959
+ ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
20961
+ [(set (reg:CC CC_REGNUM)
20962
+ (compare:CC (match_dup 1) (match_dup 2)))
20963
+ (set (match_dup 0)
20964
+ (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
20968
[(set_attr "conds" "clob")
20969
(set_attr "length" "8,8,12")]
20971
@@ -3340,16 +3532,24 @@
20975
-(define_insn "*arm_uminsi3"
20976
+(define_insn_and_split "*arm_uminsi3"
20977
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
20978
(umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
20979
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
20980
(clobber (reg:CC CC_REGNUM))]
20983
- cmp\\t%1, %2\;movcs\\t%0, %2
20984
- cmp\\t%1, %2\;movcc\\t%0, %1
20985
- cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
20987
+ ; cmp\\t%1, %2\;movcs\\t%0, %2
20988
+ ; cmp\\t%1, %2\;movcc\\t%0, %1
20989
+ ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
20991
+ [(set (reg:CC CC_REGNUM)
20992
+ (compare:CC (match_dup 1) (match_dup 2)))
20993
+ (set (match_dup 0)
20994
+ (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))
20998
[(set_attr "conds" "clob")
20999
(set_attr "length" "8,8,12")]
21001
@@ -3360,7 +3560,7 @@
21002
[(match_operand:SI 1 "s_register_operand" "r")
21003
(match_operand:SI 2 "s_register_operand" "r")]))
21004
(clobber (reg:CC CC_REGNUM))]
21006
+ "TARGET_32BIT && optimize_insn_for_size_p()"
21008
operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
21009
operands[1], operands[2]);
21010
@@ -3423,6 +3623,50 @@
21014
+; Reject the frame pointer in operand[1], since reloading this after
21015
+; it has been eliminated can cause carnage.
21016
+(define_insn_and_split "*minmax_arithsi_non_canon"
21017
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
21019
+ (match_operand:SI 1 "s_register_operand" "0,?r")
21020
+ (match_operator:SI 4 "minmax_operator"
21021
+ [(match_operand:SI 2 "s_register_operand" "r,r")
21022
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
21023
+ (clobber (reg:CC CC_REGNUM))]
21024
+ "TARGET_32BIT && !arm_eliminable_register (operands[1])"
21026
+ "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed"
21027
+ [(set (reg:CC CC_REGNUM)
21028
+ (compare:CC (match_dup 2) (match_dup 3)))
21030
+ (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)])
21031
+ (set (match_dup 0)
21032
+ (minus:SI (match_dup 1)
21034
+ (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)])
21035
+ (set (match_dup 0)
21036
+ (minus:SI (match_dup 1)
21037
+ (match_dup 3))))]
21039
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
21040
+ operands[2], operands[3]);
21041
+ enum rtx_code rc = minmax_code (operands[4]);
21042
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode,
21043
+ operands[2], operands[3]);
21045
+ if (mode == CCFPmode || mode == CCFPEmode)
21046
+ rc = reverse_condition_maybe_unordered (rc);
21048
+ rc = reverse_condition (rc);
21049
+ operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]);
21051
+ [(set_attr "conds" "clob")
21052
+ (set (attr "length")
21053
+ (if_then_else (eq_attr "is_thumb" "yes")
21055
+ (const_int 12)))]
21058
(define_code_iterator SAT [smin smax])
21059
(define_code_iterator SATrev [smin smax])
21060
(define_code_attr SATlo [(smin "1") (smax "2")])
21061
@@ -3533,13 +3777,26 @@
21065
-(define_insn "arm_ashldi3_1bit"
21066
+(define_insn_and_split "arm_ashldi3_1bit"
21067
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
21068
(ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
21070
(clobber (reg:CC CC_REGNUM))]
21072
- "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
21073
+ "#" ; "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
21074
+ "&& reload_completed"
21075
+ [(parallel [(set (reg:CC CC_REGNUM)
21076
+ (compare:CC (ashift:SI (match_dup 1) (const_int 1))
21078
+ (set (match_dup 0) (ashift:SI (match_dup 1) (const_int 1)))])
21079
+ (set (match_dup 2) (plus:SI (plus:SI (match_dup 3) (match_dup 3))
21080
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
21082
+ operands[2] = gen_highpart (SImode, operands[0]);
21083
+ operands[0] = gen_lowpart (SImode, operands[0]);
21084
+ operands[3] = gen_highpart (SImode, operands[1]);
21085
+ operands[1] = gen_lowpart (SImode, operands[1]);
21087
[(set_attr "conds" "clob")
21088
(set_attr "length" "8")]
21090
@@ -3615,18 +3872,43 @@
21094
-(define_insn "arm_ashrdi3_1bit"
21095
+(define_insn_and_split "arm_ashrdi3_1bit"
21096
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
21097
(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
21099
(clobber (reg:CC CC_REGNUM))]
21101
- "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
21102
+ "#" ; "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
21103
+ "&& reload_completed"
21104
+ [(parallel [(set (reg:CC CC_REGNUM)
21105
+ (compare:CC (ashiftrt:SI (match_dup 3) (const_int 1))
21107
+ (set (match_dup 2) (ashiftrt:SI (match_dup 3) (const_int 1)))])
21108
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
21109
+ (reg:CC_C CC_REGNUM)]
21112
+ operands[2] = gen_highpart (SImode, operands[0]);
21113
+ operands[0] = gen_lowpart (SImode, operands[0]);
21114
+ operands[3] = gen_highpart (SImode, operands[1]);
21115
+ operands[1] = gen_lowpart (SImode, operands[1]);
21117
[(set_attr "conds" "clob")
21118
- (set_attr "insn" "mov")
21119
(set_attr "length" "8")]
21122
+(define_insn "*rrx"
21123
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
21124
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")
21125
+ (reg:CC_C CC_REGNUM)]
21128
+ "mov\\t%0, %1, rrx"
21129
+ [(set_attr "conds" "use")
21130
+ (set_attr "insn" "mov")
21131
+ (set_attr "type" "alu_shift")]
21134
(define_expand "ashrsi3"
21135
[(set (match_operand:SI 0 "s_register_operand" "")
21136
(ashiftrt:SI (match_operand:SI 1 "s_register_operand" "")
21137
@@ -3695,15 +3977,28 @@
21141
-(define_insn "arm_lshrdi3_1bit"
21142
+(define_insn_and_split "arm_lshrdi3_1bit"
21143
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
21144
(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
21146
(clobber (reg:CC CC_REGNUM))]
21148
- "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
21149
+ "#" ; "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
21150
+ "&& reload_completed"
21151
+ [(parallel [(set (reg:CC CC_REGNUM)
21152
+ (compare:CC (lshiftrt:SI (match_dup 3) (const_int 1))
21154
+ (set (match_dup 2) (lshiftrt:SI (match_dup 3) (const_int 1)))])
21155
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
21156
+ (reg:CC_C CC_REGNUM)]
21159
+ operands[2] = gen_highpart (SImode, operands[0]);
21160
+ operands[0] = gen_lowpart (SImode, operands[0]);
21161
+ operands[3] = gen_highpart (SImode, operands[1]);
21162
+ operands[1] = gen_lowpart (SImode, operands[1]);
21164
[(set_attr "conds" "clob")
21165
- (set_attr "insn" "mov")
21166
(set_attr "length" "8")]
21169
@@ -3791,6 +4086,23 @@
21170
(const_string "alu_shift_reg")))]
21173
+(define_insn "*shiftsi3_compare"
21174
+ [(set (reg:CC CC_REGNUM)
21175
+ (compare:CC (match_operator:SI 3 "shift_operator"
21176
+ [(match_operand:SI 1 "s_register_operand" "r")
21177
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
21179
+ (set (match_operand:SI 0 "s_register_operand" "=r")
21180
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
21182
+ "* return arm_output_shift(operands, 1);"
21183
+ [(set_attr "conds" "set")
21184
+ (set_attr "shift" "1")
21185
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
21186
+ (const_string "alu_shift")
21187
+ (const_string "alu_shift_reg")))]
21190
(define_insn "*shiftsi3_compare0"
21191
[(set (reg:CC_NOOV CC_REGNUM)
21192
(compare:CC_NOOV (match_operator:SI 3 "shift_operator"
21193
@@ -4090,6 +4402,64 @@
21194
(set_attr "predicable" "yes")
21195
(set_attr "type" "store1")])
21197
+;; Unaligned double-word load and store.
21198
+;; Split after reload into two unaligned single-word accesses.
21199
+;; It prevents lower_subreg from splitting some other aligned
21200
+;; double-word accesses too early. Used for internal memcpy.
21202
+(define_insn_and_split "unaligned_loaddi"
21203
+ [(set (match_operand:DI 0 "s_register_operand" "=l,r")
21204
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
21205
+ UNSPEC_UNALIGNED_LOAD))]
21206
+ "unaligned_access && TARGET_32BIT"
21208
+ "&& reload_completed"
21209
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))
21210
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]
21212
+ operands[2] = gen_highpart (SImode, operands[0]);
21213
+ operands[0] = gen_lowpart (SImode, operands[0]);
21214
+ operands[3] = gen_highpart (SImode, operands[1]);
21215
+ operands[1] = gen_lowpart (SImode, operands[1]);
21217
+ /* If the first destination register overlaps with the base address,
21218
+ swap the order in which the loads are emitted. */
21219
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
21221
+ rtx tmp = operands[1];
21222
+ operands[1] = operands[3];
21223
+ operands[3] = tmp;
21224
+ tmp = operands[0];
21225
+ operands[0] = operands[2];
21226
+ operands[2] = tmp;
21229
+ [(set_attr "arch" "t2,any")
21230
+ (set_attr "length" "4,8")
21231
+ (set_attr "predicable" "yes")
21232
+ (set_attr "type" "load2")])
21234
+(define_insn_and_split "unaligned_storedi"
21235
+ [(set (match_operand:DI 0 "memory_operand" "=o,o")
21236
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
21237
+ UNSPEC_UNALIGNED_STORE))]
21238
+ "unaligned_access && TARGET_32BIT"
21240
+ "&& reload_completed"
21241
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))
21242
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]
21244
+ operands[2] = gen_highpart (SImode, operands[0]);
21245
+ operands[0] = gen_lowpart (SImode, operands[0]);
21246
+ operands[3] = gen_highpart (SImode, operands[1]);
21247
+ operands[1] = gen_lowpart (SImode, operands[1]);
21249
+ [(set_attr "arch" "t2,any")
21250
+ (set_attr "length" "4,8")
21251
+ (set_attr "predicable" "yes")
21252
+ (set_attr "type" "store2")])
21255
(define_insn "*extv_reg"
21256
[(set (match_operand:SI 0 "s_register_operand" "=r")
21257
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
21258
@@ -4154,12 +4524,24 @@
21260
;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1).
21261
;; The first alternative allows the common case of a *full* overlap.
21262
-(define_insn "*arm_negdi2"
21263
+(define_insn_and_split "*arm_negdi2"
21264
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
21265
(neg:DI (match_operand:DI 1 "s_register_operand" "0,r")))
21266
(clobber (reg:CC CC_REGNUM))]
21268
- "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
21269
+ "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
21270
+ "&& reload_completed"
21271
+ [(parallel [(set (reg:CC CC_REGNUM)
21272
+ (compare:CC (const_int 0) (match_dup 1)))
21273
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
21274
+ (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3))
21275
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
21277
+ operands[2] = gen_highpart (SImode, operands[0]);
21278
+ operands[0] = gen_lowpart (SImode, operands[0]);
21279
+ operands[3] = gen_highpart (SImode, operands[1]);
21280
+ operands[1] = gen_lowpart (SImode, operands[1]);
21282
[(set_attr "conds" "clob")
21283
(set_attr "length" "8")]
21285
@@ -4209,6 +4591,73 @@
21286
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
21289
+;; Negate an extended 32-bit value.
21290
+(define_insn_and_split "*negdi_extendsidi"
21291
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r,l,&l")
21292
+ (neg:DI (sign_extend:DI (match_operand:SI 1 "s_register_operand" "0,r,0,l"))))
21293
+ (clobber (reg:CC CC_REGNUM))]
21295
+ "#" ; rsb\\t%Q0, %1, #0\;asr\\t%R0, %Q0, #31
21296
+ "&& reload_completed"
21299
+ operands[2] = gen_highpart (SImode, operands[0]);
21300
+ operands[0] = gen_lowpart (SImode, operands[0]);
21301
+ rtx tmp = gen_rtx_SET (VOIDmode,
21303
+ gen_rtx_MINUS (SImode,
21312
+ /* Set the flags, to emit the short encoding in Thumb2. */
21313
+ rtx flags = gen_rtx_SET (VOIDmode,
21314
+ gen_rtx_REG (CCmode, CC_REGNUM),
21315
+ gen_rtx_COMPARE (CCmode,
21318
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
21323
+ emit_insn (gen_rtx_SET (VOIDmode,
21325
+ gen_rtx_ASHIFTRT (SImode,
21330
+ [(set_attr "length" "8,8,4,4")
21331
+ (set_attr "arch" "a,a,t2,t2")]
21334
+(define_insn_and_split "*negdi_zero_extendsidi"
21335
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
21336
+ (neg:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r"))))
21337
+ (clobber (reg:CC CC_REGNUM))]
21339
+ "#" ; "rsbs\\t%Q0, %1, #0\;sbc\\t%R0,%R0,%R0"
21340
+ ;; Don't care what register is input to sbc,
21341
+ ;; since we just just need to propagate the carry.
21342
+ "&& reload_completed"
21343
+ [(parallel [(set (reg:CC CC_REGNUM)
21344
+ (compare:CC (const_int 0) (match_dup 1)))
21345
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
21346
+ (set (match_dup 2) (minus:SI (minus:SI (match_dup 2) (match_dup 2))
21347
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
21349
+ operands[2] = gen_highpart (SImode, operands[0]);
21350
+ operands[0] = gen_lowpart (SImode, operands[0]);
21352
+ [(set_attr "conds" "clob")
21353
+ (set_attr "length" "8")] ;; length in thumb is 4
21356
;; abssi2 doesn't really clobber the condition codes if a different register
21357
;; is being set. To keep things simple, assume during rtl manipulations that
21358
;; it does, but tell the final scan operator the truth. Similarly for
21359
@@ -4227,14 +4676,67 @@
21360
operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
21363
-(define_insn "*arm_abssi2"
21364
+(define_insn_and_split "*arm_abssi2"
21365
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
21366
(abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
21367
(clobber (reg:CC CC_REGNUM))]
21370
- cmp\\t%0, #0\;rsblt\\t%0, %0, #0
21371
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
21373
+ "&& reload_completed"
21376
+ /* if (which_alternative == 0) */
21377
+ if (REGNO(operands[0]) == REGNO(operands[1]))
21379
+ /* Emit the pattern:
21380
+ cmp\\t%0, #0\;rsblt\\t%0, %0, #0
21381
+ [(set (reg:CC CC_REGNUM)
21382
+ (compare:CC (match_dup 0) (const_int 0)))
21383
+ (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0))
21384
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))]
21386
+ emit_insn (gen_rtx_SET (VOIDmode,
21387
+ gen_rtx_REG (CCmode, CC_REGNUM),
21388
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
21389
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21390
+ (gen_rtx_LT (SImode,
21391
+ gen_rtx_REG (CCmode, CC_REGNUM),
21393
+ (gen_rtx_SET (VOIDmode,
21395
+ (gen_rtx_MINUS (SImode,
21397
+ operands[1]))))));
21402
+ /* Emit the pattern:
21403
+ alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31
21404
+ [(set (match_dup 0)
21405
+ (xor:SI (match_dup 1)
21406
+ (ashiftrt:SI (match_dup 1) (const_int 31))))
21407
+ (set (match_dup 0)
21408
+ (minus:SI (match_dup 0)
21409
+ (ashiftrt:SI (match_dup 1) (const_int 31))))]
21411
+ emit_insn (gen_rtx_SET (VOIDmode,
21413
+ gen_rtx_XOR (SImode,
21414
+ gen_rtx_ASHIFTRT (SImode,
21418
+ emit_insn (gen_rtx_SET (VOIDmode,
21420
+ gen_rtx_MINUS (SImode,
21422
+ gen_rtx_ASHIFTRT (SImode,
21424
+ GEN_INT (31)))));
21428
[(set_attr "conds" "clob,*")
21429
(set_attr "shift" "1")
21430
(set_attr "predicable" "no, yes")
21431
@@ -4255,14 +4757,56 @@
21432
[(set_attr "length" "6")]
21435
-(define_insn "*arm_neg_abssi2"
21436
+(define_insn_and_split "*arm_neg_abssi2"
21437
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
21438
(neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
21439
(clobber (reg:CC CC_REGNUM))]
21442
- cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
21443
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
21445
+ "&& reload_completed"
21448
+ /* if (which_alternative == 0) */
21449
+ if (REGNO (operands[0]) == REGNO (operands[1]))
21451
+ /* Emit the pattern:
21452
+ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
21454
+ emit_insn (gen_rtx_SET (VOIDmode,
21455
+ gen_rtx_REG (CCmode, CC_REGNUM),
21456
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
21457
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21458
+ gen_rtx_GT (SImode,
21459
+ gen_rtx_REG (CCmode, CC_REGNUM),
21461
+ gen_rtx_SET (VOIDmode,
21463
+ (gen_rtx_MINUS (SImode,
21465
+ operands[1])))));
21469
+ /* Emit the pattern:
21470
+ eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31
21472
+ emit_insn (gen_rtx_SET (VOIDmode,
21474
+ gen_rtx_XOR (SImode,
21475
+ gen_rtx_ASHIFTRT (SImode,
21479
+ emit_insn (gen_rtx_SET (VOIDmode,
21481
+ gen_rtx_MINUS (SImode,
21482
+ gen_rtx_ASHIFTRT (SImode,
21489
[(set_attr "conds" "clob,*")
21490
(set_attr "shift" "1")
21491
(set_attr "predicable" "no, yes")
21492
@@ -4330,7 +4874,7 @@
21493
[(set_attr "length" "*,8,8,*")
21494
(set_attr "predicable" "no,yes,yes,no")
21495
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
21496
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
21497
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
21500
(define_expand "one_cmplsi2"
21501
@@ -4498,7 +5042,7 @@
21502
"TARGET_32BIT <qhs_zextenddi_cond>"
21504
[(set_attr "length" "8,4,8,8")
21505
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")
21506
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
21507
(set_attr "ce_count" "2")
21508
(set_attr "predicable" "yes")]
21510
@@ -4513,7 +5057,7 @@
21511
(set_attr "ce_count" "2")
21512
(set_attr "shift" "1")
21513
(set_attr "predicable" "yes")
21514
- (set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")]
21515
+ (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")]
21518
;; Splits for all extensions to DImode
21519
@@ -5313,8 +5857,8 @@
21522
(define_insn "*arm_movdi"
21523
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
21524
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))]
21525
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
21526
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))]
21528
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
21530
@@ -6738,8 +7282,8 @@
21533
(define_insn "*movdf_soft_insn"
21534
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
21535
- (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
21536
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
21537
+ (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
21538
"TARGET_32BIT && TARGET_SOFT_FLOAT
21539
&& ( register_operand (operands[0], DFmode)
21540
|| register_operand (operands[1], DFmode))"
21541
@@ -6869,10 +7413,18 @@
21542
(match_operand:BLK 1 "general_operand" "")
21543
(match_operand:SI 2 "const_int_operand" "")
21544
(match_operand:SI 3 "const_int_operand" "")]
21550
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
21551
+ && !optimize_function_for_size_p (cfun))
21553
+ if (gen_movmem_ldrd_strd (operands))
21558
if (arm_gen_movmemqi (operands))
21561
@@ -7617,23 +8169,64 @@
21562
;; if-conversion can not reduce to a conditional compare, so we do
21565
-(define_insn "*arm_cmpdi_insn"
21566
+(define_insn_and_split "*arm_cmpdi_insn"
21567
[(set (reg:CC_NCV CC_REGNUM)
21568
(compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r")
21569
(match_operand:DI 1 "arm_di_operand" "rDi")))
21570
(clobber (match_scratch:SI 2 "=r"))]
21572
- "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
21573
+ "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
21574
+ "&& reload_completed"
21575
+ [(set (reg:CC CC_REGNUM)
21576
+ (compare:CC (match_dup 0) (match_dup 1)))
21577
+ (parallel [(set (reg:CC CC_REGNUM)
21578
+ (compare:CC (match_dup 3) (match_dup 4)))
21579
+ (set (match_dup 2)
21580
+ (minus:SI (match_dup 5)
21581
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])]
21583
+ operands[3] = gen_highpart (SImode, operands[0]);
21584
+ operands[0] = gen_lowpart (SImode, operands[0]);
21585
+ if (CONST_INT_P (operands[1]))
21587
+ operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode,
21590
+ operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]);
21594
+ operands[4] = gen_highpart (SImode, operands[1]);
21595
+ operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]);
21597
+ operands[1] = gen_lowpart (SImode, operands[1]);
21598
+ operands[2] = gen_lowpart (SImode, operands[2]);
21600
[(set_attr "conds" "set")
21601
(set_attr "length" "8")]
21604
-(define_insn "*arm_cmpdi_unsigned"
21605
+(define_insn_and_split "*arm_cmpdi_unsigned"
21606
[(set (reg:CC_CZ CC_REGNUM)
21607
(compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
21608
(match_operand:DI 1 "arm_di_operand" "rDi")))]
21610
- "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
21611
+ "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
21612
+ "&& reload_completed"
21613
+ [(set (reg:CC CC_REGNUM)
21614
+ (compare:CC (match_dup 2) (match_dup 3)))
21615
+ (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0))
21616
+ (set (reg:CC CC_REGNUM)
21617
+ (compare:CC (match_dup 0) (match_dup 1))))]
21619
+ operands[2] = gen_highpart (SImode, operands[0]);
21620
+ operands[0] = gen_lowpart (SImode, operands[0]);
21621
+ if (CONST_INT_P (operands[1]))
21622
+ operands[3] = gen_highpart_mode (SImode, DImode, operands[1]);
21624
+ operands[3] = gen_highpart (SImode, operands[1]);
21625
+ operands[1] = gen_lowpart (SImode, operands[1]);
21627
[(set_attr "conds" "set")
21628
(set_attr "length" "8")]
21630
@@ -7758,36 +8351,56 @@
21631
operands[3] = const0_rtx;"
21634
-(define_insn "*mov_scc"
21635
+(define_insn_and_split "*mov_scc"
21636
[(set (match_operand:SI 0 "s_register_operand" "=r")
21637
(match_operator:SI 1 "arm_comparison_operator"
21638
[(match_operand 2 "cc_register" "") (const_int 0)]))]
21640
- "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
21641
+ "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
21643
+ [(set (match_dup 0)
21644
+ (if_then_else:SI (match_dup 1)
21648
[(set_attr "conds" "use")
21649
- (set_attr "insn" "mov")
21650
(set_attr "length" "8")]
21653
-(define_insn "*mov_negscc"
21654
+(define_insn_and_split "*mov_negscc"
21655
[(set (match_operand:SI 0 "s_register_operand" "=r")
21656
(neg:SI (match_operator:SI 1 "arm_comparison_operator"
21657
[(match_operand 2 "cc_register" "") (const_int 0)])))]
21659
- "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
21660
+ "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
21662
+ [(set (match_dup 0)
21663
+ (if_then_else:SI (match_dup 1)
21667
+ operands[3] = GEN_INT (~0);
21669
[(set_attr "conds" "use")
21670
- (set_attr "insn" "mov")
21671
(set_attr "length" "8")]
21674
-(define_insn "*mov_notscc"
21675
+(define_insn_and_split "*mov_notscc"
21676
[(set (match_operand:SI 0 "s_register_operand" "=r")
21677
(not:SI (match_operator:SI 1 "arm_comparison_operator"
21678
[(match_operand 2 "cc_register" "") (const_int 0)])))]
21680
- "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
21681
+ "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
21683
+ [(set (match_dup 0)
21684
+ (if_then_else:SI (match_dup 1)
21688
+ operands[3] = GEN_INT (~1);
21689
+ operands[4] = GEN_INT (~0);
21691
[(set_attr "conds" "use")
21692
- (set_attr "insn" "mov")
21693
(set_attr "length" "8")]
21696
@@ -8110,7 +8723,40 @@
21700
-(define_insn "*movsicc_insn"
21701
+(define_insn "*cmov<mode>"
21702
+ [(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>")
21703
+ (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator"
21704
+ [(match_operand 2 "cc_register" "") (const_int 0)])
21705
+ (match_operand:SDF 3 "s_register_operand"
21706
+ "<F_constraint>")
21707
+ (match_operand:SDF 4 "s_register_operand"
21708
+ "<F_constraint>")))]
21709
+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
21712
+ enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]);
21719
+ return \"vsel%d1.<V_if_elem>\\t%<V_reg>0, %<V_reg>3, %<V_reg>4\";
21724
+ return \"vsel%D1.<V_if_elem>\\t%<V_reg>0, %<V_reg>4, %<V_reg>3\";
21726
+ gcc_unreachable ();
21730
+ [(set_attr "conds" "use")
21731
+ (set_attr "type" "f_sel<vfp_type>")]
21734
+(define_insn_and_split "*movsicc_insn"
21735
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
21737
(match_operator 3 "arm_comparison_operator"
21738
@@ -8123,10 +8769,45 @@
21742
- mov%d3\\t%0, %1\;mov%D3\\t%0, %2
21743
- mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
21744
- mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
21745
- mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
21750
+ ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2
21751
+ ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
21752
+ ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
21753
+ ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
21754
+ "&& reload_completed"
21757
+ enum rtx_code rev_code;
21758
+ enum machine_mode mode;
21761
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21763
+ gen_rtx_SET (VOIDmode,
21767
+ rev_code = GET_CODE (operands[3]);
21768
+ mode = GET_MODE (operands[4]);
21769
+ if (mode == CCFPmode || mode == CCFPEmode)
21770
+ rev_code = reverse_condition_maybe_unordered (rev_code);
21772
+ rev_code = reverse_condition (rev_code);
21774
+ rev_cond = gen_rtx_fmt_ee (rev_code,
21778
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
21780
+ gen_rtx_SET (VOIDmode,
21785
[(set_attr "length" "4,4,4,4,8,8,8,8")
21786
(set_attr "conds" "use")
21787
(set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")
21788
@@ -8255,7 +8936,7 @@
21789
(match_operand 1 "" ""))
21790
(use (match_operand 2 "" ""))
21791
(clobber (reg:SI LR_REGNUM))]
21792
- "TARGET_ARM && arm_arch5"
21793
+ "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)"
21795
[(set_attr "type" "call")]
21797
@@ -8265,7 +8946,7 @@
21798
(match_operand 1 "" ""))
21799
(use (match_operand 2 "" ""))
21800
(clobber (reg:SI LR_REGNUM))]
21801
- "TARGET_ARM && !arm_arch5"
21802
+ "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)"
21804
return output_call (operands);
21806
@@ -8284,7 +8965,7 @@
21807
(match_operand 1 "" ""))
21808
(use (match_operand 2 "" ""))
21809
(clobber (reg:SI LR_REGNUM))]
21810
- "TARGET_ARM && !arm_arch5"
21811
+ "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)"
21813
return output_call_mem (operands);
21815
@@ -8297,7 +8978,7 @@
21816
(match_operand 1 "" ""))
21817
(use (match_operand 2 "" ""))
21818
(clobber (reg:SI LR_REGNUM))]
21819
- "TARGET_THUMB1 && arm_arch5"
21820
+ "TARGET_THUMB1 && arm_arch5 && !SIBLING_CALL_P (insn)"
21822
[(set_attr "length" "2")
21823
(set_attr "type" "call")]
21824
@@ -8308,7 +8989,7 @@
21825
(match_operand 1 "" ""))
21826
(use (match_operand 2 "" ""))
21827
(clobber (reg:SI LR_REGNUM))]
21828
- "TARGET_THUMB1 && !arm_arch5"
21829
+ "TARGET_THUMB1 && !arm_arch5 && !SIBLING_CALL_P (insn)"
21832
if (!TARGET_CALLER_INTERWORKING)
21833
@@ -8367,7 +9048,7 @@
21834
(match_operand 2 "" "")))
21835
(use (match_operand 3 "" ""))
21836
(clobber (reg:SI LR_REGNUM))]
21837
- "TARGET_ARM && arm_arch5"
21838
+ "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)"
21840
[(set_attr "type" "call")]
21842
@@ -8378,7 +9059,7 @@
21843
(match_operand 2 "" "")))
21844
(use (match_operand 3 "" ""))
21845
(clobber (reg:SI LR_REGNUM))]
21846
- "TARGET_ARM && !arm_arch5"
21847
+ "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)"
21849
return output_call (&operands[1]);
21851
@@ -8394,7 +9075,8 @@
21852
(match_operand 2 "" "")))
21853
(use (match_operand 3 "" ""))
21854
(clobber (reg:SI LR_REGNUM))]
21855
- "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))"
21856
+ "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))
21857
+ && !SIBLING_CALL_P (insn)"
21859
return output_call_mem (&operands[1]);
21861
@@ -8444,6 +9126,7 @@
21862
(use (match_operand 2 "" ""))
21863
(clobber (reg:SI LR_REGNUM))]
21865
+ && !SIBLING_CALL_P (insn)
21866
&& (GET_CODE (operands[0]) == SYMBOL_REF)
21867
&& !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))"
21869
@@ -8460,6 +9143,7 @@
21870
(use (match_operand 3 "" ""))
21871
(clobber (reg:SI LR_REGNUM))]
21873
+ && !SIBLING_CALL_P (insn)
21874
&& (GET_CODE (operands[1]) == SYMBOL_REF)
21875
&& !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))"
21877
@@ -8505,6 +9189,10 @@
21881
+ if (!REG_P (XEXP (operands[0], 0))
21882
+ && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
21883
+ XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0));
21885
if (operands[2] == NULL_RTX)
21886
operands[2] = const0_rtx;
21888
@@ -8519,47 +9207,67 @@
21892
+ if (!REG_P (XEXP (operands[1], 0)) &&
21893
+ (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF))
21894
+ XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0));
21896
if (operands[3] == NULL_RTX)
21897
operands[3] = const0_rtx;
21901
(define_insn "*sibcall_insn"
21902
- [(call (mem:SI (match_operand:SI 0 "" "X"))
21903
+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "Cs,Ss"))
21904
(match_operand 1 "" ""))
21906
(use (match_operand 2 "" ""))]
21907
- "TARGET_32BIT && GET_CODE (operands[0]) == SYMBOL_REF"
21908
+ "TARGET_32BIT && SIBLING_CALL_P (insn)"
21910
- return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\";
21911
+ if (which_alternative == 1)
21912
+ return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\";
21915
+ if (arm_arch5 || arm_arch4t)
21916
+ return \" bx\\t%0\\t%@ indirect register sibling call\";
21918
+ return \"mov%?\\t%|pc, %0\\t%@ indirect register sibling call\";
21921
[(set_attr "type" "call")]
21924
(define_insn "*sibcall_value_insn"
21925
- [(set (match_operand 0 "" "")
21926
- (call (mem:SI (match_operand:SI 1 "" "X"))
21927
+ [(set (match_operand 0 "s_register_operand" "")
21928
+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "Cs,Ss"))
21929
(match_operand 2 "" "")))
21931
(use (match_operand 3 "" ""))]
21932
- "TARGET_32BIT && GET_CODE (operands[1]) == SYMBOL_REF"
21933
+ "TARGET_32BIT && SIBLING_CALL_P (insn)"
21935
- return NEED_PLT_RELOC ? \"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\";
21936
+ if (which_alternative == 1)
21937
+ return NEED_PLT_RELOC ? \"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\";
21940
+ if (arm_arch5 || arm_arch4t)
21941
+ return \"bx\\t%1\";
21943
+ return \"mov%?\\t%|pc, %1\\t@ indirect sibling call \";
21946
[(set_attr "type" "call")]
21949
-(define_expand "return"
21951
+(define_expand "<return_str>return"
21953
"(TARGET_ARM || (TARGET_THUMB2
21954
&& ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
21955
&& !IS_STACKALIGN (arm_current_func_type ())))
21956
- && USE_RETURN_INSN (FALSE)"
21957
+ <return_cond_false>"
21962
- thumb2_expand_return ();
21963
+ thumb2_expand_return (<return_simple_p>);
21967
@@ -8584,13 +9292,13 @@
21968
(set_attr "predicable" "yes")]
21971
-(define_insn "*cond_return"
21972
+(define_insn "*cond_<return_str>return"
21974
(if_then_else (match_operator 0 "arm_comparison_operator"
21975
[(match_operand 1 "cc_register" "") (const_int 0)])
21979
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
21980
+ "TARGET_ARM <return_cond_true>"
21983
if (arm_ccfsm_state == 2)
21984
@@ -8598,20 +9306,21 @@
21985
arm_ccfsm_state += 2;
21988
- return output_return_instruction (operands[0], true, false, false);
21989
+ return output_return_instruction (operands[0], true, false,
21990
+ <return_simple_p>);
21992
[(set_attr "conds" "use")
21993
(set_attr "length" "12")
21994
(set_attr "type" "load1")]
21997
-(define_insn "*cond_return_inverted"
21998
+(define_insn "*cond_<return_str>return_inverted"
22000
(if_then_else (match_operator 0 "arm_comparison_operator"
22001
[(match_operand 1 "cc_register" "") (const_int 0)])
22004
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
22006
+ "TARGET_ARM <return_cond_true>"
22009
if (arm_ccfsm_state == 2)
22010
@@ -8619,7 +9328,8 @@
22011
arm_ccfsm_state += 2;
22014
- return output_return_instruction (operands[0], true, true, false);
22015
+ return output_return_instruction (operands[0], true, true,
22016
+ <return_simple_p>);
22018
[(set_attr "conds" "use")
22019
(set_attr "length" "12")
22020
@@ -9095,27 +9805,64 @@
22021
(set_attr "type" "alu_shift,alu_shift_reg")])
22024
-(define_insn "*and_scc"
22025
+(define_insn_and_split "*and_scc"
22026
[(set (match_operand:SI 0 "s_register_operand" "=r")
22027
(and:SI (match_operator:SI 1 "arm_comparison_operator"
22028
- [(match_operand 3 "cc_register" "") (const_int 0)])
22029
- (match_operand:SI 2 "s_register_operand" "r")))]
22030
+ [(match_operand 2 "cc_register" "") (const_int 0)])
22031
+ (match_operand:SI 3 "s_register_operand" "r")))]
22033
- "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
22034
+ "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1"
22035
+ "&& reload_completed"
22036
+ [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0)))
22037
+ (cond_exec (match_dup 4) (set (match_dup 0)
22038
+ (and:SI (match_dup 3) (const_int 1))))]
22040
+ enum machine_mode mode = GET_MODE (operands[2]);
22041
+ enum rtx_code rc = GET_CODE (operands[1]);
22043
+ /* Note that operands[4] is the same as operands[1],
22044
+ but with VOIDmode as the result. */
22045
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
22046
+ if (mode == CCFPmode || mode == CCFPEmode)
22047
+ rc = reverse_condition_maybe_unordered (rc);
22049
+ rc = reverse_condition (rc);
22050
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
22052
[(set_attr "conds" "use")
22053
(set_attr "insn" "mov")
22054
(set_attr "length" "8")]
22057
-(define_insn "*ior_scc"
22058
+(define_insn_and_split "*ior_scc"
22059
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
22060
- (ior:SI (match_operator:SI 2 "arm_comparison_operator"
22061
- [(match_operand 3 "cc_register" "") (const_int 0)])
22062
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
22063
+ (ior:SI (match_operator:SI 1 "arm_comparison_operator"
22064
+ [(match_operand 2 "cc_register" "") (const_int 0)])
22065
+ (match_operand:SI 3 "s_register_operand" "0,?r")))]
22068
- orr%d2\\t%0, %1, #1
22069
- mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
22070
+ orr%d1\\t%0, %3, #1
22072
+ "&& reload_completed
22073
+ && REGNO (operands [0]) != REGNO (operands[3])"
22074
+ ;; && which_alternative == 1
22075
+ ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1
22076
+ [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3)))
22077
+ (cond_exec (match_dup 4) (set (match_dup 0)
22078
+ (ior:SI (match_dup 3) (const_int 1))))]
22080
+ enum machine_mode mode = GET_MODE (operands[2]);
22081
+ enum rtx_code rc = GET_CODE (operands[1]);
22083
+ /* Note that operands[4] is the same as operands[1],
22084
+ but with VOIDmode as the result. */
22085
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
22086
+ if (mode == CCFPmode || mode == CCFPEmode)
22087
+ rc = reverse_condition_maybe_unordered (rc);
22089
+ rc = reverse_condition (rc);
22090
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
22092
[(set_attr "conds" "use")
22093
(set_attr "length" "4,8")]
22095
@@ -9822,24 +10569,75 @@
22097
;; ??? The conditional patterns above need checking for Thumb-2 usefulness
22099
-(define_insn "*negscc"
22100
+(define_insn_and_split "*negscc"
22101
[(set (match_operand:SI 0 "s_register_operand" "=r")
22102
(neg:SI (match_operator 3 "arm_comparison_operator"
22103
[(match_operand:SI 1 "s_register_operand" "r")
22104
(match_operand:SI 2 "arm_rhs_operand" "rI")])))
22105
(clobber (reg:CC CC_REGNUM))]
22108
- if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
22109
- return \"mov\\t%0, %1, asr #31\";
22111
+ "&& reload_completed"
22114
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
22116
- if (GET_CODE (operands[3]) == NE)
22117
- return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\";
22118
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
22120
+ /* Emit mov\\t%0, %1, asr #31 */
22121
+ emit_insn (gen_rtx_SET (VOIDmode,
22123
+ gen_rtx_ASHIFTRT (SImode,
22128
+ else if (GET_CODE (operands[3]) == NE)
22130
+ /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */
22131
+ if (CONST_INT_P (operands[2]))
22132
+ emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2],
22133
+ GEN_INT (- INTVAL (operands[2]))));
22135
+ emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2]));
22137
- output_asm_insn (\"cmp\\t%1, %2\", operands);
22138
- output_asm_insn (\"mov%D3\\t%0, #0\", operands);
22139
- return \"mvn%d3\\t%0, #0\";
22141
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
22142
+ gen_rtx_NE (SImode,
22145
+ gen_rtx_SET (SImode,
22152
+ /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */
22153
+ emit_insn (gen_rtx_SET (VOIDmode,
22155
+ gen_rtx_COMPARE (CCmode, operands[1], operands[2])));
22156
+ enum rtx_code rc = GET_CODE (operands[3]);
22158
+ rc = reverse_condition (rc);
22159
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
22160
+ gen_rtx_fmt_ee (rc,
22164
+ gen_rtx_SET (VOIDmode, operands[0], const0_rtx)));
22165
+ rc = GET_CODE (operands[3]);
22166
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
22167
+ gen_rtx_fmt_ee (rc,
22171
+ gen_rtx_SET (VOIDmode,
22178
[(set_attr "conds" "clob")
22179
(set_attr "length" "12")]
22181
@@ -11626,6 +12424,9 @@
22182
(set_attr "predicable" "yes")])
22185
+;; Load the load/store double peephole optimizations.
22186
+(include "ldrdstrd.md")
22188
;; Load the load/store multiple patterns
22189
(include "ldmstm.md")
22191
--- a/src/gcc/config/arm/neon-gen.ml
22192
+++ b/src/gcc/config/arm/neon-gen.ml
22193
@@ -121,6 +121,7 @@
22194
| T_uint16 | T_int16 -> T_intHI
22195
| T_uint32 | T_int32 -> T_intSI
22196
| T_uint64 | T_int64 -> T_intDI
22197
+ | T_float16 -> T_floatHF
22198
| T_float32 -> T_floatSF
22199
| T_poly8 -> T_intQI
22200
| T_poly16 -> T_intHI
22201
@@ -275,8 +276,8 @@
22202
let mode = mode_of_elt elttype shape in
22203
string_of_mode mode
22204
with MixedMode (dst, src) ->
22205
- let dstmode = mode_of_elt dst shape
22206
- and srcmode = mode_of_elt src shape in
22207
+ let dstmode = mode_of_elt ~argpos:0 dst shape
22208
+ and srcmode = mode_of_elt ~argpos:1 src shape in
22209
string_of_mode dstmode ^ string_of_mode srcmode
22211
let get_shuffle features =
22212
@@ -291,19 +292,24 @@
22213
match List.find (fun feature ->
22214
match feature with Requires_feature _ -> true
22215
| Requires_arch _ -> true
22216
+ | Requires_FP_bit _ -> true
22219
- Requires_feature feature ->
22220
+ Requires_feature feature ->
22221
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
22222
| Requires_arch arch ->
22223
Format.printf "#if __ARM_ARCH >= %d@\n" arch
22224
+ | Requires_FP_bit bit ->
22225
+ Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
22227
| _ -> assert false
22228
with Not_found -> assert true
22230
let print_feature_test_end features =
22232
- List.exists (function Requires_feature x -> true
22233
- | Requires_arch x -> true
22234
+ List.exists (function Requires_feature _ -> true
22235
+ | Requires_arch _ -> true
22236
+ | Requires_FP_bit _ -> true
22237
| _ -> false) features in
22238
if feature then Format.printf "#endif@\n"
22240
@@ -365,6 +371,7 @@
22241
"__builtin_neon_hi", "int", 16, 4;
22242
"__builtin_neon_si", "int", 32, 2;
22243
"__builtin_neon_di", "int", 64, 1;
22244
+ "__builtin_neon_hf", "float", 16, 4;
22245
"__builtin_neon_sf", "float", 32, 2;
22246
"__builtin_neon_poly8", "poly", 8, 8;
22247
"__builtin_neon_poly16", "poly", 16, 4;
22248
--- a/src/libobjc/ChangeLog.linaro
22249
+++ b/src/libobjc/ChangeLog.linaro
22251
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22253
+ GCC Linaro 4.8-2013.05 released.
22255
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22257
+ * GCC Linaro 4.8-2013.04 released.
22258
--- a/src/libgfortran/ChangeLog.linaro
22259
+++ b/src/libgfortran/ChangeLog.linaro
22261
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22263
+ GCC Linaro 4.8-2013.05 released.
22265
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22267
+ * GCC Linaro 4.8-2013.04 released.
22268
--- a/src/libada/ChangeLog.linaro
22269
+++ b/src/libada/ChangeLog.linaro
22271
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22273
+ GCC Linaro 4.8-2013.05 released.
22275
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22277
+ * GCC Linaro 4.8-2013.04 released.
22278
--- a/src/libffi/ChangeLog.linaro
22279
+++ b/src/libffi/ChangeLog.linaro
22281
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22283
+ GCC Linaro 4.8-2013.05 released.
22285
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22287
+ * GCC Linaro 4.8-2013.04 released.
22288
--- a/src/libssp/ChangeLog.linaro
22289
+++ b/src/libssp/ChangeLog.linaro
22291
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22293
+ GCC Linaro 4.8-2013.05 released.
22295
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22297
+ * GCC Linaro 4.8-2013.04 released.
22298
--- a/src/libcpp/ChangeLog.linaro
22299
+++ b/src/libcpp/ChangeLog.linaro
22301
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22303
+ GCC Linaro 4.8-2013.05 released.
22305
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22307
+ * GCC Linaro 4.8-2013.04 released.
22308
--- a/src/libcpp/po/ChangeLog.linaro
22309
+++ b/src/libcpp/po/ChangeLog.linaro
22311
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22313
+ GCC Linaro 4.8-2013.05 released.
22315
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22317
+ * GCC Linaro 4.8-2013.04 released.
22318
--- a/src/fixincludes/ChangeLog.linaro
22319
+++ b/src/fixincludes/ChangeLog.linaro
22321
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22323
+ GCC Linaro 4.8-2013.05 released.
22325
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
22327
+ * GCC Linaro 4.8-2013.04 released.