1
# DP: Changes for the Linaro 4.8-2013.05 release.
3
LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@198615 \
4
svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@r198871 \
5
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/libitm/ChangeLog.linaro
8
+++ b/src/libitm/ChangeLog.linaro
10
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
12
+ * GCC Linaro 4.8-2013.04 released.
13
--- a/src/libgomp/ChangeLog.linaro
14
+++ b/src/libgomp/ChangeLog.linaro
16
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
18
+ * GCC Linaro 4.8-2013.04 released.
19
--- a/src/libquadmath/ChangeLog.linaro
20
+++ b/src/libquadmath/ChangeLog.linaro
22
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
24
+ * GCC Linaro 4.8-2013.04 released.
25
--- a/src/libsanitizer/ChangeLog.linaro
26
+++ b/src/libsanitizer/ChangeLog.linaro
28
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
30
+ * GCC Linaro 4.8-2013.04 released.
31
--- a/src/zlib/ChangeLog.linaro
32
+++ b/src/zlib/ChangeLog.linaro
34
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
36
+ * GCC Linaro 4.8-2013.04 released.
37
--- a/src/libstdc++-v3/ChangeLog.linaro
38
+++ b/src/libstdc++-v3/ChangeLog.linaro
40
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
42
+ * GCC Linaro 4.8-2013.04 released.
43
--- a/src/intl/ChangeLog.linaro
44
+++ b/src/intl/ChangeLog.linaro
46
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
48
+ * GCC Linaro 4.8-2013.04 released.
49
--- a/src/ChangeLog.linaro
50
+++ b/src/ChangeLog.linaro
52
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
54
+ * GCC Linaro 4.8-2013.04 released.
55
--- a/src/libmudflap/ChangeLog.linaro
56
+++ b/src/libmudflap/ChangeLog.linaro
58
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
60
+ * GCC Linaro 4.8-2013.04 released.
61
--- a/src/boehm-gc/ChangeLog.linaro
62
+++ b/src/boehm-gc/ChangeLog.linaro
64
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
66
+ Backport from trunk r197770.
68
+ 2013-03-16 Yvan Roux <yvan.roux@linaro.org>
70
+ * include/private/gcconfig.h (AARCH64): New macro (defined only if
72
+ (mach_type_known): Update comment adding ARM AArch64 target.
73
+ (NOSYS, mach_type_known,CPP_WORDSZ, MACH_TYPE, ALIGNMENT, HBLKSIZE,
74
+ OS_TYPE, LINUX_STACKBOTTOM, USE_GENERIC_PUSH_REGS, DYNAMIC_LOADING,
75
+ DATASTART, DATAEND, STACKBOTTOM): Define for AArch64.
77
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
79
+ * GCC Linaro 4.8-2013.04 released.
80
--- a/src/boehm-gc/include/private/gcconfig.h
81
+++ b/src/boehm-gc/include/private/gcconfig.h
85
/* Determine the machine type: */
86
+#if defined(__aarch64__)
90
+# define mach_type_known
93
# if defined(__arm__) || defined(__thumb__)
95
# if !defined(LINUX) && !defined(NETBSD)
98
# define mach_type_known
100
+# if defined(LINUX) && defined(__aarch64__)
102
+# define mach_type_known
104
# if defined(LINUX) && defined(__arm__)
106
# define mach_type_known
108
/* running Amdahl UTS4 */
109
/* S390 ==> 390-like machine */
111
+ /* AARCH64 ==> ARM AArch64 */
112
/* ARM32 ==> Intel StrongARM */
113
/* IA64 ==> Intel IPF */
115
@@ -1833,6 +1845,32 @@
120
+# define CPP_WORDSZ 64
121
+# define MACH_TYPE "AARCH64"
122
+# define ALIGNMENT 8
124
+# define HBLKSIZE 4096
127
+# define OS_TYPE "LINUX"
128
+# define LINUX_STACKBOTTOM
129
+# define USE_GENERIC_PUSH_REGS
130
+# define DYNAMIC_LOADING
131
+ extern int __data_start[];
132
+# define DATASTART ((ptr_t)__data_start)
133
+ extern char _end[];
134
+# define DATAEND ((ptr_t)(&_end))
137
+ /* __data_start is usually defined in the target linker script. */
138
+ extern int __data_start[];
139
+# define DATASTART ((ptr_t)__data_start)
140
+ extern void *__stack_base__;
141
+# define STACKBOTTOM ((ptr_t)__stack_base__)
146
# define CPP_WORDSZ 32
147
# define MACH_TYPE "ARM32"
148
--- a/src/include/ChangeLog.linaro
149
+++ b/src/include/ChangeLog.linaro
151
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
153
+ * GCC Linaro 4.8-2013.04 released.
154
--- a/src/libiberty/ChangeLog.linaro
155
+++ b/src/libiberty/ChangeLog.linaro
157
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
159
+ * GCC Linaro 4.8-2013.04 released.
160
--- a/src/lto-plugin/ChangeLog.linaro
161
+++ b/src/lto-plugin/ChangeLog.linaro
163
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
165
+ * GCC Linaro 4.8-2013.04 released.
166
--- a/src/contrib/regression/ChangeLog.linaro
167
+++ b/src/contrib/regression/ChangeLog.linaro
169
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
171
+ * GCC Linaro 4.8-2013.04 released.
172
--- a/src/contrib/config-list.mk
173
+++ b/src/contrib/config-list.mk
175
# nohup nice make -j25 -l36 -f ../gcc/contrib/config-list.mk > make.out 2>&1 &
177
# v850e1-elf is rejected by config.sub
178
-LIST = alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
179
+LIST = aarch64-elf aarch64-linux-gnu \
180
+ alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
181
alpha64-dec-vms alpha-dec-vms am33_2.0-linux \
182
arm-wrs-vxworks arm-netbsdelf \
183
arm-linux-androideabi arm-uclinux_eabi arm-eabi \
184
--- a/src/contrib/ChangeLog.linaro
185
+++ b/src/contrib/ChangeLog.linaro
187
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
189
+ Backport from trunk r198443.
190
+ 2013-04-22 Sofiane Naci <sofiane.naci@arm.com>
192
+ * config-list.mk (LIST): Add aarch64-elf and aarch64-linux-gnu.
194
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
196
+ * GCC Linaro 4.8-2013.04 released.
197
--- a/src/contrib/reghunt/ChangeLog.linaro
198
+++ b/src/contrib/reghunt/ChangeLog.linaro
200
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
202
+ * GCC Linaro 4.8-2013.04 released.
203
--- a/src/libatomic/ChangeLog.linaro
204
+++ b/src/libatomic/ChangeLog.linaro
206
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
208
+ * GCC Linaro 4.8-2013.04 released.
209
--- a/src/config/ChangeLog.linaro
210
+++ b/src/config/ChangeLog.linaro
212
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
214
+ * GCC Linaro 4.8-2013.04 released.
215
--- a/src/libbacktrace/ChangeLog.linaro
216
+++ b/src/libbacktrace/ChangeLog.linaro
218
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
220
+ * GCC Linaro 4.8-2013.04 released.
221
--- a/src/libjava/libltdl/ChangeLog.linaro
222
+++ b/src/libjava/libltdl/ChangeLog.linaro
224
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
226
+ * GCC Linaro 4.8-2013.04 released.
227
--- a/src/libjava/ChangeLog.linaro
228
+++ b/src/libjava/ChangeLog.linaro
230
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
232
+ * GCC Linaro 4.8-2013.04 released.
233
--- a/src/libjava/classpath/ChangeLog.linaro
234
+++ b/src/libjava/classpath/ChangeLog.linaro
236
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
238
+ * GCC Linaro 4.8-2013.04 released.
239
--- a/src/gnattools/ChangeLog.linaro
240
+++ b/src/gnattools/ChangeLog.linaro
242
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
244
+ * GCC Linaro 4.8-2013.04 released.
245
--- a/src/maintainer-scripts/ChangeLog.linaro
246
+++ b/src/maintainer-scripts/ChangeLog.linaro
248
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
250
+ * GCC Linaro 4.8-2013.04 released.
251
--- a/src/libgcc/ChangeLog.linaro
252
+++ b/src/libgcc/ChangeLog.linaro
254
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
256
+ Backport from trunk r198090.
257
+ 2013-04-19 Yufeng Zhang <yufeng.zhang@arm.com>
259
+ * config/aarch64/sfp-machine.h (_FP_W_TYPE): Change to define
260
+ as 'unsigned long long' instead of 'unsigned long'.
261
+ (_FP_WS_TYPE): Change to define as 'signed long long' instead of
264
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
266
+ * GCC Linaro 4.8-2013.04 released.
267
--- a/src/libgcc/config/aarch64/sfp-machine.h
268
+++ b/src/libgcc/config/aarch64/sfp-machine.h
270
<http://www.gnu.org/licenses/>. */
272
#define _FP_W_TYPE_SIZE 64
273
-#define _FP_W_TYPE unsigned long
274
-#define _FP_WS_TYPE signed long
275
+#define _FP_W_TYPE unsigned long long
276
+#define _FP_WS_TYPE signed long long
277
#define _FP_I_TYPE int
279
typedef int TItype __attribute__ ((mode (TI)));
280
--- a/src/libgcc/config/libbid/ChangeLog.linaro
281
+++ b/src/libgcc/config/libbid/ChangeLog.linaro
283
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
285
+ * GCC Linaro 4.8-2013.04 released.
286
--- a/src/libdecnumber/ChangeLog.linaro
287
+++ b/src/libdecnumber/ChangeLog.linaro
289
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
291
+ * GCC Linaro 4.8-2013.04 released.
292
--- a/src/gcc/LINARO-VERSION
293
+++ b/src/gcc/LINARO-VERSION
296
--- a/src/gcc/hooks.c
297
+++ b/src/gcc/hooks.c
302
+/* Generic hook that takes (gimple_stmt_iterator *) and returns
305
+hook_bool_gsiptr_false (gimple_stmt_iterator *a ATTRIBUTE_UNUSED)
310
/* Used for the TARGET_ASM_CAN_OUTPUT_MI_THUNK hook. */
312
hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree a ATTRIBUTE_UNUSED,
313
--- a/src/gcc/hooks.h
314
+++ b/src/gcc/hooks.h
316
extern bool hook_bool_const_tree_false (const_tree);
317
extern bool hook_bool_tree_true (tree);
318
extern bool hook_bool_const_tree_true (const_tree);
319
+extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *);
320
extern bool hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree,
323
--- a/src/gcc/c-family/ChangeLog.linaro
324
+++ b/src/gcc/c-family/ChangeLog.linaro
326
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
328
+ * GCC Linaro 4.8-2013.04 released.
329
--- a/src/gcc/java/ChangeLog.linaro
330
+++ b/src/gcc/java/ChangeLog.linaro
332
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
334
+ * GCC Linaro 4.8-2013.04 released.
335
--- a/src/gcc/c/ChangeLog.linaro
336
+++ b/src/gcc/c/ChangeLog.linaro
338
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
340
+ * GCC Linaro 4.8-2013.04 released.
341
--- a/src/gcc/target.def
342
+++ b/src/gcc/target.def
343
@@ -1289,13 +1289,24 @@
345
tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL)
347
-/* Fold a target-specific builtin. */
348
+/* Fold a target-specific builtin to a tree valid for both GIMPLE
353
tree, (tree fndecl, int n_args, tree *argp, bool ignore),
354
hook_tree_tree_int_treep_bool_null)
356
+/* Fold a target-specific builtin to a valid GIMPLE tree. */
358
+(gimple_fold_builtin,
359
+ "Fold a call to a machine specific built-in function that was set up\n\
360
+by @samp{TARGET_INIT_BUILTINS}. @var{gsi} points to the gimple\n\
361
+statement holding the function call. Returns true if any change\n\
362
+was made to the GIMPLE stream.",
363
+ bool, (gimple_stmt_iterator *gsi),
364
+ hook_bool_gsiptr_false)
366
/* Target hook is used to compare the target attributes in two functions to
367
determine which function's features get higher priority. This is used
368
during function multi-versioning to figure out the order in which two
369
--- a/src/gcc/configure
370
+++ b/src/gcc/configure
371
@@ -1658,7 +1658,8 @@
372
use sysroot as the system root during the build
373
--with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
374
--with-specs=SPECS add SPECS to driver command-line processing
375
- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
376
+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
377
+ GCC `cat $srcdir/LINARO-VERSION`"
378
--with-bugurl=URL Direct users to URL to report a bug
379
--with-multilib-list select multilibs (SH and x86-64 only)
380
--with-gnu-ld assume the C compiler uses GNU ld default=no
381
@@ -7327,7 +7328,7 @@
382
*) PKGVERSION="($withval) " ;;
385
- PKGVERSION="(GCC) "
386
+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
390
--- a/src/gcc/objc/ChangeLog.linaro
391
+++ b/src/gcc/objc/ChangeLog.linaro
393
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
395
+ * GCC Linaro 4.8-2013.04 released.
396
--- a/src/gcc/ChangeLog.linaro
397
+++ b/src/gcc/ChangeLog.linaro
399
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
401
+ Backport from trunk r198677.
402
+ 2013-05-07 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
404
+ * config/aarch64/aarch64.md
405
+ (cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): Restrict the
406
+ shift value between 0-4.
408
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
410
+ Backport from trunk r198574-198575.
411
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
413
+ * config/aarch64/aarch64-simd.md (simd_fabd): Correct the description.
415
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
417
+ * config/aarch64/aarch64-simd.md (*fabd_scalar<mode>3): Support
418
+ scalar form of FABD instruction.
420
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
422
+ Backport from trunk r198490-198496
423
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
425
+ * config/aarch64/arm_neon.h
426
+ (vac<ge, gt><sd>_f<32, 64>): Rename to...
427
+ (vca<ge, gt><sd>_f<32, 64>): ...this, reimpliment in C.
428
+ (vca<ge, gt, lt, le><q>_f<32, 64>): Reimpliment in C.
430
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
432
+ * config/aarch64/aarch64-simd.md (*aarch64_fac<optab><mode>): New.
433
+ * config/aarch64/iterators.md (FAC_COMPARISONS): New.
435
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
437
+ * config/aarch64/aarch64-simd.md
438
+ (vcond<mode>_internal): Handle special cases for constant masks.
439
+ (vcond<mode><mode>): Allow nonmemory_operands for outcome vectors.
440
+ (vcondu<mode><mode>): Likewise.
441
+ (vcond<v_cmp_result><mode>): New.
443
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
445
+ * config/aarch64/aarch64-builtins.c (BUILTIN_VALLDI): Define.
446
+ (aarch64_fold_builtin): Add folding for cm<eq,ge,gt,tst>.
447
+ * config/aarch64/aarch64-simd-builtins.def
448
+ (cmeq): Update to BUILTIN_VALLDI.
453
+ * config/aarch64/arm_neon.h
454
+ (vc<eq, lt, le, gt, ge, tst><z><qsd>_<fpsu><8,16,32,64>): Remap
455
+ to builtins or C as appropriate.
457
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
459
+ * config/aarch64/aarch64-simd-builtins.def (cmhs): Rename to...
461
+ (cmhi): Rename to...
463
+ * config/aarch64/aarch64-simd.md
464
+ (simd_mode): Add SF.
465
+ (aarch64_vcond_internal): Use new names for unsigned comparison insns.
466
+ (aarch64_cm<optab><mode>): Rewrite to not use UNSPECs.
467
+ * config/aarch64/aarch64.md (*cstore<mode>_neg): Rename to...
468
+ (cstore<mode>_neg): ...This.
469
+ * config/aarch64/iterators.md
471
+ (unspec): Remove UNSPEC_CM<EQ, LE, LT, GE, GT, HS, HI, TST>.
472
+ (COMPARISONS): New.
473
+ (UCOMPARISONS): Likewise.
474
+ (optab): Add missing comparisons.
480
+ (VCMP_S): Likewise.
481
+ (VCMP_U): Likewise.
482
+ (V_cmp_result): Add DF, SF modes.
483
+ (v_cmp_result): Likewise.
485
+ (vmtype): Likewise.
486
+ * config/aarch64/predicates.md (aarch64_reg_or_fp_zero): New.
488
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
490
+ Backport from trunk r198191.
491
+ 2013-04-23 Sofiane Naci <sofiane.naci@arm.com>
493
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add simd attribute.
495
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
497
+ Backport from trunk r197838.
498
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
500
+ * config/aarch64/aarch64.c (aarch64_select_cc_mode): Allow NEG
501
+ code in CC_NZ mode.
502
+ * config/aarch64/aarch64.md (*neg_<shift><mode>3_compare0): New
505
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
507
+ Backport from trunk r198019.
508
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
510
+ * config/aarch64/aarch64.md (*adds_mul_imm_<mode>): New pattern.
511
+ (*subs_mul_imm_<mode>): New pattern.
513
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
515
+ Backport from trunk r198424-198425.
516
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
518
+ * config/aarch64/aarch64.md (movsi_aarch64): Support LDR/STR
519
+ from/to S register.
520
+ (movdi_aarch64): Support LDR/STR from/to D register.
522
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
524
+ * common/config/aarch64/aarch64-common.c: Enable REE pass at O2
525
+ or higher by default.
527
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
529
+ Backport from trunk r198412.
530
+ 2013-04-29 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
532
+ * config/arm/arm.md (store_minmaxsi): Use only when
533
+ optimize_insn_for_size_p.
535
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
537
+ Backport from trunk 198394,198396-198400,198402-198404.
538
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
540
+ * config/aarch64/arm_neon.h
541
+ (vcvt<sd>_f<32,64>_s<32,64>): Rewrite in C.
542
+ (vcvt<q>_f<32,64>_s<32,64>): Rewrite using builtins.
543
+ (vcvt_<high_>_f<32,64>_f<32,64>): Likewise.
544
+ (vcvt<qsd>_<su><32,64>_f<32,64>): Likewise.
545
+ (vcvta<qsd>_<su><32,64>_f<32,64>): Likewise.
546
+ (vcvtm<qsd>_<su><32,64>_f<32,64>): Likewise.
547
+ (vcvtn<qsd>_<su><32,64>_f<32,64>): Likewise.
548
+ (vcvtp<qsd>_<su><32,64>_f<32,64>): Likewise.
550
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
552
+ * config/aarch64/aarch64-simd.md
553
+ (<optab><VDQF:mode><fcvt_target>2): New, maps to fix, fixuns.
554
+ (<fix_trunc_optab><VDQF:mode><fcvt_target>2): New, maps to
555
+ fix_trunc, fixuns_trunc.
556
+ (ftrunc<VDQF:mode>2): New.
557
+ * config/aarch64/iterators.md (optab): Add fix, fixuns.
558
+ (fix_trunc_optab): New.
560
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
562
+ * config/aarch64/aarch64-builtins.c
563
+ (aarch64_builtin_vectorized_function): Vectorize over ifloorf,
564
+ iceilf, lround, iroundf.
566
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
568
+ * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
569
+ (float_truncate_hi_): Likewise.
570
+ (float_extend_lo_): Likewise.
571
+ (float_truncate_lo_): Likewise.
572
+ * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New.
573
+ (aarch64_float_extend_lo_v2df): Likewise.
574
+ (vec_unpacks_hi_v4sf): Likewise.
575
+ (aarch64_float_truncate_lo_v2sf): Likewise.
576
+ (aarch64_float_truncate_hi_v4sf): Likewise.
577
+ (vec_pack_trunc_v2df): Likewise.
578
+ (vec_pack_trunc_df): Likewise.
580
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
582
+ * config/aarch64/aarch64-builtins.c
583
+ (aarch64_fold_builtin): Fold float conversions.
584
+ * config/aarch64/aarch64-simd-builtins.def
585
+ (floatv2si, floatv4si, floatv2di): New.
586
+ (floatunsv2si, floatunsv4si, floatunsv2di): Likewise.
587
+ * config/aarch64/aarch64-simd.md
588
+ (<optab><fcvt_target><VDQF:mode>2): New, expands to float and floatuns.
589
+ * config/aarch64/iterators.md (FLOATUORS): New.
590
+ (optab): Add float, floatuns.
591
+ (su_optab): Likewise.
593
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
595
+ * config/aarch64/aarch64-builtins.c
596
+ (aarch64_builtin_vectorized_function): Fold to standard pattern names.
597
+ * config/aarch64/aarch64-simd-builtins.def (frintn): New.
598
+ (frintz): Rename to...
600
+ (frintp): Rename to...
602
+ (frintm): Rename to...
604
+ (frinti): Rename to...
605
+ (nearbyint): ...this.
606
+ (frintx): Rename to...
608
+ (frinta): Rename to...
610
+ * config/aarch64/aarch64-simd.md
611
+ (aarch64_frint<frint_suffix><mode>): Delete.
612
+ (<frint_pattern><mode>2): Convert to insn.
613
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRINTN.
614
+ * config/aarch64/iterators.md (FRINT): Add UNSPEC_FRINTN.
615
+ (frint_pattern): Likewise.
616
+ (frint_suffix): Likewise.
618
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
620
+ Backport from trunk r198302-198306,198316.
621
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
623
+ * config/aarch64/aarch64-simd.md
624
+ (aarch64_simd_bsl<mode>_internal): Rewrite RTL to not use UNSPEC_BSL.
625
+ (aarch64_simd_bsl<mode>): Likewise.
626
+ * config/aarch64/iterators.md (unspec): Remove UNSPEC_BSL.
628
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
630
+ * config/aarch64/aarch64-simd.md (neg<mode>2): Use VDQ iterator.
632
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
634
+ * config/aarch64/aarch64-builtins.c
635
+ (aarch64_fold_builtin): New.
636
+ * config/aarch64/aarch64-protos.h (aarch64_fold_builtin): New.
637
+ * config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
638
+ * config/aarch64/aarch64-simd-builtins.def (abs): New.
639
+ * config/aarch64/arm_neon.h
640
+ (vabs<q>_<f32, 64>): Implement using __builtin_aarch64_fabs.
642
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
643
+ Tejas Belagod <tejas.belagod@arm.com>
645
+ * config/aarch64/aarch64-builtins.c
646
+ (aarch64_gimple_fold_builtin): New.
647
+ * config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
648
+ * config/aarch64/aarch64-simd-builtins.def (addv): New.
649
+ * config/aarch64/aarch64-simd.md (addpv4sf): New.
650
+ (addvv4sf): Update.
651
+ * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.
653
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
655
+ * config/aarch64/aarch64.md
656
+ (*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): New pattern.
658
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
660
+ * config/aarch64/aarch64.md (*ngc<mode>): New pattern.
661
+ (*ngcsi_uxtw): New pattern.
663
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
665
+ Backport from trunk 198298.
666
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
667
+ Julian Brown <julian@codesourcery.com>
669
+ * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
670
+ (TB_DREG): Add T_V4HF.
671
+ (v4hf_UP): New macro.
672
+ (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
673
+ (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
675
+ Handle initialisation of V4HF. Adjust initialisation of reinterpret
677
+ (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
679
+ (arm_vector_mode_supported_p): Handle V4HF.
680
+ (arm_mangle_map): Handle V4HFmode.
681
+ * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
682
+ * config/arm/arm_neon_builtins.def: Add entries for
683
+ vcvtv4hfv4sf, vcvtv4sfv4hf.
684
+ * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
685
+ (neon_vcvtv4hfv4sf): Likewise.
686
+ * config/arm/neon-gen.ml: Handle half-precision floating point
688
+ * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
689
+ * config/arm/arm_neon.h: Regenerate.
690
+ * config/arm/neon.ml (type elts): Add F16.
691
+ (type vectype): Add T_float16x4, T_floatHF.
692
+ (type vecmode): Add V4HF.
693
+ (type features): Add Requires_FP_bit feature.
694
+ (elt_width): Handle F16.
695
+ (elt_class): Likewise.
696
+ (elt_of_class_width): Likewise.
697
+ (mode_of_elt): Refactor.
698
+ (type_for_elt): Handle F16, fix error messages.
699
+ (vectype_size): Handle T_float16x4.
700
+ (vcvt_sh): New function.
701
+ (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
702
+ (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
703
+ (string_of_mode): Handle V4HF.
704
+ * doc/arm-neon-intrinsics.texi: Regenerate.
706
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
708
+ Backport from trunk r198136-198137,198142,198176.
709
+ 2013-04-23 Andreas Schwab <schwab@linux-m68k.org>
711
+ * coretypes.h (gimple_stmt_iterator): Add struct to make
714
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
716
+ * coretypes.h (gimple_stmt_iterator_d): Forward declare.
717
+ (gimple_stmt_iterator): New typedef.
718
+ * gimple.h (gimple_stmt_iterator): Rename to...
719
+ (gimple_stmt_iterator_d): ... This.
720
+ * doc/tm.texi.in (TARGET_FOLD_BUILTIN): Detail restriction that
721
+ trees be valid for GIMPLE and GENERIC.
722
+ (TARGET_GIMPLE_FOLD_BUILTIN): New.
723
+ * gimple-fold.c (gimple_fold_call): Call target hook
724
+ gimple_fold_builtin.
725
+ * hooks.c (hook_bool_gsiptr_false): New.
726
+ * hooks.h (hook_bool_gsiptr_false): New.
727
+ * target.def (fold_stmt): New.
728
+ * doc/tm.texi: Regenerate.
730
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
732
+ * config/aarch64/aarch64-builtins.c
734
+ (CF0, CF1, CF2, CF3, CF4, CF10): New.
735
+ (VAR<1-12>): Add MAP parameter.
736
+ (BUILTIN_*): Likewise.
737
+ * config/aarch64/aarch64-simd-builtins.def: Set MAP parameter.
738
+ * config/aarch64/aarch64-simd.md (aarch64_sshl_n<mode>): Remove.
739
+ (aarch64_ushl_n<mode>): Likewise.
740
+ (aarch64_sshr_n<mode>): Likewise.
741
+ (aarch64_ushr_n<mode>): Likewise.
742
+ (aarch64_<maxmin><mode>): Likewise.
743
+ (aarch64_sqrt<mode>): Likewise.
744
+ * config/aarch64/arm_neon.h (vshl<q>_n_*): Use new builtin names.
745
+ (vshr<q>_n_*): Likewise.
747
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
749
+ * config/aarch64/aarch64-builtins.c
750
+ (aarch64_simd_builtin_type_mode): Handle SF types.
752
+ (BUILTIN_GPF): Define.
753
+ (aarch64_init_simd_builtins): Handle SF types.
754
+ * config/aarch64/aarch64-simd-builtins.def (frecpe): Add support.
755
+ (frecps): Likewise.
756
+ (frecpx): Likewise.
757
+ * config/aarch64/aarch64-simd.md
758
+ (simd_types): Update simd_frcp<esx> to simd_frecp<esx>.
759
+ (aarch64_frecpe<mode>): New.
760
+ (aarch64_frecps<mode>): Likewise.
761
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP<ESX>.
762
+ (v8type): Add frecp<esx>.
763
+ (aarch64_frecp<FRECP:frecp_suffix><mode>): New.
764
+ (aarch64_frecps<mode>): Likewise.
765
+ * config/aarch64/iterators.md (FRECP): New.
766
+ (frecp_suffix): Likewise.
767
+ * config/aarch64/arm_neon.h
768
+ (vrecp<esx><qsd>_<fd><32, 64>): Convert to using builtins.
770
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
772
+ Backport from trunk r198030.
773
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
775
+ * config/arm/arm.md (movsicc_insn): Convert define_insn into
776
+ define_insn_and_split.
777
+ (and_scc,ior_scc,negscc): Likewise.
778
+ (cmpsi2_addneg, subsi3_compare): Convert to named patterns.
780
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
782
+ Backport from trunk r198020.
783
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
785
+ * config/aarch64/aarch64.md (*adds_<optab><mode>_multp2):
787
+ (*subs_<optab><mode>_multp2): New pattern.
788
+ (*adds_<optab><ALLX:mode>_<GPI:mode>): New pattern.
789
+ (*subs_<optab><ALLX:mode>_<GPI:mode>): New pattern.
791
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
793
+ Backport from trunk r198004,198029.
794
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
796
+ * config/arm/arm.c (use_return_insn): Return 0 for targets that
797
+ can benefit from using a sequence of LDRD instructions in epilogue
798
+ instead of a single LDM instruction.
800
+ 2013-04-16 Greta Yorsh <Greta.Yorsh at arm.com>
802
+ * config/arm/arm.c (emit_multi_reg_push): New declaration
803
+ for an existing function.
804
+ (arm_emit_strd_push): New function.
805
+ (arm_expand_prologue): Used here.
806
+ (arm_emit_ldrd_pop): New function.
807
+ (arm_expand_epilogue): Used here.
808
+ (arm_get_frame_offsets): Update condition.
809
+ (arm_emit_multi_reg_pop): Add a special case for load of a single
810
+ register with writeback.
812
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
814
+ Backport from trunk r197965.
815
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
817
+ * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case.
818
+ * config/arm/arm.md (*anddi3_insn): Change to insn_and_split.
819
+ * config/arm/constraints.md (De): New constraint.
820
+ * config/arm/neon.md (anddi3_neon): Delete.
821
+ (neon_vand<mode>): Expand to standard anddi3 pattern.
822
+ * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
823
+ Move earlier in the file.
824
+ (neon_inv_logic_op2): Likewise.
825
+ (arm_anddi_operand_neon): New predicate.
827
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
829
+ Backport from trunk r197925.
830
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
832
+ * config/arm/arm.md (mov_scc,mov_negscc,mov_notscc): Convert
833
+ define_insn into define_insn_and_split and emit movsicc patterns.
835
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
837
+ Backport from trunk r197807.
838
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
840
+ * config/aarch64/aarch64.h (REVERSIBLE_CC_MODE): Define.
842
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
844
+ Backport from trunk r197642.
845
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
847
+ * config/arm/arm.md (minmax_arithsi_non_canon): New pattern.
849
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
851
+ Backport from trunk r197530,197921.
852
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
854
+ * config/arm/arm.c (gen_operands_ldrd_strd): Initialize "base".
856
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
858
+ * config/arm/constraints.md (q): New constraint.
859
+ * config/arm/ldrdstrd.md: New file.
860
+ * config/arm/arm.md (ldrdstrd.md) New include.
861
+ (arm_movdi): Use "q" instead of "r" constraint
862
+ for double-word memory access.
863
+ (movdf_soft_insn): Likewise.
864
+ * config/arm/vfp.md (movdi_vfp): Likewise.
865
+ * config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md.
866
+ * config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration.
867
+ * config/arm/arm.c (gen_operands_ldrd_strd): New function.
868
+ (mem_ok_for_ldrd_strd): Likewise.
869
+ (output_move_double): Update assertion.
871
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
873
+ Backport of trunk r197518-197522,197526-197528.
874
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
876
+ * config/arm/arm.md (arm_smax_insn): Convert define_insn into
877
+ define_insn_and_split.
878
+ (arm_smin_insn,arm_umaxsi3,arm_uminsi3): Likewise.
880
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
882
+ * config/arm/arm.md (arm_ashldi3_1bit): Convert define_insn into
883
+ define_insn_and_split.
884
+ (arm_ashrdi3_1bit,arm_lshrdi3_1bit): Likewise.
885
+ (shiftsi3_compare): New pattern.
886
+ (rrx): New pattern.
887
+ * config/arm/unspecs.md (UNSPEC_RRX): New.
889
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
891
+ * config/arm/arm.md (negdi_extendsidi): New pattern.
892
+ (negdi_zero_extendsidi): Likewise.
894
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
896
+ * config/arm/arm.md (andsi_iorsi3_notsi): Convert define_insn into
897
+ define_insn_and_split.
898
+ (arm_negdi2,arm_abssi2,arm_neg_abssi2): Likewise.
899
+ (arm_cmpdi_insn,arm_cmpdi_unsigned): Likewise.
901
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
903
+ * config/arm/arm.md (arm_subdi3): Convert define_insn into
904
+ define_insn_and_split.
905
+ (subdi_di_zesidi,subdi_di_sesidi): Likewise.
906
+ (subdi_zesidi_di,subdi_sesidi_di,subdi_zesidi_zesidi): Likewise.
908
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
910
+ * config/arm/arm.md (subsi3_carryin): New pattern.
911
+ (subsi3_carryin_const): Likewise.
912
+ (subsi3_carryin_compare,subsi3_carryin_compare_const): Likewise.
913
+ (subsi3_carryin_shift,rsbsi3_carryin_shift): Likewise.
915
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
917
+ * config/arm/arm.md (incscc,arm_incscc,decscc,arm_decscc): Delete.
919
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
921
+ * config/arm/arm.md (addsi3_carryin_<optab>): Set attribute predicable.
922
+ (addsi3_carryin_alt2_<optab>,addsi3_carryin_shift_<optab>): Likewise.
924
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
926
+ Backport of trunk r197517.
927
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
929
+ * config/arm/arm.c (arm_expand_builtin): Change fcode
930
+ type to unsigned int.
932
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
934
+ Backport of trunk r197513.
935
+ 2013-04-05 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
937
+ * doc/invoke.texi (ARM Options): Document cortex-a53 support.
939
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
941
+ Backport of trunk r197489-197491.
942
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
944
+ * config/arm/arm-protos.h (arm_builtin_vectorized_function):
945
+ New function prototype.
946
+ * config/arm/arm.c (TARGET_VECTORIZE_BUILTINS): Define.
947
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
948
+ (arm_builtin_vectorized_function): New function.
950
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
952
+ * config/arm/arm_neon_builtins.def: New file.
953
+ * config/arm/arm.c (neon_builtin_data): Move contents to
954
+ arm_neon_builtins.def.
955
+ (enum arm_builtins): Include neon builtin definitions.
956
+ (ARM_BUILTIN_NEON_BASE): Move from enum to macro.
957
+ * config/arm/t-arm (arm.o): Add dependency on
958
+ arm_neon_builtins.def.
960
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
962
+ Backport of trunk 196795-196797,196957
963
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
965
+ * config/aarch64/aarch64.md (*sub<mode>3_carryin): New pattern.
966
+ (*subsi3_carryin_uxtw): Likewise.
968
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
970
+ * config/aarch64/aarch64.md (*ror<mode>3_insn): New pattern.
971
+ (*rorsi3_insn_uxtw): Likewise.
973
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
975
+ * config/aarch64/aarch64.md (*extr<mode>5_insn): New pattern.
976
+ (*extrsi5_insn_uxtw): Likewise.
978
+2013-04-10 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
980
+ * LINARO-VERSION: Bump version number.
982
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
984
+ * GCC Linaro 4.8-2013.04 released.
986
+ * LINARO-VERSION: New file.
987
+ * configure.ac: Add Linaro version string.
988
+ * configure: Regenerate.
990
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
992
+ Backport of trunk r197346.
993
+ 2013-04-02 Ian Caulfield <ian.caulfield@arm.com>
994
+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
996
+ * config/arm/arm-arches.def (armv8-a): Default to cortex-a53.
997
+ * config/arm/t-arm (MD_INCLUDES): Depend on cortex-a53.md.
998
+ * config/arm/cortex-a53.md: New file.
999
+ * config/arm/bpabi.h (BE8_LINK_SPEC): Handle cortex-a53.
1000
+ * config/arm/arm.md (generic_sched, generic_vfp): Handle cortex-a53.
1001
+ * config/arm/arm.c (arm_issue_rate): Likewise.
1002
+ * config/arm/arm-tune.md: Regenerate
1003
+ * config/arm/arm-tables.opt: Regenerate.
1004
+ * config/arm/arm-cores.def: Add cortex-a53.
1006
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1008
+ Backport of trunk r197342.
1009
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
1011
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add variants for
1012
+ scalar load/store operations using B/H registers.
1013
+ (*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Likewise.
1015
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1017
+ Backport of trunk r197341.
1018
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
1020
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add alternatives for
1022
+ * config/aarch64/aarch64.c
1023
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
1024
+ * config/aarch64/aarch64-protos.h
1025
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
1026
+ * config/aarch64/constraints.md (Dh, Dq): New.
1027
+ * config/aarch64/iterators.md (hq): New.
1029
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1031
+ Backport from trunk r197207.
1032
+ 2013-03-28 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1034
+ * config/aarch64/aarch64.md (*and<mode>3_compare0): New pattern.
1035
+ (*andsi3_compare0_uxtw): New pattern.
1036
+ (*and_<SHIFT:optab><mode>3_compare0): New pattern.
1037
+ (*and_<SHIFT:optab>si3_compare0_uxtw): New pattern.
1039
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1041
+ Backport from trunk r197153.
1042
+ 2013-03-27 Terry Guo <terry.guo@arm.com>
1044
+ * config/arm/arm-cores.def: Added core cortex-r7.
1045
+ * config/arm/arm-tune.md: Regenerated.
1046
+ * config/arm/arm-tables.opt: Regenerated.
1047
+ * doc/invoke.texi: Added entry for core cortex-r7.
1049
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1051
+ Backport from trunk r197052.
1052
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1054
+ * config/arm/arm.md (f_sels, f_seld): New types.
1055
+ (*cmov<mode>): New pattern.
1056
+ * config/arm/predicates.md (arm_vsel_comparison_operator): New
1059
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1061
+ Backport from trunk r197046.
1062
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
1064
+ * config/arm/arm.c (arm_emit_load_exclusive): Add acq parameter.
1065
+ Emit load-acquire versions when acq is true.
1066
+ (arm_emit_store_exclusive): Add rel parameter.
1067
+ Emit store-release versions when rel is true.
1068
+ (arm_split_compare_and_swap): Use acquire-release instructions
1070
+ of barriers when appropriate.
1071
+ (arm_split_atomic_op): Likewise.
1072
+ * config/arm/arm.h (TARGET_HAVE_LDACQ): New macro.
1073
+ * config/arm/unspecs.md (VUNSPEC_LAX): New unspec.
1074
+ (VUNSPEC_SLX): Likewise.
1075
+ (VUNSPEC_LDA): Likewise.
1076
+ (VUNSPEC_STL): Likewise.
1077
+ * config/arm/sync.md (atomic_load<mode>): New pattern.
1078
+ (atomic_store<mode>): Likewise.
1079
+ (arm_load_acquire_exclusive<mode>): Likewise.
1080
+ (arm_load_acquire_exclusivesi): Likewise.
1081
+ (arm_load_acquire_exclusivedi): Likewise.
1082
+ (arm_store_release_exclusive<mode>): Likewise.
1084
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1086
+ Backport from trunk r196876.
1087
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
1089
+ * config/arm/arm-protos.h (tune_params): Add
1090
+ prefer_neon_for_64bits field.
1091
+ * config/arm/arm.c (prefer_neon_for_64bits): New variable.
1092
+ (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
1093
+ (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
1094
+ (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
1095
+ (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
1096
+ (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
1097
+ (arm_option_override): Handle -mneon-for-64bits new option.
1098
+ * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
1099
+ (prefer_neon_for_64bits): Declare new variable.
1100
+ * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
1101
+ avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
1103
+ (arch_enabled): Handle new arch types. Remove support for onlya8
1105
+ (one_cmpldi2): Use new arch names.
1106
+ * config/arm/arm.opt (mneon-for-64bits): Add option.
1107
+ * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
1108
+ (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
1109
+ neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
1111
+ * doc/invoke.texi (-mneon-for-64bits): Document.
1113
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1115
+ Backport from trunk r196858.
1116
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1118
+ * config/aarch64/aarch64-simd.md (simd_fabd): New Attribute.
1119
+ (abd<mode>_3): New pattern.
1120
+ (aba<mode>_3): New pattern.
1121
+ (fabd<mode>_3): New pattern.
1123
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1125
+ Backport from trunk r196856.
1126
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1128
+ * config/aarch64/aarch64-elf.h (REGISTER_PREFIX): Remove.
1129
+ * config/aarch64/aarch64.c (aarch64_print_operand): Remove all
1130
+ occurrence of REGISTER_PREFIX as its empty string.
1131
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
1132
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
1134
+/* { dg-do compile } */
1135
+/* { dg-require-effective-target arm_v8_neon_ok } */
1136
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1137
+/* { dg-add-options arm_v8_neon } */
1142
+foo (float *output, float *input)
1145
+ /* Vectorizable. */
1146
+ for (i = 0; i < N; i++)
1147
+ output[i] = __builtin_floorf (input[i]);
1150
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_floorf } } } */
1151
+/* { dg-final { cleanup-tree-dump "vect" } } */
1152
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
1153
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
1155
+/* Test the `vcvtf32_f16' ARM Neon intrinsic. */
1156
+/* This file was autogenerated by neon-testgen. */
1158
+/* { dg-do assemble } */
1159
+/* { dg-require-effective-target arm_neon_fp16_ok } */
1160
+/* { dg-options "-save-temps -O0" } */
1161
+/* { dg-add-options arm_neon_fp16 } */
1163
+#include "arm_neon.h"
1165
+void test_vcvtf32_f16 (void)
1167
+ float32x4_t out_float32x4_t;
1168
+ float16x4_t arg0_float16x4_t;
1170
+ out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
1173
+/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
1174
+/* { dg-final { cleanup-saved-temps } } */
1175
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
1176
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
1178
+/* Test the `vcvtf16_f32' ARM Neon intrinsic. */
1179
+/* This file was autogenerated by neon-testgen. */
1181
+/* { dg-do assemble } */
1182
+/* { dg-require-effective-target arm_neon_fp16_ok } */
1183
+/* { dg-options "-save-temps -O0" } */
1184
+/* { dg-add-options arm_neon_fp16 } */
1186
+#include "arm_neon.h"
1188
+void test_vcvtf16_f32 (void)
1190
+ float16x4_t out_float16x4_t;
1191
+ float32x4_t arg0_float32x4_t;
1193
+ out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
1196
+/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
1197
+/* { dg-final { cleanup-saved-temps } } */
1198
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
1199
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
1201
+/* { dg-do compile } */
1202
+/* { dg-options "-O1" } */
1205
+muld (unsigned long long X, unsigned long long Y)
1207
+ unsigned long long mask = 0xffffffffull;
1208
+ return (X & mask) * (Y & mask);
1211
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
1212
--- a/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
1213
+++ b/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
1215
+/* { dg-do compile } */
1216
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
1217
+/* { dg-options "-O2" } */
1218
+int foo(int a, int b, int* p, int *q)
1225
+/* { dg-final { scan-assembler "ldrd" } } */
1226
--- a/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
1227
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
1229
+/* { dg-do compile } */
1230
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1231
+/* { dg-options "-O2" } */
1232
+/* { dg-add-options arm_v8_vfp } */
1235
+foo (double x, double y)
1237
+ volatile int i = 0;
1238
+ return i > 0 ? x : y;
1241
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
1242
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
1243
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
1245
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1246
+/* { dg-do compile } */
1247
+/* { dg-options "-O2" } */
1248
+/* { dg-add-options arm_arch_v8a } */
1250
+#include "../aarch64/atomic-op-relaxed.x"
1252
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1253
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1254
+/* { dg-final { scan-assembler-not "dmb" } } */
1255
--- a/src/gcc/testsuite/gcc.target/arm/vselgesf.c
1256
+++ b/src/gcc/testsuite/gcc.target/arm/vselgesf.c
1258
+/* { dg-do compile } */
1259
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1260
+/* { dg-options "-O2" } */
1261
+/* { dg-add-options arm_v8_vfp } */
1264
+foo (float x, float y)
1266
+ volatile int i = 0;
1267
+ return i >= 0 ? x : y;
1270
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
1271
--- a/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
1272
+++ b/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
1274
+/* { dg-do compile } */
1275
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
1276
+/* { dg-options "-O2" } */
1277
+void foo(int a, int b, int* p)
1282
+/* { dg-final { scan-assembler "strd" } } */
1283
--- a/src/gcc/testsuite/gcc.target/arm/negdi-1.c
1284
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-1.c
1286
+/* { dg-do compile } */
1287
+/* { dg-require-effective-target arm32 } */
1288
+/* { dg-options "-O2" } */
1290
+signed long long extendsidi_negsi (signed int x)
1298
+ mov r1, r0, asr #31
1300
+/* { dg-final { scan-assembler-times "rsb" 1 { target { arm_nothumb } } } } */
1301
+/* { dg-final { scan-assembler-times "negs\\t" 1 { target { ! { arm_nothumb } } } } } */
1302
+/* { dg-final { scan-assembler-times "asr" 1 } } */
1303
--- a/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
1304
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
1306
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1307
+/* { dg-do compile } */
1308
+/* { dg-options "-O2" } */
1309
+/* { dg-add-options arm_arch_v8a } */
1311
+#include "../aarch64/atomic-comp-swap-release-acquire.x"
1313
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 4 } } */
1314
+/* { dg-final { scan-assembler-times "stlex" 4 } } */
1315
+/* { dg-final { scan-assembler-not "dmb" } } */
1316
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
1317
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
1319
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1320
+/* { dg-do compile } */
1321
+/* { dg-options "-O2" } */
1322
+/* { dg-add-options arm_arch_v8a } */
1324
+#include "../aarch64/atomic-op-seq_cst.x"
1326
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1327
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1328
+/* { dg-final { scan-assembler-not "dmb" } } */
1329
--- a/src/gcc/testsuite/gcc.target/arm/vselgedf.c
1330
+++ b/src/gcc/testsuite/gcc.target/arm/vselgedf.c
1332
+/* { dg-do compile } */
1333
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1334
+/* { dg-options "-O2" } */
1335
+/* { dg-add-options arm_v8_vfp } */
1338
+foo (double x, double y)
1340
+ volatile int i = 0;
1341
+ return i >= 0 ? x : y;
1344
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
1345
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
1346
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
1348
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1349
+/* { dg-do compile } */
1350
+/* { dg-options "-O2" } */
1351
+/* { dg-add-options arm_arch_v8a } */
1353
+#include "../aarch64/atomic-op-consume.x"
1355
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1356
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1357
+/* { dg-final { scan-assembler-not "dmb" } } */
1358
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
1359
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
1361
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1362
+/* { dg-do compile } */
1363
+/* { dg-options "-O2" } */
1364
+/* { dg-add-options arm_arch_v8a } */
1366
+#include "../aarch64/atomic-op-char.x"
1368
+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1369
+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1370
+/* { dg-final { scan-assembler-not "dmb" } } */
1371
--- a/src/gcc/testsuite/gcc.target/arm/vselnesf.c
1372
+++ b/src/gcc/testsuite/gcc.target/arm/vselnesf.c
1374
+/* { dg-do compile } */
1375
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1376
+/* { dg-options "-O2" } */
1377
+/* { dg-add-options arm_v8_vfp } */
1380
+foo (float x, float y)
1382
+ volatile int i = 0;
1383
+ return i != 0 ? x : y;
1386
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
1387
--- a/src/gcc/testsuite/gcc.target/arm/negdi-2.c
1388
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-2.c
1390
+/* { dg-do compile } */
1391
+/* { dg-require-effective-target arm32 } */
1392
+/* { dg-options "-O2" } */
1394
+signed long long zero_extendsidi_negsi (unsigned int x)
1403
+/* { dg-final { scan-assembler-times "rsb\\tr0, r0, #0" 1 { target { arm_nothumb } } } } */
1404
+/* { dg-final { scan-assembler-times "negs\\tr0, r0" 1 { target { ! arm_nothumb } } } } */
1405
+/* { dg-final { scan-assembler-times "mov" 1 } } */
1406
--- a/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
1407
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
1409
+/* { dg-do compile } */
1410
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1411
+/* { dg-options "-O2" } */
1412
+/* { dg-add-options arm_v8_vfp } */
1415
+foo (float x, float y)
1417
+ return !__builtin_isunordered (x, y) ? x : y;
1420
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
1421
--- a/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
1422
+++ b/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
1424
+/* { dg-do compile } */
1425
+/* { dg-options "-O2" } */
1427
+#define MAX(a, b) (a > b ? a : b)
1429
+foo (int a, int b, int c)
1431
+ return c - MAX (a, b);
1434
+/* { dg-final { scan-assembler "rsbge" } } */
1435
+/* { dg-final { scan-assembler "rsblt" } } */
1436
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
1437
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
1439
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1440
+/* { dg-do compile } */
1441
+/* { dg-options "-O2" } */
1442
+/* { dg-add-options arm_arch_v8a } */
1444
+#include "../aarch64/atomic-op-release.x"
1446
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1447
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1448
+/* { dg-final { scan-assembler-not "dmb" } } */
1449
--- a/src/gcc/testsuite/gcc.target/arm/vselvssf.c
1450
+++ b/src/gcc/testsuite/gcc.target/arm/vselvssf.c
1452
+/* { dg-do compile } */
1453
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1454
+/* { dg-options "-O2" } */
1455
+/* { dg-add-options arm_v8_vfp } */
1458
+foo (float x, float y)
1460
+ return __builtin_isunordered (x, y) ? x : y;
1463
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
1464
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
1465
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
1467
+/* { dg-do compile } */
1468
+/* { dg-require-effective-target arm_v8_neon_ok } */
1469
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1470
+/* { dg-add-options arm_v8_neon } */
1475
+foo (float *output, float *input)
1478
+ /* Vectorizable. */
1479
+ for (i = 0; i < N; i++)
1480
+ output[i] = __builtin_roundf (input[i]);
1483
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_roundf } } } */
1484
+/* { dg-final { cleanup-tree-dump "vect" } } */
1485
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
1486
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
1488
+/* Check that Neon is *not* used by default to handle 64-bits scalar
1491
+/* { dg-do compile } */
1492
+/* { dg-require-effective-target arm_neon_ok } */
1493
+/* { dg-options "-O2" } */
1494
+/* { dg-add-options arm_neon } */
1496
+typedef long long i64;
1497
+typedef unsigned long long u64;
1498
+typedef unsigned int u32;
1501
+/* Unary operators */
1502
+#define UNARY_OP(name, op) \
1503
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
1505
+/* Binary operators */
1506
+#define BINARY_OP(name, op) \
1507
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
1509
+/* Unsigned shift */
1510
+#define SHIFT_U(name, op, amount) \
1511
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
1514
+#define SHIFT_S(name, op, amount) \
1515
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
1525
+SHIFT_U(right1, >>, 1)
1526
+SHIFT_U(right2, >>, 2)
1527
+SHIFT_U(right5, >>, 5)
1528
+SHIFT_U(rightn, >>, c)
1530
+SHIFT_S(right1, >>, 1)
1531
+SHIFT_S(right2, >>, 2)
1532
+SHIFT_S(right5, >>, 5)
1533
+SHIFT_S(rightn, >>, c)
1535
+/* { dg-final {scan-assembler-times "vmvn" 0} } */
1536
+/* { dg-final {scan-assembler-times "vadd" 0} } */
1537
+/* { dg-final {scan-assembler-times "vsub" 0} } */
1538
+/* { dg-final {scan-assembler-times "vand" 0} } */
1539
+/* { dg-final {scan-assembler-times "vorr" 0} } */
1540
+/* { dg-final {scan-assembler-times "veor" 0} } */
1541
+/* { dg-final {scan-assembler-times "vshr" 0} } */
1542
--- a/src/gcc/testsuite/gcc.target/arm/negdi-3.c
1543
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-3.c
1545
+/* { dg-do compile } */
1546
+/* { dg-require-effective-target arm32 } */
1547
+/* { dg-options "-O2" } */
1549
+signed long long negdi_zero_extendsidi (unsigned int x)
1551
+ return -((signed long long) x);
1558
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
1559
+/* { dg-final { scan-assembler-times "sbc" 1 } } */
1560
+/* { dg-final { scan-assembler-times "mov" 0 } } */
1561
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
1562
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
1563
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
1565
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1566
+/* { dg-do compile } */
1567
+/* { dg-options "-O2" } */
1568
+/* { dg-add-options arm_arch_v8a } */
1570
+#include "../aarch64/atomic-op-acq_rel.x"
1572
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1573
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1574
+/* { dg-final { scan-assembler-not "dmb" } } */
1575
--- a/src/gcc/testsuite/gcc.target/arm/vselltsf.c
1576
+++ b/src/gcc/testsuite/gcc.target/arm/vselltsf.c
1578
+/* { dg-do compile } */
1579
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1580
+/* { dg-options "-O2" } */
1581
+/* { dg-add-options arm_v8_vfp } */
1584
+foo (float x, float y)
1586
+ volatile int i = 0;
1587
+ return i < 0 ? x : y;
1590
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
1591
--- a/src/gcc/testsuite/gcc.target/arm/vselnedf.c
1592
+++ b/src/gcc/testsuite/gcc.target/arm/vselnedf.c
1594
+/* { dg-do compile } */
1595
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1596
+/* { dg-options "-O2" } */
1597
+/* { dg-add-options arm_v8_vfp } */
1600
+foo (double x, double y)
1602
+ volatile int i = 0;
1603
+ return i != 0 ? x : y;
1606
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
1607
--- a/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
1608
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
1610
+/* { dg-do compile } */
1611
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1612
+/* { dg-options "-O2" } */
1613
+/* { dg-add-options arm_v8_vfp } */
1616
+foo (double x, double y)
1618
+ return !__builtin_isunordered (x, y) ? x : y;
1621
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
1622
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
1623
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
1625
+/* { dg-do compile } */
1626
+/* { dg-require-effective-target arm_v8_neon_ok } */
1627
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1628
+/* { dg-add-options arm_v8_neon } */
1633
+foo (float *output, float *input)
1636
+ /* Vectorizable. */
1637
+ for (i = 0; i < N; i++)
1638
+ output[i] = __builtin_truncf (input[i]);
1641
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_btruncf } } } */
1642
+/* { dg-final { cleanup-tree-dump "vect" } } */
1643
--- a/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
1644
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
1646
+/* { dg-do compile } */
1647
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1648
+/* { dg-options "-O2" } */
1649
+/* { dg-add-options arm_v8_vfp } */
1652
+foo (float x, float y)
1654
+ volatile int i = 0;
1655
+ return i == 0 ? x : y;
1658
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
1659
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
1660
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
1662
+/* Check that Neon is used to handle 64-bits scalar operations. */
1664
+/* { dg-do compile } */
1665
+/* { dg-require-effective-target arm_neon_ok } */
1666
+/* { dg-options "-O2 -mneon-for-64bits" } */
1667
+/* { dg-add-options arm_neon } */
1669
+typedef long long i64;
1670
+typedef unsigned long long u64;
1671
+typedef unsigned int u32;
1674
+/* Unary operators */
1675
+#define UNARY_OP(name, op) \
1676
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
1678
+/* Binary operators */
1679
+#define BINARY_OP(name, op) \
1680
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
1682
+/* Unsigned shift */
1683
+#define SHIFT_U(name, op, amount) \
1684
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
1687
+#define SHIFT_S(name, op, amount) \
1688
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
1698
+SHIFT_U(right1, >>, 1)
1699
+SHIFT_U(right2, >>, 2)
1700
+SHIFT_U(right5, >>, 5)
1701
+SHIFT_U(rightn, >>, c)
1703
+SHIFT_S(right1, >>, 1)
1704
+SHIFT_S(right2, >>, 2)
1705
+SHIFT_S(right5, >>, 5)
1706
+SHIFT_S(rightn, >>, c)
1708
+/* { dg-final {scan-assembler-times "vmvn" 1} } */
1709
+/* Two vadd: 1 in unary_not, 1 in binary_add */
1710
+/* { dg-final {scan-assembler-times "vadd" 2} } */
1711
+/* { dg-final {scan-assembler-times "vsub" 1} } */
1712
+/* { dg-final {scan-assembler-times "vand" 1} } */
1713
+/* { dg-final {scan-assembler-times "vorr" 1} } */
1714
+/* { dg-final {scan-assembler-times "veor" 1} } */
1715
+/* 6 vshr for right shifts by constant, and variable right shift uses
1716
+ vshl with a negative amount in register. */
1717
+/* { dg-final {scan-assembler-times "vshr" 6} } */
1718
+/* { dg-final {scan-assembler-times "vshl" 2} } */
1719
--- a/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
1720
+++ b/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
1722
+/* { dg-do compile } */
1723
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1724
+/* { dg-options "-O2" } */
1725
+/* { dg-add-options arm_v8_vfp } */
1728
+foo (double x, double y)
1730
+ return __builtin_isunordered (x, y) ? x : y;
1733
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
1734
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
1735
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
1737
+/* { dg-do compile } */
1738
+/* { dg-options "-O1" } */
1740
+long long muld(long long X, long long Y)
1745
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
1746
--- a/src/gcc/testsuite/gcc.target/arm/negdi-4.c
1747
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-4.c
1749
+/* { dg-do compile } */
1750
+/* { dg-require-effective-target arm32 } */
1751
+/* { dg-options "-O2" } */
1753
+signed long long negdi_extendsidi (signed int x)
1755
+ return -((signed long long) x);
1760
+ mov r1, r0, asr #31
1762
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
1763
+/* { dg-final { scan-assembler-times "asr" 1 } } */
1764
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
1765
--- a/src/gcc/testsuite/gcc.target/arm/vselltdf.c
1766
+++ b/src/gcc/testsuite/gcc.target/arm/vselltdf.c
1768
+/* { dg-do compile } */
1769
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1770
+/* { dg-options "-O2" } */
1771
+/* { dg-add-options arm_v8_vfp } */
1774
+foo (double x, double y)
1776
+ volatile int i = 0;
1777
+ return i < 0 ? x : y;
1780
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
1781
--- a/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
1782
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
1784
+/* { dg-do compile } */
1785
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1786
+/* { dg-options "-O2" } */
1787
+/* { dg-add-options arm_v8_vfp } */
1790
+foo (double x, double y)
1792
+ volatile int i = 0;
1793
+ return i == 0 ? x : y;
1796
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
1797
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
1798
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
1800
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1801
+/* { dg-do compile } */
1802
+/* { dg-options "-O2" } */
1803
+/* { dg-add-options arm_arch_v8a } */
1805
+#include "../aarch64/atomic-op-acquire.x"
1807
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1808
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1809
+/* { dg-final { scan-assembler-not "dmb" } } */
1810
--- a/src/gcc/testsuite/gcc.target/arm/vsellesf.c
1811
+++ b/src/gcc/testsuite/gcc.target/arm/vsellesf.c
1813
+/* { dg-do compile } */
1814
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1815
+/* { dg-options "-O2" } */
1816
+/* { dg-add-options arm_v8_vfp } */
1819
+foo (float x, float y)
1821
+ volatile int i = 0;
1822
+ return i <= 0 ? x : y;
1825
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
1826
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
1827
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
1829
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1830
+/* { dg-do compile } */
1831
+/* { dg-options "-O2" } */
1832
+/* { dg-add-options arm_arch_v8a } */
1834
+#include "../aarch64/atomic-op-int.x"
1836
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1837
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1838
+/* { dg-final { scan-assembler-not "dmb" } } */
1839
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
1840
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
1842
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1843
+/* { dg-do compile } */
1844
+/* { dg-options "-O2" } */
1845
+/* { dg-add-options arm_arch_v8a } */
1847
+#include "../aarch64/atomic-op-short.x"
1849
+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1850
+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1851
+/* { dg-final { scan-assembler-not "dmb" } } */
1852
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
1853
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
1855
+/* { dg-do compile } */
1856
+/* { dg-require-effective-target arm_v8_neon_ok } */
1857
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1858
+/* { dg-add-options arm_v8_neon } */
1863
+foo (float *output, float *input)
1866
+ /* Vectorizable. */
1867
+ for (i = 0; i < N; i++)
1868
+ output[i] = __builtin_ceilf (input[i]);
1871
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_ceilf } } } */
1872
+/* { dg-final { cleanup-tree-dump "vect" } } */
1873
--- a/src/gcc/testsuite/gcc.target/arm/vselledf.c
1874
+++ b/src/gcc/testsuite/gcc.target/arm/vselledf.c
1876
+/* { dg-do compile } */
1877
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1878
+/* { dg-options "-O2" } */
1879
+/* { dg-add-options arm_v8_vfp } */
1882
+foo (double x, double y)
1884
+ volatile int i = 0;
1885
+ return i <= 0 ? x : y;
1888
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
1889
--- a/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
1890
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
1892
+/* { dg-do compile } */
1893
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1894
+/* { dg-options "-O2" } */
1895
+/* { dg-add-options arm_v8_vfp } */
1898
+foo (float x, float y)
1900
+ volatile int i = 0;
1901
+ return i > 0 ? x : y;
1904
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
1905
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
1906
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
1908
+/* { dg-do run } */
1909
+/* { dg-options "-O3 --save-temps" } */
1911
+#include <arm_neon.h>
1913
+#include <stdlib.h>
1916
+test_frecps_float32_t (void)
1919
+ float32_t value = 0.2;
1920
+ float32_t reciprocal = 5.0;
1921
+ float32_t step = vrecpes_f32 (value);
1922
+ /* 3 steps should give us within ~0.001 accuracy. */
1923
+ for (i = 0; i < 3; i++)
1924
+ step = step * vrecpss_f32 (step, value);
1926
+ return fabs (step - reciprocal) < 0.001;
1929
+/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
1930
+/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
1933
+test_frecps_float32x2_t (void)
1938
+ const float32_t value_pool[] = {0.2, 0.4};
1939
+ const float32_t reciprocal_pool[] = {5.0, 2.5};
1940
+ float32x2_t value = vld1_f32 (value_pool);
1941
+ float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
1943
+ float32x2_t step = vrecpe_f32 (value);
1944
+ /* 3 steps should give us within ~0.001 accuracy. */
1945
+ for (i = 0; i < 3; i++)
1946
+ step = step * vrecps_f32 (step, value);
1948
+ ret &= fabs (vget_lane_f32 (step, 0)
1949
+ - vget_lane_f32 (reciprocal, 0)) < 0.001;
1950
+ ret &= fabs (vget_lane_f32 (step, 1)
1951
+ - vget_lane_f32 (reciprocal, 1)) < 0.001;
1956
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
1957
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
1960
+test_frecps_float32x4_t (void)
1965
+ const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
1966
+ const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
1967
+ float32x4_t value = vld1q_f32 (value_pool);
1968
+ float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
1970
+ float32x4_t step = vrecpeq_f32 (value);
1971
+ /* 3 steps should give us within ~0.001 accuracy. */
1972
+ for (i = 0; i < 3; i++)
1973
+ step = step * vrecpsq_f32 (step, value);
1975
+ ret &= fabs (vgetq_lane_f32 (step, 0)
1976
+ - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
1977
+ ret &= fabs (vgetq_lane_f32 (step, 1)
1978
+ - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
1979
+ ret &= fabs (vgetq_lane_f32 (step, 2)
1980
+ - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
1981
+ ret &= fabs (vgetq_lane_f32 (step, 3)
1982
+ - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
1987
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
1988
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
1991
+test_frecps_float64_t (void)
1994
+ float64_t value = 0.2;
1995
+ float64_t reciprocal = 5.0;
1996
+ float64_t step = vrecped_f64 (value);
1997
+ /* 3 steps should give us within ~0.001 accuracy. */
1998
+ for (i = 0; i < 3; i++)
1999
+ step = step * vrecpsd_f64 (step, value);
2001
+ return fabs (step - reciprocal) < 0.001;
2004
+/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
2005
+/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
2008
+test_frecps_float64x2_t (void)
2013
+ const float64_t value_pool[] = {0.2, 0.4};
2014
+ const float64_t reciprocal_pool[] = {5.0, 2.5};
2015
+ float64x2_t value = vld1q_f64 (value_pool);
2016
+ float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
2018
+ float64x2_t step = vrecpeq_f64 (value);
2019
+ /* 3 steps should give us within ~0.001 accuracy. */
2020
+ for (i = 0; i < 3; i++)
2021
+ step = step * vrecpsq_f64 (step, value);
2023
+ ret &= fabs (vgetq_lane_f64 (step, 0)
2024
+ - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
2025
+ ret &= fabs (vgetq_lane_f64 (step, 1)
2026
+ - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
2031
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
2032
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
2035
+main (int argc, char **argv)
2037
+ if (!test_frecps_float32_t ())
2039
+ if (!test_frecps_float32x2_t ())
2041
+ if (!test_frecps_float32x4_t ())
2043
+ if (!test_frecps_float64_t ())
2045
+ if (!test_frecps_float64x2_t ())
2051
+/* { dg-final { cleanup-saved-temps } } */
2052
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c
2053
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c
2055
+/* { dg-do run } */
2056
+/* { dg-options "-O3 --save-temps" } */
2058
+#include <arm_neon.h>
2060
+extern void abort (void);
2061
+extern float fabsf (float);
2062
+extern double fabs (double);
2064
+#define NUM_TESTS 8
2066
+float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
2067
+float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f};
2068
+double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
2069
+double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5};
2071
+#define TEST(TEST, CMP, SUFFIX, WIDTH, F) \
2073
+test_fca##TEST##SUFFIX##_float##WIDTH##_t (void) \
2077
+ uint##WIDTH##_t output[NUM_TESTS]; \
2079
+ for (i = 0; i < NUM_TESTS; i++) \
2081
+ float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \
2082
+ float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \
2083
+ /* Inhibit optimization of our linear test loop. */ \
2084
+ asm volatile ("" : : : "memory"); \
2085
+ output[i] = f1 CMP f2 ? -1 : 0; \
2088
+ for (i = 0; i < NUM_TESTS; i++) \
2090
+ output[i] = vca##TEST##SUFFIX##_f##WIDTH (input_##SUFFIX##1[i], \
2091
+ input_##SUFFIX##2[i]) \
2093
+ /* Inhibit autovectorization of our scalar test loop. */ \
2094
+ asm volatile ("" : : : "memory"); \
2097
+ for (i = 0; i < NUM_TESTS; i++) \
2098
+ ret |= output[i]; \
2103
+TEST (ge, >=, s, 32, f)
2104
+/* { dg-final { scan-assembler "facge\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
2105
+TEST (ge, >=, d, 64, )
2106
+/* { dg-final { scan-assembler "facge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
2107
+TEST (gt, >, s, 32, f)
2108
+/* { dg-final { scan-assembler "facgt\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
2109
+TEST (gt, >, d, 64, )
2110
+/* { dg-final { scan-assembler "facgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
2113
+main (int argc, char **argv)
2115
+ if (test_fcages_float32_t ())
2117
+ if (test_fcaged_float64_t ())
2119
+ if (test_fcagts_float32_t ())
2121
+ if (test_fcagtd_float64_t ())
2126
+/* { dg-final { cleanup-saved-temps } } */
2127
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
2128
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
2133
+atomic_fetch_add_ACQ_REL (int a)
2135
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
2139
+atomic_fetch_sub_ACQ_REL (int a)
2141
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
2145
+atomic_fetch_and_ACQ_REL (int a)
2147
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
2151
+atomic_fetch_nand_ACQ_REL (int a)
2153
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
2157
+atomic_fetch_xor_ACQ_REL (int a)
2159
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
2163
+atomic_fetch_or_ACQ_REL (int a)
2165
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
2167
--- a/src/gcc/testsuite/gcc.target/aarch64/extr.c
2168
+++ b/src/gcc/testsuite/gcc.target/aarch64/extr.c
2170
+/* { dg-options "-O2 --save-temps" } */
2171
+/* { dg-do run } */
2173
+extern void abort (void);
2176
+test_si (int a, int b)
2178
+ /* { dg-final { scan-assembler "extr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, 27\n" } } */
2179
+ return (a << 5) | ((unsigned int) b >> 27);
2183
+test_di (long long a, long long b)
2185
+ /* { dg-final { scan-assembler "extr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, 45\n" } } */
2186
+ return (a << 19) | ((unsigned long long) b >> 45);
2194
+ v = test_si (0x00000004, 0x30000000);
2195
+ if (v != 0x00000086)
2197
+ w = test_di (0x0001040040040004ll, 0x0070050066666666ll);
2198
+ if (w != 0x2002002000200380ll)
2203
+/* { dg-final { cleanup-saved-temps } } */
2204
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
2205
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
2207
/* { dg-final { scan-assembler "uminv" } } */
2208
/* { dg-final { scan-assembler "smaxv" } } */
2209
/* { dg-final { scan-assembler "sminv" } } */
2210
+/* { dg-final { scan-assembler "sabd" } } */
2211
+/* { dg-final { scan-assembler "saba" } } */
2212
/* { dg-final { scan-assembler-times "addv" 2} } */
2213
/* { dg-final { scan-assembler-times "addp" 2} } */
2214
--- a/src/gcc/testsuite/gcc.target/aarch64/adds3.c
2215
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds3.c
2217
+/* { dg-do run } */
2218
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2220
+extern void abort (void);
2221
+typedef long long s64;
2224
+adds_ext (s64 a, int b, int c)
2235
+adds_shift_ext (s64 a, int b, int c)
2237
+ s64 d = (a + ((s64)b << 3));
2250
+ x = adds_ext (0x13000002ll, 41, 15);
2251
+ if (x != 318767203)
2254
+ x = adds_ext (0x50505050ll, 29, 4);
2255
+ if (x != 1347440782)
2258
+ x = adds_ext (0x12121212121ll, 2, 14);
2259
+ if (x != 555819315)
2262
+ x = adds_shift_ext (0x123456789ll, 4, 12);
2263
+ if (x != 591751097)
2266
+ x = adds_shift_ext (0x02020202ll, 9, 8);
2267
+ if (x != 33686107)
2270
+ x = adds_shift_ext (0x987987987987ll, 23, 41);
2271
+ if (x != -2020050305)
2277
+/* { dg-final { scan-assembler-times "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
2278
--- a/src/gcc/testsuite/gcc.target/aarch64/subs2.c
2279
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs2.c
2281
+/* { dg-do run } */
2282
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2284
+extern void abort (void);
2287
+subs_si_test1 (int a, int b, int c)
2291
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
2292
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
2300
+subs_si_test2 (int a, int b, int c)
2302
+ int d = a - 0xfff;
2304
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
2305
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
2313
+subs_si_test3 (int a, int b, int c)
2315
+ int d = a - (b << 3);
2317
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
2318
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
2325
+typedef long long s64;
2328
+subs_di_test1 (s64 a, s64 b, s64 c)
2332
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
2333
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
2341
+subs_di_test2 (s64 a, s64 b, s64 c)
2343
+ s64 d = a - 0x1000ll;
2345
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
2346
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
2354
+subs_di_test3 (s64 a, s64 b, s64 c)
2356
+ s64 d = a - (b << 3);
2358
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
2359
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
2371
+ x = subs_si_test1 (29, 4, 5);
2375
+ x = subs_si_test1 (5, 2, 20);
2379
+ x = subs_si_test2 (29, 4, 5);
2383
+ x = subs_si_test2 (1024, 2, 20);
2387
+ x = subs_si_test3 (35, 4, 5);
2391
+ x = subs_si_test3 (5, 2, 20);
2395
+ y = subs_di_test1 (0x130000029ll,
2399
+ if (y != 0x63505052e)
2402
+ y = subs_di_test1 (0x5000500050005ll,
2403
+ 0x2111211121112ll,
2404
+ 0x0000000002020ll);
2405
+ if (y != 0x5000500052025)
2408
+ y = subs_di_test2 (0x130000029ll,
2411
+ if (y != 0x95504f532)
2414
+ y = subs_di_test2 (0x540004100ll,
2417
+ if (y != 0x1065053309)
2420
+ y = subs_di_test3 (0x130000029ll,
2423
+ if (y != 0x63505052e)
2426
+ y = subs_di_test3 (0x130002900ll,
2429
+ if (y != 0x635052e05)
2435
+/* { dg-final { cleanup-saved-temps } } */
2436
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
2437
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
2439
+/* { dg-do run } */
2440
+/* { dg-options "-O3 --save-temps" } */
2442
+#include <arm_neon.h>
2444
+#include <stdlib.h>
2447
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
2448
+float32_t rec_f[] =
2449
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
2451
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
2452
+float32_t rec_d[] =
2453
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
2456
+test_frecpx_float32_t (void)
2460
+ for (i = 0; i < 8; i++)
2461
+ ret &= fabs (vrecpxs_f32 (in_f[i]) - rec_f[i]) < 0.001;
2466
+/* { dg-final { scan-assembler "frecpx\\ts\[0-9\]+, s\[0-9\]+" } } */
2469
+test_frecpx_float64_t (void)
2473
+ for (i = 0; i < 8; i++)
2474
+ ret &= fabs (vrecpxd_f64 (in_d[i]) - rec_d[i]) < 0.001;
2479
+/* { dg-final { scan-assembler "frecpx\\td\[0-9\]+, d\[0-9\]+" } } */
2482
+main (int argc, char **argv)
2484
+ if (!test_frecpx_float32_t ())
2486
+ if (!test_frecpx_float64_t ())
2492
+/* { dg-final { cleanup-saved-temps } } */
2493
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c
2494
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c
2496
+/* { dg-do run } */
2497
+/* { dg-options "-O3 --save-temps" } */
2499
+#include <arm_neon.h>
2501
+extern void abort (void);
2502
+extern float fabsf (float);
2503
+extern double fabs (double);
2505
+#define NUM_TESTS 8
2507
+float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
2508
+float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f};
2509
+double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
2510
+double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5};
2512
+#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \
2514
+test_vca##T##_float##WIDTH##x##LANES##_t (void) \
2518
+ uint##WIDTH##_t output[NUM_TESTS]; \
2520
+ for (i = 0; i < NUM_TESTS; i++) \
2522
+ float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \
2523
+ float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \
2524
+ /* Inhibit optimization of our linear test loop. */ \
2525
+ asm volatile ("" : : : "memory"); \
2526
+ output[i] = f1 CMP f2 ? -1 : 0; \
2529
+ for (i = 0; i < NUM_TESTS; i += LANES) \
2531
+ float##WIDTH##x##LANES##_t in1 = \
2532
+ vld1##Q##_f##WIDTH (input_##SUFFIX##1 + i); \
2533
+ float##WIDTH##x##LANES##_t in2 = \
2534
+ vld1##Q##_f##WIDTH (input_##SUFFIX##2 + i); \
2535
+ uint##WIDTH##x##LANES##_t expected_out = \
2536
+ vld1##Q##_u##WIDTH (output + i); \
2537
+ uint##WIDTH##x##LANES##_t out = \
2538
+ veor##Q##_u##WIDTH (vca##T##Q##_f##WIDTH (in1, in2), \
2540
+ vst1##Q##_u##WIDTH (output + i, out); \
2543
+ for (i = 0; i < NUM_TESTS; i++) \
2544
+ ret |= output[i]; \
2549
+#define BUILD_VARIANTS(T, CMP) \
2550
+TEST (T, CMP, s, 32, 2, , f) \
2551
+TEST (T, CMP, s, 32, 4, q, f) \
2552
+TEST (T, CMP, d, 64, 2, q, )
2554
+BUILD_VARIANTS (ge, >=)
2555
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2556
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2557
+/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2559
+BUILD_VARIANTS (gt, >)
2560
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2561
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2562
+/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2564
+/* No need for another scan-assembler as these tests
2565
+ also generate facge, facgt instructions. */
2566
+BUILD_VARIANTS (le, <=)
2567
+BUILD_VARIANTS (lt, <)
2570
+#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \
2571
+if (test_vca##T##_float##WIDTH##x##LANES##_t ()) \
2575
+main (int argc, char **argv)
2577
+BUILD_VARIANTS (ge, >=)
2578
+BUILD_VARIANTS (gt, >)
2579
+BUILD_VARIANTS (le, <=)
2580
+BUILD_VARIANTS (lt, <)
2584
+/* { dg-final { cleanup-saved-temps } } */
2585
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
2586
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
2588
+/* { dg-do run } */
2589
+/* { dg-options "-O3 --save-temps" } */
2591
+#include <arm_neon.h>
2593
+extern void abort (void);
2594
+extern float fabsf (float);
2595
+extern double fabs (double);
2597
+extern double trunc (double);
2598
+extern double round (double);
2599
+extern double nearbyint (double);
2600
+extern double floor (double);
2601
+extern double ceil (double);
2602
+extern double rint (double);
2604
+extern float truncf (float);
2605
+extern float roundf (float);
2606
+extern float nearbyintf (float);
2607
+extern float floorf (float);
2608
+extern float ceilf (float);
2609
+extern float rintf (float);
2611
+#define NUM_TESTS 8
2612
+#define DELTA 0.000001
2614
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
2615
+ 200.0f, -800.0f, -13.0f, -0.5f};
2616
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
2617
+ 200.0, -800.0, -13.0, -0.5};
2619
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
2621
+test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \
2625
+ int nlanes = LANES; \
2626
+ float##WIDTH##_t expected_out[NUM_TESTS]; \
2627
+ float##WIDTH##_t actual_out[NUM_TESTS]; \
2629
+ for (i = 0; i < NUM_TESTS; i++) \
2631
+ expected_out[i] = C_FN##F (input_f##WIDTH[i]); \
2632
+ /* Don't vectorize this. */ \
2633
+ asm volatile ("" : : : "memory"); \
2636
+ /* Prevent the compiler from noticing these two loops do the same \
2637
+ thing and optimizing away the comparison. */ \
2638
+ asm volatile ("" : : : "memory"); \
2640
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
2642
+ float##WIDTH##x##LANES##_t out = \
2643
+ vrnd##SUFFIX##Q##_f##WIDTH \
2644
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
2645
+ vst1##Q##_f##WIDTH (actual_out + i, out); \
2648
+ for (i = 0; i < NUM_TESTS; i++) \
2649
+ ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \
2655
+#define BUILD_VARIANTS(SUFFIX, C_FN) \
2656
+TEST (SUFFIX, , 32, 2, C_FN, f) \
2657
+TEST (SUFFIX, q, 32, 4, C_FN, f) \
2658
+TEST (SUFFIX, q, 64, 2, C_FN, ) \
2660
+BUILD_VARIANTS ( , trunc)
2661
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2662
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2663
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2664
+BUILD_VARIANTS (a, round)
2665
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2666
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2667
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2668
+BUILD_VARIANTS (i, nearbyint)
2669
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2670
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2671
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2672
+BUILD_VARIANTS (m, floor)
2673
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2674
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2675
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2676
+BUILD_VARIANTS (p, ceil)
2677
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2678
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2679
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2680
+BUILD_VARIANTS (x, rint)
2681
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2682
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2683
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2686
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
2688
+ if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \
2693
+main (int argc, char **argv)
2695
+ BUILD_VARIANTS ( , trunc)
2696
+ BUILD_VARIANTS (a, round)
2697
+ BUILD_VARIANTS (i, nearbyint)
2698
+ BUILD_VARIANTS (m, floor)
2699
+ BUILD_VARIANTS (p, ceil)
2700
+ BUILD_VARIANTS (x, rint)
2704
+/* { dg-final { cleanup-saved-temps } } */
2705
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
2706
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
2708
/* { dg-do compile } */
2709
/* { dg-options "-O2" } */
2712
+#include "atomic-op-relaxed.x"
2715
-atomic_fetch_add_RELAXED (int a)
2717
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
2721
-atomic_fetch_sub_RELAXED (int a)
2723
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
2727
-atomic_fetch_and_RELAXED (int a)
2729
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
2733
-atomic_fetch_nand_RELAXED (int a)
2735
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
2739
-atomic_fetch_xor_RELAXED (int a)
2741
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
2745
-atomic_fetch_or_RELAXED (int a)
2747
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
2750
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2751
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2752
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
2753
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
2756
+/* { dg-do compile } */
2757
+/* { dg-options "-O3" } */
2759
+#include "arm_neon.h"
2761
+#include "vaddv-intrinsic.x"
2763
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
2764
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
2765
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
2766
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
2767
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
2772
+atomic_fetch_add_RELAXED (int a)
2774
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
2778
+atomic_fetch_sub_RELAXED (int a)
2780
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
2784
+atomic_fetch_and_RELAXED (int a)
2786
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
2790
+atomic_fetch_nand_RELAXED (int a)
2792
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
2796
+atomic_fetch_xor_RELAXED (int a)
2798
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
2802
+atomic_fetch_or_RELAXED (int a)
2804
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
2806
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.c
2807
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.c
2809
int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
2810
unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
2811
unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
2812
+ int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2813
+ int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2814
int reduce_smax_value = 0;
2815
int reduce_smin_value = -15;
2816
unsigned int reduce_umax_value = 15;
2823
TESTV (reduce_smax, s);
2824
TESTV (reduce_smin, s);
2825
TESTV (reduce_umax, u);
2826
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c
2827
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c
2829
+/* { dg-do compile } */
2830
+/* { dg-options "-g -mgeneral-regs-only" } */
2833
+foo (const char *c, ...)
2836
+ buf[256 - 1] = '\0';
2838
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
2839
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
2841
/* { dg-do compile } */
2842
/* { dg-options "-O2" } */
2845
+#include "atomic-op-acquire.x"
2848
-atomic_fetch_add_ACQUIRE (int a)
2850
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
2854
-atomic_fetch_sub_ACQUIRE (int a)
2856
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
2860
-atomic_fetch_and_ACQUIRE (int a)
2862
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
2866
-atomic_fetch_nand_ACQUIRE (int a)
2868
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
2872
-atomic_fetch_xor_ACQUIRE (int a)
2874
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
2878
-atomic_fetch_or_ACQUIRE (int a)
2880
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
2883
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2884
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2885
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
2886
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
2888
/* { dg-do compile } */
2889
/* { dg-options "-O2" } */
2894
+#include "atomic-comp-swap-release-acquire.x"
2897
-atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
2899
- return __atomic_compare_exchange (&v, &a, &b,
2900
- STRONG, __ATOMIC_RELEASE,
2901
- __ATOMIC_ACQUIRE);
2905
-atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
2907
- return __atomic_compare_exchange (&v, &a, &b,
2908
- WEAK, __ATOMIC_RELEASE,
2909
- __ATOMIC_ACQUIRE);
2913
-atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
2915
- return __atomic_compare_exchange_n (&v, &a, b,
2916
- STRONG, __ATOMIC_RELEASE,
2917
- __ATOMIC_ACQUIRE);
2921
-atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
2923
- return __atomic_compare_exchange_n (&v, &a, b,
2924
- WEAK, __ATOMIC_RELEASE,
2925
- __ATOMIC_ACQUIRE);
2928
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
2929
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
2930
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.x
2931
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x
2932
@@ -138,3 +138,17 @@
2937
+void sabd (pRINT a, pRINT b, pRINT c)
2940
+ for (i = 0; i < 16; i++)
2941
+ c[i] = abs (a[i] - b[i]);
2944
+void saba (pRINT a, pRINT b, pRINT c)
2947
+ for (i = 0; i < 16; i++)
2948
+ c[i] += abs (a[i] - b[i]);
2950
--- a/src/gcc/testsuite/gcc.target/aarch64/subs3.c
2951
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs3.c
2953
+/* { dg-do run } */
2954
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2956
+extern void abort (void);
2957
+typedef long long s64;
2960
+subs_ext (s64 a, int b, int c)
2971
+subs_shift_ext (s64 a, int b, int c)
2973
+ s64 d = (a - ((s64)b << 3));
2986
+ x = subs_ext (0x13000002ll, 41, 15);
2987
+ if (x != 318767121)
2990
+ x = subs_ext (0x50505050ll, 29, 4);
2991
+ if (x != 1347440724)
2994
+ x = subs_ext (0x12121212121ll, 2, 14);
2995
+ if (x != 555819311)
2998
+ x = subs_shift_ext (0x123456789ll, 4, 12);
2999
+ if (x != 591751033)
3002
+ x = subs_shift_ext (0x02020202ll, 9, 8);
3003
+ if (x != 33685963)
3006
+ x = subs_shift_ext (0x987987987987ll, 23, 41);
3007
+ if (x != -2020050673)
3013
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
3014
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
3015
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
3020
+atomic_fetch_add_ACQUIRE (int a)
3022
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
3026
+atomic_fetch_sub_ACQUIRE (int a)
3028
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
3032
+atomic_fetch_and_ACQUIRE (int a)
3034
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
3038
+atomic_fetch_nand_ACQUIRE (int a)
3040
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
3044
+atomic_fetch_xor_ACQUIRE (int a)
3046
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
3050
+atomic_fetch_or_ACQUIRE (int a)
3052
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
3054
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
3055
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
3058
+/* { dg-do run } */
3059
+/* { dg-options "-O3" } */
3061
+#include "arm_neon.h"
3063
+extern void abort (void);
3065
+#include "vaddv-intrinsic.x"
3070
+ const float32_t pool_v2sf[] = {4.0f, 9.0f};
3071
+ const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
3072
+ const float64_t pool_v2df[] = {4.0, 9.0};
3074
+ if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
3077
+ if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
3080
+ if (test_vaddv_v2df (pool_v2df) != 13.0)
3085
--- a/src/gcc/testsuite/gcc.target/aarch64/sbc.c
3086
+++ b/src/gcc/testsuite/gcc.target/aarch64/sbc.c
3088
+/* { dg-do run } */
3089
+/* { dg-options "-O2 --save-temps" } */
3091
+extern void abort (void);
3093
+typedef unsigned int u32int;
3094
+typedef unsigned long long u64int;
3097
+test_si (u32int w1, u32int w2, u32int w3, u32int w4)
3100
+ /* { dg-final { scan-assembler "sbc\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+\n" } } */
3101
+ w0 = w1 - w2 - (w3 < w4);
3106
+test_di (u64int x1, u64int x2, u64int x3, u64int x4)
3109
+ /* { dg-final { scan-assembler "sbc\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+\n" } } */
3110
+ x0 = x1 - x2 - (x3 < x4);
3119
+ x = test_si (7, 8, 12, 15);
3122
+ y = test_di (0x987654321ll, 0x123456789ll, 0x345345345ll, 0x123123123ll);
3123
+ if (y != 0x8641fdb98ll)
3128
+/* { dg-final { cleanup-saved-temps } } */
3129
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
3130
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
3138
+atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
3140
+ return __atomic_compare_exchange (&v, &a, &b,
3141
+ STRONG, __ATOMIC_RELEASE,
3142
+ __ATOMIC_ACQUIRE);
3146
+atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
3148
+ return __atomic_compare_exchange (&v, &a, &b,
3149
+ WEAK, __ATOMIC_RELEASE,
3150
+ __ATOMIC_ACQUIRE);
3154
+atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
3156
+ return __atomic_compare_exchange_n (&v, &a, b,
3157
+ STRONG, __ATOMIC_RELEASE,
3158
+ __ATOMIC_ACQUIRE);
3162
+atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
3164
+ return __atomic_compare_exchange_n (&v, &a, b,
3165
+ WEAK, __ATOMIC_RELEASE,
3166
+ __ATOMIC_ACQUIRE);
3168
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
3169
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
3171
/* { dg-do compile } */
3172
/* { dg-options "-O2" } */
3174
-#include "../../../config/aarch64/arm_neon.h"
3175
+#include <arm_neon.h>
3177
+/* Used to force a variable to a SIMD register. */
3178
+#define force_simd(V1) asm volatile ("mov %d0, %d1" \
3181
+ : /* No clobbers */);
3183
/* { dg-final { scan-assembler-times "\\tadd\\tx\[0-9\]+" 2 } } */
3188
test_vceqd_s64 (int64x1_t a, int64x1_t b)
3190
- return vceqd_s64 (a, b);
3194
+ res = vceqd_s64 (a, b);
3199
/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
3202
test_vceqzd_s64 (int64x1_t a)
3204
- return vceqzd_s64 (a);
3207
+ res = vceqzd_s64 (a);
3212
/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
3215
test_vcged_s64 (int64x1_t a, int64x1_t b)
3217
- return vcged_s64 (a, b);
3221
+ res = vcged_s64 (a, b);
3227
test_vcled_s64 (int64x1_t a, int64x1_t b)
3229
- return vcled_s64 (a, b);
3233
+ res = vcled_s64 (a, b);
3238
-/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
3239
+/* Idiom recognition will cause this testcase not to generate
3240
+ the expected cmge instruction, so do not check for it. */
3243
test_vcgezd_s64 (int64x1_t a)
3245
- return vcgezd_s64 (a);
3248
+ res = vcgezd_s64 (a);
3253
/* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
3256
test_vcged_u64 (uint64x1_t a, uint64x1_t b)
3258
- return vcged_u64 (a, b);
3262
+ res = vcged_u64 (a, b);
3267
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
3268
@@ -77,13 +112,23 @@
3270
test_vcgtd_s64 (int64x1_t a, int64x1_t b)
3272
- return vcgtd_s64 (a, b);
3276
+ res = vcgtd_s64 (a, b);
3282
test_vcltd_s64 (int64x1_t a, int64x1_t b)
3284
- return vcltd_s64 (a, b);
3288
+ res = vcltd_s64 (a, b);
3293
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
3296
test_vcgtzd_s64 (int64x1_t a)
3298
- return vcgtzd_s64 (a);
3301
+ res = vcgtzd_s64 (a);
3306
/* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
3309
test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
3311
- return vcgtd_u64 (a, b);
3315
+ res = vcgtd_u64 (a, b);
3320
/* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
3321
@@ -107,15 +161,24 @@
3323
test_vclezd_s64 (int64x1_t a)
3325
- return vclezd_s64 (a);
3328
+ res = vclezd_s64 (a);
3333
-/* { dg-final { scan-assembler-times "\\tcmlt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
3334
+/* Idiom recognition will cause this testcase not to generate
3335
+ the expected cmlt instruction, so do not check for it. */
3338
test_vcltzd_s64 (int64x1_t a)
3340
- return vcltzd_s64 (a);
3343
+ res = vcltzd_s64 (a);
3348
/* { dg-final { scan-assembler-times "\\tdup\\tb\[0-9\]+, v\[0-9\]+\.b" 2 } } */
3349
@@ -179,13 +242,23 @@
3351
test_vtst_s64 (int64x1_t a, int64x1_t b)
3353
- return vtstd_s64 (a, b);
3357
+ res = vtstd_s64 (a, b);
3363
test_vtst_u64 (uint64x1_t a, uint64x1_t b)
3365
- return vtstd_u64 (a, b);
3369
+ res = vtstd_s64 (a, b);
3374
/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
3375
@@ -722,8 +795,11 @@
3376
return vrshld_u64 (a, b);
3379
-/* { dg-final { scan-assembler-times "\\tasr\\tx\[0-9\]+" 1 } } */
3380
+/* Other intrinsics can generate an asr instruction (vcltzd, vcgezd),
3381
+ so we cannot check scan-assembler-times. */
3383
+/* { dg-final { scan-assembler "\\tasr\\tx\[0-9\]+" } } */
3386
test_vshrd_n_s64 (int64x1_t a)
3388
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
3389
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
3391
/* { dg-do compile } */
3392
/* { dg-options "-O2" } */
3395
+#include "atomic-op-int.x"
3398
-atomic_fetch_add_RELAXED (int a)
3400
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3404
-atomic_fetch_sub_RELAXED (int a)
3406
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3410
-atomic_fetch_and_RELAXED (int a)
3412
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3416
-atomic_fetch_nand_RELAXED (int a)
3418
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3422
-atomic_fetch_xor_RELAXED (int a)
3424
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3428
-atomic_fetch_or_RELAXED (int a)
3430
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3433
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3434
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3435
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
3436
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
3438
/* { dg-do compile } */
3439
/* { dg-options "-O2" } */
3442
+#include "atomic-op-seq_cst.x"
3445
-atomic_fetch_add_SEQ_CST (int a)
3447
- return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
3451
-atomic_fetch_sub_SEQ_CST (int a)
3453
- return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
3457
-atomic_fetch_and_SEQ_CST (int a)
3459
- return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
3463
-atomic_fetch_nand_SEQ_CST (int a)
3465
- return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
3469
-atomic_fetch_xor_SEQ_CST (int a)
3471
- return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
3475
-atomic_fetch_or_SEQ_CST (int a)
3477
- return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
3480
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3481
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3482
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
3483
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
3487
+test_vaddv_v2sf (const float32_t *pool)
3491
+ val = vld1_f32 (pool);
3492
+ return vaddv_f32 (val);
3496
+test_vaddv_v4sf (const float32_t *pool)
3500
+ val = vld1q_f32 (pool);
3501
+ return vaddvq_f32 (val);
3505
+test_vaddv_v2df (const float64_t *pool)
3509
+ val = vld1q_f64 (pool);
3510
+ return vaddvq_f64 (val);
3512
--- a/src/gcc/testsuite/gcc.target/aarch64/negs.c
3513
+++ b/src/gcc/testsuite/gcc.target/aarch64/negs.c
3515
+/* { dg-do run } */
3516
+/* { dg-options "-O2 --save-temps" } */
3518
+extern void abort (void);
3522
+negs_si_test1 (int a, int b, int c)
3526
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+" } } */
3535
+negs_si_test3 (int a, int b, int c)
3537
+ int d = -(b) << 3;
3539
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 3" } } */
3547
+typedef long long s64;
3551
+negs_di_test1 (s64 a, s64 b, s64 c)
3555
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+" } } */
3564
+negs_di_test3 (s64 a, s64 b, s64 c)
3566
+ s64 d = -(b) << 3;
3568
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 3" } } */
3581
+ x = negs_si_test1 (2, 12, 5);
3585
+ x = negs_si_test1 (1, 2, 32);
3589
+ x = negs_si_test3 (13, 14, 5);
3593
+ x = negs_si_test3 (15, 21, 2);
3597
+ y = negs_di_test1 (0x20202020ll,
3600
+ if (y != 0x62636263ll)
3603
+ y = negs_di_test1 (0x1010101010101ll,
3604
+ 0x123456789abcdll,
3605
+ 0x5555555555555ll);
3606
+ if (y != 0x6565656565656ll)
3609
+ y = negs_di_test3 (0x62523781ll,
3612
+ if (y != 0xfffffffd553d4edbll)
3615
+ y = negs_di_test3 (0x763526268ll,
3618
+ if (y != 0xfffffffb1b1b1b1bll)
3623
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
3624
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
3626
/* { dg-do compile } */
3627
/* { dg-options "-O2" } */
3630
+#include "atomic-op-consume.x"
3633
-atomic_fetch_add_CONSUME (int a)
3635
- return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
3639
-atomic_fetch_sub_CONSUME (int a)
3641
- return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
3645
-atomic_fetch_and_CONSUME (int a)
3647
- return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
3651
-atomic_fetch_nand_CONSUME (int a)
3653
- return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
3657
-atomic_fetch_xor_CONSUME (int a)
3659
- return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
3663
-atomic_fetch_or_CONSUME (int a)
3665
- return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
3668
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3669
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3670
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
3671
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
3673
/* { dg-do compile } */
3674
/* { dg-options "-O2" } */
3677
+#include "atomic-op-char.x"
3680
-atomic_fetch_add_RELAXED (char a)
3682
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3686
-atomic_fetch_sub_RELAXED (char a)
3688
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3692
-atomic_fetch_and_RELAXED (char a)
3694
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3698
-atomic_fetch_nand_RELAXED (char a)
3700
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3704
-atomic_fetch_xor_RELAXED (char a)
3706
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3710
-atomic_fetch_or_RELAXED (char a)
3712
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3715
/* { dg-final { scan-assembler-times "ldxrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3716
/* { dg-final { scan-assembler-times "stxrb\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3717
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
3718
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
3723
+atomic_fetch_add_RELAXED (int a)
3725
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3729
+atomic_fetch_sub_RELAXED (int a)
3731
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3735
+atomic_fetch_and_RELAXED (int a)
3737
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3741
+atomic_fetch_nand_RELAXED (int a)
3743
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3747
+atomic_fetch_xor_RELAXED (int a)
3749
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3753
+atomic_fetch_or_RELAXED (int a)
3755
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3757
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
3758
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
3763
+atomic_fetch_add_SEQ_CST (int a)
3765
+ return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
3769
+atomic_fetch_sub_SEQ_CST (int a)
3771
+ return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
3775
+atomic_fetch_and_SEQ_CST (int a)
3777
+ return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
3781
+atomic_fetch_nand_SEQ_CST (int a)
3783
+ return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
3787
+atomic_fetch_xor_SEQ_CST (int a)
3789
+ return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
3793
+atomic_fetch_or_SEQ_CST (int a)
3795
+ return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
3797
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
3798
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
3803
+atomic_fetch_add_CONSUME (int a)
3805
+ return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
3809
+atomic_fetch_sub_CONSUME (int a)
3811
+ return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
3815
+atomic_fetch_and_CONSUME (int a)
3817
+ return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
3821
+atomic_fetch_nand_CONSUME (int a)
3823
+ return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
3827
+atomic_fetch_xor_CONSUME (int a)
3829
+ return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
3833
+atomic_fetch_or_CONSUME (int a)
3835
+ return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
3837
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
3838
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
3840
/* { dg-do compile } */
3841
/* { dg-options "-O2" } */
3844
+#include "atomic-op-short.x"
3847
-atomic_fetch_add_RELAXED (short a)
3849
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3853
-atomic_fetch_sub_RELAXED (short a)
3855
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3859
-atomic_fetch_and_RELAXED (short a)
3861
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3865
-atomic_fetch_nand_RELAXED (short a)
3867
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3871
-atomic_fetch_xor_RELAXED (short a)
3873
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3877
-atomic_fetch_or_RELAXED (short a)
3879
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3882
/* { dg-final { scan-assembler-times "ldxrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3883
/* { dg-final { scan-assembler-times "stxrh\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3884
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
3885
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
3890
+atomic_fetch_add_RELAXED (char a)
3892
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3896
+atomic_fetch_sub_RELAXED (char a)
3898
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3902
+atomic_fetch_and_RELAXED (char a)
3904
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3908
+atomic_fetch_nand_RELAXED (char a)
3910
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3914
+atomic_fetch_xor_RELAXED (char a)
3916
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3920
+atomic_fetch_or_RELAXED (char a)
3922
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3924
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
3925
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
3927
/* { dg-final { scan-assembler "fdiv\\tv" } } */
3928
/* { dg-final { scan-assembler "fneg\\tv" } } */
3929
/* { dg-final { scan-assembler "fabs\\tv" } } */
3930
+/* { dg-final { scan-assembler "fabd\\tv" } } */
3931
--- a/src/gcc/testsuite/gcc.target/aarch64/adds1.c
3932
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds1.c
3934
+/* { dg-do run } */
3935
+/* { dg-options "-O2 --save-temps -fno-inline" } */
3937
+extern void abort (void);
3940
+adds_si_test1 (int a, int b, int c)
3944
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
3952
+adds_si_test2 (int a, int b, int c)
3956
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, 255" } } */
3964
+adds_si_test3 (int a, int b, int c)
3966
+ int d = a + (b << 3);
3968
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
3975
+typedef long long s64;
3978
+adds_di_test1 (s64 a, s64 b, s64 c)
3982
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
3990
+adds_di_test2 (s64 a, s64 b, s64 c)
3994
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, 255" } } */
4002
+adds_di_test3 (s64 a, s64 b, s64 c)
4004
+ s64 d = a + (b << 3);
4006
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4018
+ x = adds_si_test1 (29, 4, 5);
4022
+ x = adds_si_test1 (5, 2, 20);
4026
+ x = adds_si_test2 (29, 4, 5);
4030
+ x = adds_si_test2 (1024, 2, 20);
4034
+ x = adds_si_test3 (35, 4, 5);
4038
+ x = adds_si_test3 (5, 2, 20);
4042
+ y = adds_di_test1 (0x130000029ll,
4046
+ if (y != 0xc75050536)
4049
+ y = adds_di_test1 (0x5000500050005ll,
4050
+ 0x2111211121112ll,
4051
+ 0x0000000002020ll);
4052
+ if (y != 0x9222922294249)
4055
+ y = adds_di_test2 (0x130000029ll,
4058
+ if (y != 0x955050631)
4061
+ y = adds_di_test2 (0x130002900ll,
4064
+ if (y != 0x955052f08)
4067
+ y = adds_di_test3 (0x130000029ll,
4070
+ if (y != 0x9b9050576)
4073
+ y = adds_di_test3 (0x130002900ll,
4076
+ if (y != 0xafd052e4d)
4082
+/* { dg-final { cleanup-saved-temps } } */
4083
--- a/src/gcc/testsuite/gcc.target/aarch64/ror.c
4084
+++ b/src/gcc/testsuite/gcc.target/aarch64/ror.c
4086
+/* { dg-options "-O2 --save-temps" } */
4087
+/* { dg-do run } */
4089
+extern void abort (void);
4094
+ /* { dg-final { scan-assembler "ror\tw\[0-9\]+, w\[0-9\]+, 27\n" } } */
4095
+ return (a << 5) | ((unsigned int) a >> 27);
4099
+test_di (long long a)
4101
+ /* { dg-final { scan-assembler "ror\tx\[0-9\]+, x\[0-9\]+, 45\n" } } */
4102
+ return (a << 19) | ((unsigned long long) a >> 45);
4110
+ v = test_si (0x0203050);
4111
+ if (v != 0x4060a00)
4113
+ w = test_di (0x0000020506010304ll);
4114
+ if (w != 0x1028300818200000ll)
4119
+/* { dg-final { cleanup-saved-temps } } */
4120
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
4121
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
4123
/* { dg-do compile } */
4124
/* { dg-options "-O2" } */
4127
+#include "atomic-op-release.x"
4130
-atomic_fetch_add_RELEASE (int a)
4132
- return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
4136
-atomic_fetch_sub_RELEASE (int a)
4138
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
4142
-atomic_fetch_and_RELEASE (int a)
4144
- return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
4148
-atomic_fetch_nand_RELEASE (int a)
4150
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
4154
-atomic_fetch_xor_RELEASE (int a)
4156
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
4160
-atomic_fetch_or_RELEASE (int a)
4162
- return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
4165
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4166
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4167
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
4168
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
4173
+atomic_fetch_add_RELAXED (short a)
4175
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
4179
+atomic_fetch_sub_RELAXED (short a)
4181
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
4185
+atomic_fetch_and_RELAXED (short a)
4187
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
4191
+atomic_fetch_nand_RELAXED (short a)
4193
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
4197
+atomic_fetch_xor_RELAXED (short a)
4199
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
4203
+atomic_fetch_or_RELAXED (short a)
4205
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
4207
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
4208
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
4210
+/* { dg-do run } */
4211
+/* { dg-options "-O3 --save-temps -ffast-math" } */
4213
+#include <arm_neon.h>
4215
+extern void abort (void);
4216
+extern double fabs (double);
4218
+#define NUM_TESTS 8
4219
+#define DELTA 0.000001
4221
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
4222
+ 200.0f, -800.0f, -13.0f, -0.5f};
4223
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
4224
+ 200.0, -800.0, -13.0, -0.5};
4226
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
4228
+test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t (void) \
4232
+ int nlanes = LANES; \
4233
+ U##int##WIDTH##_t expected_out[NUM_TESTS]; \
4234
+ U##int##WIDTH##_t actual_out[NUM_TESTS]; \
4236
+ for (i = 0; i < NUM_TESTS; i++) \
4239
+ = vcvt##SUFFIX##D##_##S##WIDTH##_f##WIDTH (input_f##WIDTH[i]); \
4240
+ /* Don't vectorize this. */ \
4241
+ asm volatile ("" : : : "memory"); \
4244
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
4246
+ U##int##WIDTH##x##LANES##_t out = \
4247
+ vcvt##SUFFIX##Q##_##S##WIDTH##_f##WIDTH \
4248
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
4249
+ vst1##Q##_##S##WIDTH (actual_out + i, out); \
4252
+ for (i = 0; i < NUM_TESTS; i++) \
4253
+ ret &= fabs (expected_out[i] - actual_out[i]) < DELTA; \
4259
+#define BUILD_VARIANTS(SUFFIX) \
4260
+TEST (SUFFIX, , 32, 2, s, ,s) \
4261
+TEST (SUFFIX, q, 32, 4, s, ,s) \
4262
+TEST (SUFFIX, q, 64, 2, s, ,d) \
4263
+TEST (SUFFIX, , 32, 2, u,u,s) \
4264
+TEST (SUFFIX, q, 32, 4, u,u,s) \
4265
+TEST (SUFFIX, q, 64, 2, u,u,d) \
4268
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */
4269
+/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */
4270
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4271
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4272
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4273
+/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */
4274
+/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */
4275
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4276
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4277
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4279
+/* { dg-final { scan-assembler "fcvtas\\tw\[0-9\]+, s\[0-9\]+" } } */
4280
+/* { dg-final { scan-assembler "fcvtas\\tx\[0-9\]+, d\[0-9\]+" } } */
4281
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4282
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4283
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4284
+/* { dg-final { scan-assembler "fcvtau\\tw\[0-9\]+, s\[0-9\]+" } } */
4285
+/* { dg-final { scan-assembler "fcvtau\\tx\[0-9\]+, d\[0-9\]+" } } */
4286
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4287
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4288
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4290
+/* { dg-final { scan-assembler "fcvtms\\tw\[0-9\]+, s\[0-9\]+" } } */
4291
+/* { dg-final { scan-assembler "fcvtms\\tx\[0-9\]+, d\[0-9\]+" } } */
4292
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4293
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4294
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4295
+/* { dg-final { scan-assembler "fcvtmu\\tw\[0-9\]+, s\[0-9\]+" } } */
4296
+/* { dg-final { scan-assembler "fcvtmu\\tx\[0-9\]+, d\[0-9\]+" } } */
4297
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4298
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4299
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4301
+/* { dg-final { scan-assembler "fcvtns\\tw\[0-9\]+, s\[0-9\]+" } } */
4302
+/* { dg-final { scan-assembler "fcvtns\\tx\[0-9\]+, d\[0-9\]+" } } */
4303
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4304
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4305
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4306
+/* { dg-final { scan-assembler "fcvtnu\\tw\[0-9\]+, s\[0-9\]+" } } */
4307
+/* { dg-final { scan-assembler "fcvtnu\\tx\[0-9\]+, d\[0-9\]+" } } */
4308
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4309
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4310
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4312
+/* { dg-final { scan-assembler "fcvtps\\tw\[0-9\]+, s\[0-9\]+" } } */
4313
+/* { dg-final { scan-assembler "fcvtps\\tx\[0-9\]+, d\[0-9\]+" } } */
4314
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4315
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4316
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4317
+/* { dg-final { scan-assembler "fcvtpu\\tw\[0-9\]+, s\[0-9\]+" } } */
4318
+/* { dg-final { scan-assembler "fcvtpu\\tx\[0-9\]+, d\[0-9\]+" } } */
4319
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
4320
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
4321
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
4324
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
4326
+ if (!test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t ()) \
4331
+main (int argc, char **argv)
4333
+ BUILD_VARIANTS ( )
4334
+ BUILD_VARIANTS (a)
4335
+ BUILD_VARIANTS (m)
4336
+ BUILD_VARIANTS (n)
4337
+ BUILD_VARIANTS (p)
4341
+/* { dg-final { cleanup-saved-temps } } */
4342
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
4343
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
4348
+atomic_fetch_add_RELEASE (int a)
4350
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
4354
+atomic_fetch_sub_RELEASE (int a)
4356
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
4360
+atomic_fetch_and_RELEASE (int a)
4362
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
4366
+atomic_fetch_nand_RELEASE (int a)
4368
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
4372
+atomic_fetch_xor_RELEASE (int a)
4374
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
4378
+atomic_fetch_or_RELEASE (int a)
4380
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
4382
--- a/src/gcc/testsuite/gcc.target/aarch64/fabd.c
4383
+++ b/src/gcc/testsuite/gcc.target/aarch64/fabd.c
4385
+/* { dg-do run } */
4386
+/* { dg-options "-O1 -fno-inline --save-temps" } */
4388
+extern double fabs (double);
4389
+extern float fabsf (float);
4390
+extern void abort ();
4391
+extern void exit (int);
4394
+fabd_d (double x, double y, double d)
4396
+ if ((fabs (x - y) - d) > 0.00001)
4400
+/* { dg-final { scan-assembler "fabd\td\[0-9\]+" } } */
4403
+fabd_f (float x, float y, float d)
4405
+ if ((fabsf (x - y) - d) > 0.00001)
4409
+/* { dg-final { scan-assembler "fabd\ts\[0-9\]+" } } */
4414
+ fabd_d (10.0, 5.0, 5.0);
4415
+ fabd_d (5.0, 10.0, 5.0);
4416
+ fabd_f (10.0, 5.0, 5.0);
4417
+ fabd_f (5.0, 10.0, 5.0);
4422
+/* { dg-final { cleanup-saved-temps } } */
4423
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
4424
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
4425
@@ -117,6 +117,16 @@
4426
9.0, 10.0, 11.0, 12.0,
4427
13.0, 14.0, 15.0, 16.0 };
4429
+ F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
4430
+ 1.0f, 1.0f, 1.0f, 1.0f,
4431
+ 1.0f, 1.0f, 1.0f, 1.0f,
4432
+ 1.0f, 1.0f, 1.0f, 1.0f };
4434
+ F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
4435
+ 1.0, 1.0, 1.0, 1.0,
4436
+ 1.0, 1.0, 1.0, 1.0,
4437
+ 1.0, 1.0, 1.0, 1.0 };
4439
/* Setup input vectors. */
4440
for (i=1; i<=16; i++)
4450
--- a/src/gcc/testsuite/gcc.target/aarch64/ngc.c
4451
+++ b/src/gcc/testsuite/gcc.target/aarch64/ngc.c
4453
+/* { dg-do run } */
4454
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4456
+extern void abort (void);
4457
+typedef unsigned int u32;
4460
+ngc_si (u32 a, u32 b, u32 c, u32 d)
4466
+typedef unsigned long long u64;
4469
+ngc_si_tst (u64 a, u32 b, u32 c, u32 d)
4476
+ngc_di (u64 a, u64 b, u64 c, u64 d)
4488
+ x = ngc_si (29, 4, 5, 4);
4492
+ x = ngc_si (1024, 2, 20, 13);
4496
+ y = ngc_si_tst (0x130000029ll, 32, 50, 12);
4497
+ if (y != 0xffffffe0)
4500
+ y = ngc_si_tst (0x5000500050005ll, 21, 2, 14);
4501
+ if (y != 0xffffffea)
4504
+ y = ngc_di (0x130000029ll, 0x320000004ll, 0x505050505ll, 0x123123123ll);
4505
+ if (y != 0xfffffffcdffffffc)
4508
+ y = ngc_di (0x5000500050005ll,
4509
+ 0x2111211121112ll, 0x0000000002020ll, 0x1414575046477ll);
4510
+ if (y != 0xfffdeeedeeedeeed)
4516
+/* { dg-final { scan-assembler-times "ngc\tw\[0-9\]+, w\[0-9\]+" 2 } } */
4517
+/* { dg-final { scan-assembler-times "ngc\tx\[0-9\]+, x\[0-9\]+" 1 } } */
4518
+/* { dg-final { cleanup-saved-temps } } */
4519
--- a/src/gcc/testsuite/gcc.target/aarch64/cmp.c
4520
+++ b/src/gcc/testsuite/gcc.target/aarch64/cmp.c
4522
+/* { dg-do compile } */
4523
+/* { dg-options "-O2" } */
4526
+cmp_si_test1 (int a, int b, int c)
4535
+cmp_si_test2 (int a, int b, int c)
4543
+typedef long long s64;
4546
+cmp_di_test1 (s64 a, s64 b, s64 c)
4555
+cmp_di_test2 (s64 a, s64 b, s64 c)
4564
+cmp_di_test3 (int a, s64 b, s64 c)
4573
+cmp_di_test4 (int a, s64 b, s64 c)
4575
+ if (((s64)a << 3) > b)
4581
+/* { dg-final { scan-assembler-times "cmp\tw\[0-9\]+, w\[0-9\]+" 2 } } */
4582
+/* { dg-final { scan-assembler-times "cmp\tx\[0-9\]+, x\[0-9\]+" 4 } } */
4583
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
4584
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
4586
extern float fabsf (float);
4587
extern double fabs (double);
4589
+#define DEF3a(fname, type, op) \
4590
+ void fname##_##type (pR##type a, \
4595
+ for (i = 0; i < 16; i++) \
4596
+ a[i] = op (b[i] - c[i]); \
4599
#define DEF3(fname, type, op) \
4600
void fname##_##type (pR##type a, \
4605
- for (i=0; i<16; i++) \
4606
+ for (i = 0; i < 16; i++) \
4607
a[i] = b[i] op c[i]; \
4614
- for (i=0; i<16; i++) \
4615
+ for (i = 0; i < 16; i++) \
4620
+#define DEFN3a(fname, op) \
4621
+ DEF3a (fname, F32, op) \
4622
+ DEF3a (fname, F64, op)
4624
#define DEFN3(fname, op) \
4625
DEF3 (fname, F32, op) \
4626
DEF3 (fname, F64, op)
4629
DEF2 (abs, F32, fabsf)
4630
DEF2 (abs, F64, fabs)
4631
+DEF3a (fabd, F32, fabsf)
4632
+DEF3a (fabd, F64, fabs)
4633
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
4634
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
4636
/* { dg-do compile } */
4637
/* { dg-options "-O2" } */
4640
+#include "atomic-op-acq_rel.x"
4643
-atomic_fetch_add_ACQ_REL (int a)
4645
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
4649
-atomic_fetch_sub_ACQ_REL (int a)
4651
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
4655
-atomic_fetch_and_ACQ_REL (int a)
4657
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
4661
-atomic_fetch_nand_ACQ_REL (int a)
4663
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
4667
-atomic_fetch_xor_ACQ_REL (int a)
4669
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
4673
-atomic_fetch_or_ACQ_REL (int a)
4675
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
4678
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4679
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4680
--- a/src/gcc/testsuite/gcc.target/aarch64/subs1.c
4681
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs1.c
4683
+/* { dg-do run } */
4684
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4686
+extern void abort (void);
4689
+subs_si_test1 (int a, int b, int c)
4693
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4701
+subs_si_test2 (int a, int b, int c)
4705
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, #255" } } */
4713
+subs_si_test3 (int a, int b, int c)
4715
+ int d = a - (b << 3);
4717
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4724
+typedef long long s64;
4727
+subs_di_test1 (s64 a, s64 b, s64 c)
4731
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4739
+subs_di_test2 (s64 a, s64 b, s64 c)
4743
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, #255" } } */
4751
+subs_di_test3 (s64 a, s64 b, s64 c)
4753
+ s64 d = a - (b << 3);
4755
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4767
+ x = subs_si_test1 (29, 4, 5);
4771
+ x = subs_si_test1 (5, 2, 20);
4775
+ x = subs_si_test2 (29, 4, 5);
4779
+ x = subs_si_test2 (1024, 2, 20);
4783
+ x = subs_si_test3 (35, 4, 5);
4787
+ x = subs_si_test3 (5, 2, 20);
4791
+ y = subs_di_test1 (0x130000029ll,
4795
+ if (y != 0x45000002d)
4798
+ y = subs_di_test1 (0x5000500050005ll,
4799
+ 0x2111211121112ll,
4800
+ 0x0000000002020ll);
4801
+ if (y != 0x7111711171117)
4804
+ y = subs_di_test2 (0x130000029ll,
4807
+ if (y != 0x955050433)
4810
+ y = subs_di_test2 (0x130002900ll,
4813
+ if (y != 0x955052d0a)
4816
+ y = subs_di_test3 (0x130000029ll,
4819
+ if (y != 0x3790504f6)
4822
+ y = subs_di_test3 (0x130002900ll,
4825
+ if (y != 0x27d052dcd)
4831
+/* { dg-final { cleanup-saved-temps } } */
4832
--- a/src/gcc/testsuite/gcc.target/aarch64/adds2.c
4833
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds2.c
4835
+/* { dg-do run } */
4836
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4838
+extern void abort (void);
4841
+adds_si_test1 (int a, int b, int c)
4845
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4846
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4854
+adds_si_test2 (int a, int b, int c)
4856
+ int d = a + 0xfff;
4858
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
4859
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
4867
+adds_si_test3 (int a, int b, int c)
4869
+ int d = a + (b << 3);
4871
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4872
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4879
+typedef long long s64;
4882
+adds_di_test1 (s64 a, s64 b, s64 c)
4886
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4887
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4895
+adds_di_test2 (s64 a, s64 b, s64 c)
4897
+ s64 d = a + 0x1000ll;
4899
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
4900
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
4908
+adds_di_test3 (s64 a, s64 b, s64 c)
4910
+ s64 d = a + (b << 3);
4912
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4913
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4925
+ x = adds_si_test1 (29, 4, 5);
4929
+ x = adds_si_test1 (5, 2, 20);
4933
+ x = adds_si_test2 (29, 4, 5);
4937
+ x = adds_si_test2 (1024, 2, 20);
4941
+ x = adds_si_test3 (35, 4, 5);
4945
+ x = adds_si_test3 (5, 2, 20);
4949
+ y = adds_di_test1 (0x130000029ll,
4953
+ if (y != 0xc75050536)
4956
+ y = adds_di_test1 (0x5000500050005ll,
4957
+ 0x2111211121112ll,
4958
+ 0x0000000002020ll);
4959
+ if (y != 0x9222922294249)
4962
+ y = adds_di_test2 (0x130000029ll,
4965
+ if (y != 0x955051532)
4968
+ y = adds_di_test2 (0x540004100ll,
4971
+ if (y != 0x1065055309)
4974
+ y = adds_di_test3 (0x130000029ll,
4977
+ if (y != 0x9b9050576)
4980
+ y = adds_di_test3 (0x130002900ll,
4983
+ if (y != 0xafd052e4d)
4989
+/* { dg-final { cleanup-saved-temps } } */
4990
--- a/src/gcc/testsuite/lib/target-supports.exp
4991
+++ b/src/gcc/testsuite/lib/target-supports.exp
4992
@@ -2012,6 +2012,7 @@
4993
|| ([istarget powerpc*-*-*]
4994
&& ![istarget powerpc-*-linux*paired*])
4995
|| [istarget x86_64-*-*]
4996
+ || [istarget aarch64*-*-*]
4997
|| ([istarget arm*-*-*]
4998
&& [check_effective_target_arm_neon_ok])} {
4999
set et_vect_uintfloat_cvt_saved 1
5000
@@ -2147,22 +2148,6 @@
5004
-# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
5005
-# -mfloat-abi=softfp
5006
-proc check_effective_target_arm_v8_neon_ok {} {
5007
- if { [check_effective_target_arm32] } {
5008
- return [check_no_compiler_messages arm_v8_neon_ok object {
5011
- __asm__ volatile ("vrintn.f32 q0, q0");
5014
- } "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"]
5020
# Return 1 if this is an ARM target supporting -mfpu=vfp
5021
# -mfloat-abi=hard. Some multilibs may be incompatible with these
5023
@@ -2226,7 +2211,8 @@
5024
if { ! [check_effective_target_arm_v8_neon_ok] } {
5027
- return "$flags -march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=softfp"
5028
+ global et_arm_v8_neon_flags
5029
+ return "$flags $et_arm_v8_neon_flags -march=armv8-a"
5032
# Add the options needed for NEON. We need either -mfloat-abi=softfp
5033
@@ -2270,6 +2256,79 @@
5034
check_effective_target_arm_neon_ok_nocache]
5037
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
5038
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
5039
+# incompatible with these options. Also set et_arm_neon_flags to the
5040
+# best options to add.
5042
+proc check_effective_target_arm_neon_fp16_ok_nocache { } {
5043
+ global et_arm_neon_fp16_flags
5044
+ set et_arm_neon_fp16_flags ""
5045
+ if { [check_effective_target_arm32] } {
5046
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
5047
+ "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
5048
+ if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
5049
+ #include "arm_neon.h"
5051
+ foo (float32x4_t arg)
5053
+ return vcvt_f16_f32 (arg);
5056
+ set et_arm_neon_fp16_flags $flags
5065
+proc check_effective_target_arm_neon_fp16_ok { } {
5066
+ return [check_cached_effective_target arm_neon_fp16_ok \
5067
+ check_effective_target_arm_neon_fp16_ok_nocache]
5070
+proc add_options_for_arm_neon_fp16 { flags } {
5071
+ if { ! [check_effective_target_arm_neon_fp16_ok] } {
5074
+ global et_arm_neon_fp16_flags
5075
+ return "$flags $et_arm_neon_fp16_flags"
5078
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
5079
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
5080
+# incompatible with these options. Also set et_arm_v8_neon_flags to the
5081
+# best options to add.
5083
+proc check_effective_target_arm_v8_neon_ok_nocache { } {
5084
+ global et_arm_v8_neon_flags
5085
+ set et_arm_v8_neon_flags ""
5086
+ if { [check_effective_target_arm32] } {
5087
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
5088
+ if { [check_no_compiler_messages_nocache arm_v8_neon_ok object {
5089
+ #include "arm_neon.h"
5093
+ __asm__ volatile ("vrintn.f32 q0, q0");
5096
+ set et_arm_v8_neon_flags $flags
5105
+proc check_effective_target_arm_v8_neon_ok { } {
5106
+ return [check_cached_effective_target arm_v8_neon_ok \
5107
+ check_effective_target_arm_v8_neon_ok_nocache]
5110
# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
5111
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
5112
# incompatible with these options. Also set et_arm_neonv2_flags to the
5113
@@ -2509,6 +2568,24 @@
5114
} [add_options_for_arm_neonv2 ""]]
5117
+# Return 1 if the target supports executing ARMv8 NEON instructions, 0
5120
+proc check_effective_target_arm_v8_neon_hw { } {
5121
+ return [check_runtime arm_v8_neon_hw_available {
5122
+ #include "arm_neon.h"
5127
+ asm ("vrinta.f32 %P0, %P1"
5132
+ } [add_options_for_arm_v8_neon ""]]
5135
# Return 1 if this is a ARM target with NEON enabled.
5137
proc check_effective_target_arm_neon { } {
5138
--- a/src/gcc/testsuite/ChangeLog.linaro
5139
+++ b/src/gcc/testsuite/ChangeLog.linaro
5141
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5143
+ Backport from trunk r198574-198575.
5144
+ 2013-05-03 Vidya Praveen <vidyapraveen@arm.com>
5146
+ * gcc.target/aarch64/fabd.c: New file.
5148
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5150
+ Backport from trunk r198490-198496.
5151
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
5153
+ * gcc.target/aarch64/scalar-vca.c: New.
5154
+ * gcc.target/aarch64/vect-vca.c: Likewise.
5156
+ 2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
5158
+ * gcc.target/aarch64/scalar_intrinsics.c (force_simd): New.
5159
+ (test_vceqd_s64): Force arguments to SIMD registers.
5160
+ (test_vceqzd_s64): Likewise.
5161
+ (test_vcged_s64): Likewise.
5162
+ (test_vcled_s64): Likewise.
5163
+ (test_vcgezd_s64): Likewise.
5164
+ (test_vcged_u64): Likewise.
5165
+ (test_vcgtd_s64): Likewise.
5166
+ (test_vcltd_s64): Likewise.
5167
+ (test_vcgtzd_s64): Likewise.
5168
+ (test_vcgtd_u64): Likewise.
5169
+ (test_vclezd_s64): Likewise.
5170
+ (test_vcltzd_s64): Likewise.
5171
+ (test_vtst_s64): Likewise.
5172
+ (test_vtst_u64): Likewise.
5174
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5176
+ Backport from trunk r198191.
5177
+ 2013-04-23 Sofiane Naci <sofiane.naci@arm.com>
5179
+ * gcc.target/aarch64/scalar-mov.c: New testcase.
5181
+2013-05-14 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5183
+ Backport from trunk r197838.
5184
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
5186
+ * gcc.target/aarch64/negs.c: New.
5188
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5190
+ Backport from trunk r198019.
5191
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
5193
+ * gcc.target/aarch64/adds1.c: New.
5194
+ * gcc.target/aarch64/adds2.c: New.
5195
+ * gcc.target/aarch64/subs1.c: New.
5196
+ * gcc.target/aarch64/subs2.c: New.
5198
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5200
+ Backport from trunk r198394,198396-198400,198402-198404,198406.
5201
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
5203
+ * lib/target-supports.exp (vect_uintfloat_cvt): Enable for AArch64.
5205
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
5207
+ * gcc.target/aarch64/vect-vcvt.c: New.
5209
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
5211
+ * gcc.target/aarch64/vect-vrnd.c: New.
5213
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5215
+ Backport from trunk r198302-198306,198316.
5216
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
5217
+ Tejas Belagod <tejas.belagod@arm.com>
5219
+ * gcc.target/aarch64/vaddv-intrinsic.c: New.
5220
+ * gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
5221
+ * gcc.target/aarch64/vaddv-intrinsic.x: Likewise.
5223
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
5225
+ * gcc.target/aarch64/cmp.c: New.
5227
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
5229
+ * gcc.target/aarch64/ngc.c: New.
5231
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5233
+ Backport from trunk r198298.
5234
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
5236
+ * lib/target-supports.exp
5237
+ (check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
5238
+ (check_effective_target_arm_neon_fp16_ok): Likewise.
5239
+ (add_options_for_arm_neon_fp16): Likewise.
5240
+ * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
5241
+ * gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
5243
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5245
+ Backport from trunk r198136-198137,198142,198176
5246
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
5248
+ * gcc.target/aarch64/vrecps.c: New.
5249
+ * gcc.target/aarch64/vrecpx.c: Likewise.
5251
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5253
+ Backport from trunk r198020.
5254
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
5256
+ * gcc.target/aarch64/adds3.c: New.
5257
+ * gcc.target/aarch64/subs3.c: New.
5259
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5261
+ Backport from trunk r197965.
5262
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
5264
+ * gcc.target/arm/anddi3-opt.c: New test.
5265
+ * gcc.target/arm/anddi3-opt2.c: Likewise.
5267
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5269
+ Backport from trunk r197642.
5270
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
5272
+ * gcc.target/arm/minmax_minus.c: New test.
5274
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5276
+ Backport from trunk r197530,197921.
5277
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
5279
+ * gcc.target/arm/peep-ldrd-1.c: New test.
5280
+ * gcc.target/arm/peep-strd-1.c: Likewise.
5282
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5284
+ Backport from trunk r197523.
5285
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
5287
+ * lib/target-supports.exp (add_options_for_arm_v8_neon):
5288
+ Add -march=armv8-a when we use v8 NEON.
5289
+ (check_effective_target_vect_call_btruncf): Remove arm-*-*-*.
5290
+ (check_effective_target_vect_call_ceilf): Likewise.
5291
+ (check_effective_target_vect_call_floorf): Likewise.
5292
+ (check_effective_target_vect_call_roundf): Likewise.
5293
+ (check_vect_support_and_set_flags): Remove check for arm_v8_neon.
5294
+ * gcc.target/arm/vect-rounding-btruncf.c: New testcase.
5295
+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise.
5296
+ * gcc.target/arm/vect-rounding-floorf.c: Likewise.
5297
+ * gcc.target/arm/vect-rounding-roundf.c: Likewise.
5299
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5301
+ Backport from trunk r197518-197522,197516-197528.
5302
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
5304
+ * gcc.target/arm/negdi-1.c: New test.
5305
+ * gcc.target/arm/negdi-2.c: Likewise.
5306
+ * gcc.target/arm/negdi-3.c: Likewise.
5307
+ * gcc.target/arm/negdi-4.c: Likewise.
5309
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5311
+ Backport from trunk r197489-197491.
5312
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
5314
+ * lib/target-supports.exp (check_effective_target_arm_v8_neon_hw):
5316
+ (check_effective_target_arm_v8_neon_ok_nocache):
5318
+ (check_effective_target_arm_v8_neon_ok): Change to use
5319
+ check_effective_target_arm_v8_neon_ok_nocache.
5320
+ (add_options_for_arm_v8_neon): Use et_arm_v8_neon_flags to set ARMv8
5322
+ (check_effective_target_vect_call_btruncf):
5323
+ Enable for arm and ARMv8 NEON.
5324
+ (check_effective_target_vect_call_ceilf): Likewise.
5325
+ (check_effective_target_vect_call_floorf): Likewise.
5326
+ (check_effective_target_vect_call_roundf): Likewise.
5327
+ (check_vect_support_and_set_flags): Handle ARMv8 NEON effective
5330
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5332
+ Backport from trunk r196795-196797,196957.
5333
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
5335
+ * gcc.target/aarch64/sbc.c: New test.
5337
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
5339
+ * gcc.target/aarch64/ror.c: New test.
5341
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
5343
+ * gcc.target/aarch64/extr.c: New test.
5345
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5347
+ * GCC Linaro 4.8-2013.04 released.
5349
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5351
+ Backport from trunk r197052.
5352
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
5354
+ * gcc.target/arm/vseleqdf.c: New test.
5355
+ * gcc.target/arm/vseleqsf.c: Likewise.
5356
+ * gcc.target/arm/vselgedf.c: Likewise.
5357
+ * gcc.target/arm/vselgesf.c: Likewise.
5358
+ * gcc.target/arm/vselgtdf.c: Likewise.
5359
+ * gcc.target/arm/vselgtsf.c: Likewise.
5360
+ * gcc.target/arm/vselledf.c: Likewise.
5361
+ * gcc.target/arm/vsellesf.c: Likewise.
5362
+ * gcc.target/arm/vselltdf.c: Likewise.
5363
+ * gcc.target/arm/vselltsf.c: Likewise.
5364
+ * gcc.target/arm/vselnedf.c: Likewise.
5365
+ * gcc.target/arm/vselnesf.c: Likewise.
5366
+ * gcc.target/arm/vselvcdf.c: Likewise.
5367
+ * gcc.target/arm/vselvcsf.c: Likewise.
5368
+ * gcc.target/arm/vselvsdf.c: Likewise.
5369
+ * gcc.target/arm/vselvssf.c: Likewise.
5371
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5373
+ Backport from trunk r197051.
5374
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
5376
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.c: Move test
5378
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.x: ... to here.
5379
+ * gcc.target/aarch64/atomic-op-acq_rel.c: Move test body from here...
5380
+ * gcc.target/aarch64/atomic-op-acq_rel.x: ... to here.
5381
+ * gcc.target/aarch64/atomic-op-acquire.c: Move test body from here...
5382
+ * gcc.target/aarch64/atomic-op-acquire.x: ... to here.
5383
+ * gcc.target/aarch64/atomic-op-char.c: Move test body from here...
5384
+ * gcc.target/aarch64/atomic-op-char.x: ... to here.
5385
+ * gcc.target/aarch64/atomic-op-consume.c: Move test body from here...
5386
+ * gcc.target/aarch64/atomic-op-consume.x: ... to here.
5387
+ * gcc.target/aarch64/atomic-op-int.c: Move test body from here...
5388
+ * gcc.target/aarch64/atomic-op-int.x: ... to here.
5389
+ * gcc.target/aarch64/atomic-op-relaxed.c: Move test body from here...
5390
+ * gcc.target/aarch64/atomic-op-relaxed.x: ... to here.
5391
+ * gcc.target/aarch64/atomic-op-release.c: Move test body from here...
5392
+ * gcc.target/aarch64/atomic-op-release.x: ... to here.
5393
+ * gcc.target/aarch64/atomic-op-seq_cst.c: Move test body from here...
5394
+ * gcc.target/aarch64/atomic-op-seq_cst.x: ... to here.
5395
+ * gcc.target/aarch64/atomic-op-short.c: Move test body from here...
5396
+ * gcc.target/aarch64/atomic-op-short.x: ... to here.
5397
+ * gcc.target/arm/atomic-comp-swap-release-acquire.c: New test.
5398
+ * gcc.target/arm/atomic-op-acq_rel.c: Likewise.
5399
+ * gcc.target/arm/atomic-op-acquire.c: Likewise.
5400
+ * gcc.target/arm/atomic-op-char.c: Likewise.
5401
+ * gcc.target/arm/atomic-op-consume.c: Likewise.
5402
+ * gcc.target/arm/atomic-op-int.c: Likewise.
5403
+ * gcc.target/arm/atomic-op-relaxed.c: Likewise.
5404
+ * gcc.target/arm/atomic-op-release.c: Likewise.
5405
+ * gcc.target/arm/atomic-op-seq_cst.c: Likewise.
5406
+ * gcc.target/arm/atomic-op-short.c: Likewise.
5408
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5410
+ Backport from trunk r196876.
5411
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
5413
+ * gcc.target/arm/neon-for-64bits-1.c: New tests.
5414
+ * gcc.target/arm/neon-for-64bits-2.c: Likewise.
5416
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5418
+ Backport from trunk r196858.
5419
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
5421
+ * gcc.target/aarch64/vect.c: Test and result vector added
5422
+ for sabd and saba instructions.
5423
+ * gcc.target/aarch64/vect-compile.c: Check for sabd and saba
5424
+ instructions in assembly.
5425
+ * gcc.target/aarch64/vect.x: Add sabd and saba test functions.
5426
+ * gcc.target/aarch64/vect-fp.c: Test and result vector added
5427
+ for fabd instruction.
5428
+ * gcc.target/aarch64/vect-fp-compile.c: Check for fabd
5429
+ instruction in assembly.
5430
+ * gcc.target/aarch64/vect-fp.x: Add fabd test function.
5431
--- a/src/gcc/objcp/ChangeLog.linaro
5432
+++ b/src/gcc/objcp/ChangeLog.linaro
5434
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5436
+ * GCC Linaro 4.8-2013.04 released.
5437
--- a/src/gcc/cp/ChangeLog.linaro
5438
+++ b/src/gcc/cp/ChangeLog.linaro
5440
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5442
+ * GCC Linaro 4.8-2013.04 released.
5443
--- a/src/gcc/go/ChangeLog.linaro
5444
+++ b/src/gcc/go/ChangeLog.linaro
5446
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5448
+ * GCC Linaro 4.8-2013.04 released.
5449
--- a/src/gcc/ada/ChangeLog.linaro
5450
+++ b/src/gcc/ada/ChangeLog.linaro
5452
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5454
+ * GCC Linaro 4.8-2013.04 released.
5455
--- a/src/gcc/common/config/aarch64/aarch64-common.c
5456
+++ b/src/gcc/common/config/aarch64/aarch64-common.c
5459
/* Enable section anchors by default at -O1 or higher. */
5460
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
5461
+ /* Enable redundant extension instructions removal at -O2 and higher. */
5462
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
5463
{ OPT_LEVELS_NONE, 0, NULL, 0 }
5466
--- a/src/gcc/fortran/ChangeLog.linaro
5467
+++ b/src/gcc/fortran/ChangeLog.linaro
5469
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5471
+ * GCC Linaro 4.8-2013.04 released.
5472
--- a/src/gcc/configure.ac
5473
+++ b/src/gcc/configure.ac
5476
AC_SUBST(CONFIGURE_SPECS)
5478
-ACX_PKGVERSION([GCC])
5479
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
5480
ACX_BUGURL([http://gcc.gnu.org/bugs.html])
5482
# Sanity check enable_languages in case someone does not run the toplevel
5483
--- a/src/gcc/coretypes.h
5484
+++ b/src/gcc/coretypes.h
5486
typedef union gimple_statement_d *gimple;
5487
typedef const union gimple_statement_d *const_gimple;
5488
typedef gimple gimple_seq;
5489
+struct gimple_stmt_iterator_d;
5490
+typedef struct gimple_stmt_iterator_d gimple_stmt_iterator;
5492
typedef union section section;
5494
--- a/src/gcc/gimple-fold.c
5495
+++ b/src/gcc/gimple-fold.c
5496
@@ -1143,6 +1143,8 @@
5497
gimplify_and_update_call_from_tree (gsi, result);
5500
+ else if (DECL_BUILT_IN_CLASS (callee) == BUILT_IN_MD)
5501
+ changed |= targetm.gimple_fold_builtin (gsi);
5505
--- a/src/gcc/lto/ChangeLog.linaro
5506
+++ b/src/gcc/lto/ChangeLog.linaro
5508
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5510
+ * GCC Linaro 4.8-2013.04 released.
5511
--- a/src/gcc/po/ChangeLog.linaro
5512
+++ b/src/gcc/po/ChangeLog.linaro
5514
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
5516
+ * GCC Linaro 4.8-2013.04 released.
5517
--- a/src/gcc/gimple.h
5518
+++ b/src/gcc/gimple.h
5521
/* Iterator object for GIMPLE statement sequences. */
5524
+struct gimple_stmt_iterator_d
5526
/* Sequence node holding the current statement. */
5527
gimple_seq_node ptr;
5529
block/sequence is removed. */
5532
-} gimple_stmt_iterator;
5536
/* Data structure definitions for GIMPLE tuples. NOTE: word markers
5537
are for 64 bit hosts. */
5539
--- a/src/gcc/config/aarch64/aarch64-simd.md
5540
+++ b/src/gcc/config/aarch64/aarch64-simd.md
5543
; Main data types used by the insntructions
5545
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
5546
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
5547
(const_string "unknown"))
5551
; simd_dup duplicate element.
5552
; simd_dupgp duplicate general purpose register.
5553
; simd_ext bitwise extract from pair.
5554
+; simd_fabd floating point absolute difference.
5555
; simd_fadd floating point add/sub.
5556
; simd_fcmp floating point compare.
5557
; simd_fcvti floating point convert to integer.
5559
; simd_fmul floating point multiply.
5560
; simd_fmul_elt floating point multiply (by element).
5561
; simd_fnegabs floating point neg/abs.
5562
-; simd_frcpe floating point reciprocal estimate.
5563
-; simd_frcps floating point reciprocal step.
5564
-; simd_frecx floating point reciprocal exponent.
5565
+; simd_frecpe floating point reciprocal estimate.
5566
+; simd_frecps floating point reciprocal step.
5567
+; simd_frecpx floating point reciprocal exponent.
5568
; simd_frint floating point round to integer.
5569
; simd_fsqrt floating point square root.
5570
; simd_icvtf integer convert to floating point.
5593
(eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs")
5594
(eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane")
5595
(eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane")
5596
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
5597
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
5598
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
5599
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
5600
(eq_attr "simd_type" "none") (const_string "none")
5602
(const_string "unknown")))
5606
(define_insn "neg<mode>2"
5607
- [(set (match_operand:VDQM 0 "register_operand" "=w")
5608
- (neg:VDQM (match_operand:VDQM 1 "register_operand" "w")))]
5609
+ [(set (match_operand:VDQ 0 "register_operand" "=w")
5610
+ (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
5612
"neg\t%0.<Vtype>, %1.<Vtype>"
5613
[(set_attr "simd_type" "simd_negabs")
5614
@@ -520,6 +522,51 @@
5615
(set_attr "simd_mode" "<MODE>")]
5618
+(define_insn "abd<mode>_3"
5619
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
5620
+ (abs:VDQ_BHSI (minus:VDQ_BHSI
5621
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
5622
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
5624
+ "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5625
+ [(set_attr "simd_type" "simd_abd")
5626
+ (set_attr "simd_mode" "<MODE>")]
5629
+(define_insn "aba<mode>_3"
5630
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
5631
+ (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
5632
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
5633
+ (match_operand:VDQ_BHSI 2 "register_operand" "w")))
5634
+ (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
5636
+ "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5637
+ [(set_attr "simd_type" "simd_abd")
5638
+ (set_attr "simd_mode" "<MODE>")]
5641
+(define_insn "fabd<mode>_3"
5642
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
5643
+ (abs:VDQF (minus:VDQF
5644
+ (match_operand:VDQF 1 "register_operand" "w")
5645
+ (match_operand:VDQF 2 "register_operand" "w"))))]
5647
+ "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5648
+ [(set_attr "simd_type" "simd_fabd")
5649
+ (set_attr "simd_mode" "<MODE>")]
5652
+(define_insn "*fabd_scalar<mode>3"
5653
+ [(set (match_operand:GPF 0 "register_operand" "=w")
5654
+ (abs:GPF (minus:GPF
5655
+ (match_operand:GPF 1 "register_operand" "w")
5656
+ (match_operand:GPF 2 "register_operand" "w"))))]
5658
+ "fabd\t%<s>0, %<s>1, %<s>2"
5659
+ [(set_attr "simd_type" "simd_fabd")
5660
+ (set_attr "mode" "<MODE>")]
5663
(define_insn "and<mode>3"
5664
[(set (match_operand:VDQ 0 "register_operand" "=w")
5665
(and:VDQ (match_operand:VDQ 1 "register_operand" "w")
5666
@@ -1196,7 +1243,9 @@
5667
(set_attr "simd_mode" "<MODE>")]
5670
-(define_insn "aarch64_frint<frint_suffix><mode>"
5671
+;; Vector versions of the floating-point frint patterns.
5672
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
5673
+(define_insn "<frint_pattern><mode>2"
5674
[(set (match_operand:VDQF 0 "register_operand" "=w")
5675
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5677
@@ -1206,16 +1255,9 @@
5678
(set_attr "simd_mode" "<MODE>")]
5681
-;; Vector versions of the floating-point frint patterns.
5682
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
5683
-(define_expand "<frint_pattern><mode>2"
5684
- [(set (match_operand:VDQF 0 "register_operand")
5685
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
5690
-(define_insn "aarch64_fcvt<frint_suffix><su><mode>"
5691
+;; Vector versions of the fcvt standard patterns.
5692
+;; Expands to lbtrunc, lround, lceil, lfloor
5693
+(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
5694
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
5695
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
5696
[(match_operand:VDQF 1 "register_operand" "w")]
5697
@@ -1226,16 +1268,141 @@
5698
(set_attr "simd_mode" "<MODE>")]
5701
-;; Vector versions of the fcvt standard patterns.
5702
-;; Expands to lbtrunc, lround, lceil, lfloor
5703
-(define_expand "l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2"
5704
+(define_expand "<optab><VDQF:mode><fcvt_target>2"
5705
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
5706
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
5707
[(match_operand:VDQF 1 "register_operand")]
5713
+(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
5714
+ [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
5715
+ (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
5716
+ [(match_operand:VDQF 1 "register_operand")]
5721
+(define_expand "ftrunc<VDQF:mode>2"
5722
+ [(set (match_operand:VDQF 0 "register_operand")
5723
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
5728
+(define_insn "<optab><fcvt_target><VDQF:mode>2"
5729
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
5731
+ (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
5733
+ "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
5734
+ [(set_attr "simd_type" "simd_icvtf")
5735
+ (set_attr "simd_mode" "<MODE>")]
5738
+;; Conversions between vectors of floats and doubles.
5739
+;; Contains a mix of patterns to match standard pattern names
5740
+;; and those for intrinsics.
5742
+;; Float widening operations.
5744
+(define_insn "vec_unpacks_lo_v4sf"
5745
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
5746
+ (float_extend:V2DF
5748
+ (match_operand:V4SF 1 "register_operand" "w")
5749
+ (parallel [(const_int 0) (const_int 1)])
5752
+ "fcvtl\\t%0.2d, %1.2s"
5753
+ [(set_attr "simd_type" "simd_fcvtl")
5754
+ (set_attr "simd_mode" "V2DF")]
5757
+(define_insn "aarch64_float_extend_lo_v2df"
5758
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
5759
+ (float_extend:V2DF
5760
+ (match_operand:V2SF 1 "register_operand" "w")))]
5762
+ "fcvtl\\t%0.2d, %1.2s"
5763
+ [(set_attr "simd_type" "simd_fcvtl")
5764
+ (set_attr "simd_mode" "V2DF")]
5767
+(define_insn "vec_unpacks_hi_v4sf"
5768
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
5769
+ (float_extend:V2DF
5771
+ (match_operand:V4SF 1 "register_operand" "w")
5772
+ (parallel [(const_int 2) (const_int 3)])
5775
+ "fcvtl2\\t%0.2d, %1.4s"
5776
+ [(set_attr "simd_type" "simd_fcvtl")
5777
+ (set_attr "simd_mode" "V2DF")]
5780
+;; Float narrowing operations.
5782
+(define_insn "aarch64_float_truncate_lo_v2sf"
5783
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
5784
+ (float_truncate:V2SF
5785
+ (match_operand:V2DF 1 "register_operand" "w")))]
5787
+ "fcvtn\\t%0.2s, %1.2d"
5788
+ [(set_attr "simd_type" "simd_fcvtl")
5789
+ (set_attr "simd_mode" "V2SF")]
5792
+(define_insn "aarch64_float_truncate_hi_v4sf"
5793
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
5795
+ (match_operand:V2SF 1 "register_operand" "0")
5796
+ (float_truncate:V2SF
5797
+ (match_operand:V2DF 2 "register_operand" "w"))))]
5799
+ "fcvtn2\\t%0.4s, %2.2d"
5800
+ [(set_attr "simd_type" "simd_fcvtl")
5801
+ (set_attr "simd_mode" "V4SF")]
5804
+(define_expand "vec_pack_trunc_v2df"
5805
+ [(set (match_operand:V4SF 0 "register_operand")
5807
+ (float_truncate:V2SF
5808
+ (match_operand:V2DF 1 "register_operand"))
5809
+ (float_truncate:V2SF
5810
+ (match_operand:V2DF 2 "register_operand"))
5814
+ rtx tmp = gen_reg_rtx (V2SFmode);
5815
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[1]));
5816
+ emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
5817
+ tmp, operands[2]));
5822
+(define_expand "vec_pack_trunc_df"
5823
+ [(set (match_operand:V2SF 0 "register_operand")
5825
+ (float_truncate:SF
5826
+ (match_operand:DF 1 "register_operand"))
5827
+ (float_truncate:SF
5828
+ (match_operand:DF 2 "register_operand"))
5832
+ rtx tmp = gen_reg_rtx (V2SFmode);
5833
+ emit_insn (gen_move_lo_quad_v2df (tmp, operands[1]));
5834
+ emit_insn (gen_move_hi_quad_v2df (tmp, operands[2]));
5835
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
5840
(define_insn "aarch64_vmls<mode>"
5841
[(set (match_operand:VDQF 0 "register_operand" "=w")
5842
(minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
5843
@@ -1305,7 +1472,7 @@
5845
;; FP 'across lanes' add.
5847
-(define_insn "aarch64_addvv4sf"
5848
+(define_insn "aarch64_addpv4sf"
5849
[(set (match_operand:V4SF 0 "register_operand" "=w")
5850
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
5852
@@ -1321,8 +1488,8 @@
5855
rtx tmp = gen_reg_rtx (V4SFmode);
5856
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
5857
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
5858
+ emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
5859
+ emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
5863
@@ -1332,11 +1499,21 @@
5866
rtx tmp = gen_reg_rtx (V4SFmode);
5867
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
5868
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
5869
+ emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
5870
+ emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
5874
+(define_expand "aarch64_addvv4sf"
5875
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
5876
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
5880
+ emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
5884
(define_insn "aarch64_addv<mode>"
5885
[(set (match_operand:V2F 0 "register_operand" "=w")
5886
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
5887
@@ -1463,21 +1640,33 @@
5888
(set_attr "simd_mode" "V2SI")]
5891
-;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register
5892
-;; allocation. For an intrinsic of form:
5893
-;; vD = bsl_* (vS, vN, vM)
5894
+;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
5896
+;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
5899
+;; Thus our BSL is of the form:
5900
+;; op0 = bsl (mask, op2, op3)
5901
;; We can use any of:
5902
-;; bsl vS, vN, vM (if D = S)
5903
-;; bit vD, vN, vS (if D = M, so 1-bits in vS choose bits from vN, else vM)
5904
-;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN)
5907
+;; bsl mask, op1, op2
5908
+;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
5909
+;; bit op0, op2, mask
5910
+;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
5911
+;; bif op0, op1, mask
5913
(define_insn "aarch64_simd_bsl<mode>_internal"
5914
[(set (match_operand:VALL 0 "register_operand" "=w,w,w")
5916
- [(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
5917
- (match_operand:VALL 2 "register_operand" " w,w,0")
5918
- (match_operand:VALL 3 "register_operand" " w,0,w")]
5922
+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
5923
+ (match_operand:VALL 2 "register_operand" " w,w,0"))
5925
+ (not:<V_cmp_result>
5926
+ (match_dup:<V_cmp_result> 1))
5927
+ (match_operand:VALL 3 "register_operand" " w,0,w"))
5931
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
5932
@@ -1486,15 +1675,17 @@
5935
(define_expand "aarch64_simd_bsl<mode>"
5936
- [(set (match_operand:VALL 0 "register_operand")
5937
- (unspec:VALL [(match_operand:<V_cmp_result> 1 "register_operand")
5938
- (match_operand:VALL 2 "register_operand")
5939
- (match_operand:VALL 3 "register_operand")]
5942
+ [(match_operand:VALL 0 "register_operand")
5943
+ (match_operand:<V_cmp_result> 1 "register_operand")
5944
+ (match_operand:VALL 2 "register_operand")
5945
+ (match_operand:VALL 3 "register_operand")]
5948
/* We can't alias operands together if they have different modes. */
5949
operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
5950
+ emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
5951
+ operands[2], operands[3]));
5955
(define_expand "aarch64_vcond_internal<mode>"
5956
@@ -1503,11 +1694,13 @@
5957
(match_operator 3 "comparison_operator"
5958
[(match_operand:VDQ 4 "register_operand")
5959
(match_operand:VDQ 5 "nonmemory_operand")])
5960
- (match_operand:VDQ 1 "register_operand")
5961
- (match_operand:VDQ 2 "register_operand")))]
5962
+ (match_operand:VDQ 1 "nonmemory_operand")
5963
+ (match_operand:VDQ 2 "nonmemory_operand")))]
5966
int inverse = 0, has_zero_imm_form = 0;
5967
+ rtx op1 = operands[1];
5968
+ rtx op2 = operands[2];
5969
rtx mask = gen_reg_rtx (<MODE>mode);
5971
switch (GET_CODE (operands[3]))
5972
@@ -1548,12 +1741,12 @@
5976
- emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
5977
+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
5982
- emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
5983
+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
5987
@@ -1566,12 +1759,27 @@
5991
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
5994
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
5997
+ op1 = operands[2];
5998
+ op2 = operands[1];
6001
+ /* If we have (a = (b CMP c) ? -1 : 0);
6002
+ Then we can simply move the generated mask. */
6004
+ if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
6005
+ && op2 == CONST0_RTX (<V_cmp_result>mode))
6006
+ emit_move_insn (operands[0], mask);
6010
+ op1 = force_reg (<MODE>mode, op1);
6012
+ op2 = force_reg (<MODE>mode, op2);
6013
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
6020
@@ -1581,13 +1789,15 @@
6021
(match_operator 3 "comparison_operator"
6022
[(match_operand:VDQF 4 "register_operand")
6023
(match_operand:VDQF 5 "nonmemory_operand")])
6024
- (match_operand:VDQF 1 "register_operand")
6025
- (match_operand:VDQF 2 "register_operand")))]
6026
+ (match_operand:VDQF 1 "nonmemory_operand")
6027
+ (match_operand:VDQF 2 "nonmemory_operand")))]
6031
int use_zero_form = 0;
6032
int swap_bsl_operands = 0;
6033
+ rtx op1 = operands[1];
6034
+ rtx op2 = operands[2];
6035
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
6036
rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
6038
@@ -1732,11 +1942,27 @@
6041
if (swap_bsl_operands)
6042
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
6045
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
6048
+ op1 = operands[2];
6049
+ op2 = operands[1];
6052
+ /* If we have (a = (b CMP c) ? -1 : 0);
6053
+ Then we can simply move the generated mask. */
6055
+ if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
6056
+ && op2 == CONST0_RTX (<V_cmp_result>mode))
6057
+ emit_move_insn (operands[0], mask);
6061
+ op1 = force_reg (<MODE>mode, op1);
6063
+ op2 = force_reg (<MODE>mode, op2);
6064
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
6071
@@ -1746,8 +1972,8 @@
6072
(match_operator 3 "comparison_operator"
6073
[(match_operand:VALL 4 "register_operand")
6074
(match_operand:VALL 5 "nonmemory_operand")])
6075
- (match_operand:VALL 1 "register_operand")
6076
- (match_operand:VALL 2 "register_operand")))]
6077
+ (match_operand:VALL 1 "nonmemory_operand")
6078
+ (match_operand:VALL 2 "nonmemory_operand")))]
6081
emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
6082
@@ -1756,6 +1982,22 @@
6086
+(define_expand "vcond<v_cmp_result><mode>"
6087
+ [(set (match_operand:<V_cmp_result> 0 "register_operand")
6088
+ (if_then_else:<V_cmp_result>
6089
+ (match_operator 3 "comparison_operator"
6090
+ [(match_operand:VDQF 4 "register_operand")
6091
+ (match_operand:VDQF 5 "nonmemory_operand")])
6092
+ (match_operand:<V_cmp_result> 1 "nonmemory_operand")
6093
+ (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
6096
+ emit_insn (gen_aarch64_vcond_internal<v_cmp_result> (
6097
+ operands[0], operands[1],
6098
+ operands[2], operands[3],
6099
+ operands[4], operands[5]));
6103
(define_expand "vcondu<mode><mode>"
6104
[(set (match_operand:VDQ 0 "register_operand")
6105
@@ -1763,8 +2005,8 @@
6106
(match_operator 3 "comparison_operator"
6107
[(match_operand:VDQ 4 "register_operand")
6108
(match_operand:VDQ 5 "nonmemory_operand")])
6109
- (match_operand:VDQ 1 "register_operand")
6110
- (match_operand:VDQ 2 "register_operand")))]
6111
+ (match_operand:VDQ 1 "nonmemory_operand")
6112
+ (match_operand:VDQ 2 "nonmemory_operand")))]
6115
emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
6116
@@ -2861,28 +3103,6 @@
6117
(set_attr "simd_mode" "<MODE>")]
6122
-(define_expand "aarch64_sshl_n<mode>"
6123
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6124
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
6125
- (match_operand:SI 2 "immediate_operand" "i")]
6128
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
6132
-(define_expand "aarch64_ushl_n<mode>"
6133
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6134
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
6135
- (match_operand:SI 2 "immediate_operand" "i")]
6138
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
6144
(define_insn "aarch64_<sur>shll_n<mode>"
6145
@@ -2927,28 +3147,6 @@
6146
(set_attr "simd_mode" "<MODE>")]
6151
-(define_expand "aarch64_sshr_n<mode>"
6152
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6153
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
6154
- (match_operand:SI 2 "immediate_operand" "i")]
6157
- emit_insn (gen_ashr<mode>3 (operands[0], operands[1], operands[2]));
6161
-(define_expand "aarch64_ushr_n<mode>"
6162
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6163
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
6164
- (match_operand:SI 2 "immediate_operand" "i")]
6167
- emit_insn (gen_lshr<mode>3 (operands[0], operands[1], operands[2]));
6173
(define_insn "aarch64_<sur>shr_n<mode>"
6174
@@ -3034,52 +3232,202 @@
6178
-;; cm(eq|ge|le|lt|gt)
6179
+;; cm(eq|ge|gt|lt|le)
6180
+;; Note, we have constraints for Dz and Z as different expanders
6181
+;; have different ideas of what should be passed to this pattern.
6183
-(define_insn "aarch64_cm<cmp><mode>"
6184
+(define_insn "aarch64_cm<optab><mode>"
6185
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
6186
- (unspec:<V_cmp_result>
6187
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
6188
- (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
6190
+ (neg:<V_cmp_result>
6191
+ (COMPARISONS:<V_cmp_result>
6192
+ (match_operand:VDQ 1 "register_operand" "w,w")
6193
+ (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6197
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
6198
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
6199
+ cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6200
+ cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
6201
[(set_attr "simd_type" "simd_cmp")
6202
(set_attr "simd_mode" "<MODE>")]
6206
+(define_insn_and_split "aarch64_cm<optab>di"
6207
+ [(set (match_operand:DI 0 "register_operand" "=w,w,r")
6210
+ (match_operand:DI 1 "register_operand" "w,w,r")
6211
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
6215
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
6216
+ cm<optab>\t%d0, %d1, #0
6219
+ /* We need to prevent the split from
6220
+ happening in the 'w' constraint cases. */
6221
+ && GP_REGNUM_P (REGNO (operands[0]))
6222
+ && GP_REGNUM_P (REGNO (operands[1]))"
6223
+ [(set (reg:CC CC_REGNUM)
6227
+ (set (match_dup 0)
6230
+ (match_operand 3 "cc_register" "")
6233
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
6234
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6235
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6236
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6239
+ [(set_attr "simd_type" "simd_cmp")
6240
+ (set_attr "simd_mode" "DI")]
6243
-(define_insn "aarch64_cm<cmp><mode>"
6246
+(define_insn "aarch64_cm<optab><mode>"
6247
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
6248
- (unspec:<V_cmp_result>
6249
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6250
- (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6252
+ (neg:<V_cmp_result>
6253
+ (UCOMPARISONS:<V_cmp_result>
6254
+ (match_operand:VDQ 1 "register_operand" "w")
6255
+ (match_operand:VDQ 2 "register_operand" "w")
6258
- "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6259
+ "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6260
[(set_attr "simd_type" "simd_cmp")
6261
(set_attr "simd_mode" "<MODE>")]
6264
-;; fcm(eq|ge|le|lt|gt)
6265
+(define_insn_and_split "aarch64_cm<optab>di"
6266
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
6269
+ (match_operand:DI 1 "register_operand" "w,r")
6270
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
6274
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
6277
+ /* We need to prevent the split from
6278
+ happening in the 'w' constraint cases. */
6279
+ && GP_REGNUM_P (REGNO (operands[0]))
6280
+ && GP_REGNUM_P (REGNO (operands[1]))"
6281
+ [(set (reg:CC CC_REGNUM)
6285
+ (set (match_dup 0)
6288
+ (match_operand 3 "cc_register" "")
6291
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
6292
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6293
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6294
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6297
+ [(set_attr "simd_type" "simd_cmp")
6298
+ (set_attr "simd_mode" "DI")]
6301
-(define_insn "aarch64_cm<cmp><mode>"
6304
+(define_insn "aarch64_cmtst<mode>"
6305
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
6306
+ (neg:<V_cmp_result>
6307
+ (ne:<V_cmp_result>
6309
+ (match_operand:VDQ 1 "register_operand" "w")
6310
+ (match_operand:VDQ 2 "register_operand" "w"))
6311
+ (vec_duplicate:<V_cmp_result> (const_int 0)))))]
6313
+ "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6314
+ [(set_attr "simd_type" "simd_cmp")
6315
+ (set_attr "simd_mode" "<MODE>")]
6318
+(define_insn_and_split "aarch64_cmtstdi"
6319
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
6323
+ (match_operand:DI 1 "register_operand" "w,r")
6324
+ (match_operand:DI 2 "register_operand" "w,r"))
6328
+ cmtst\t%d0, %d1, %d2
6331
+ /* We need to prevent the split from
6332
+ happening in the 'w' constraint cases. */
6333
+ && GP_REGNUM_P (REGNO (operands[0]))
6334
+ && GP_REGNUM_P (REGNO (operands[1]))"
6335
+ [(set (reg:CC_NZ CC_REGNUM)
6337
+ (and:DI (match_dup 1)
6340
+ (set (match_dup 0)
6343
+ (match_operand 3 "cc_register" "")
6346
+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
6347
+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
6348
+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
6349
+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
6350
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6353
+ [(set_attr "simd_type" "simd_cmp")
6354
+ (set_attr "simd_mode" "DI")]
6357
+;; fcm(eq|ge|gt|le|lt)
6359
+(define_insn "aarch64_cm<optab><mode>"
6360
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
6361
- (unspec:<V_cmp_result>
6362
- [(match_operand:VDQF 1 "register_operand" "w,w")
6363
- (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
6365
+ (neg:<V_cmp_result>
6366
+ (COMPARISONS:<V_cmp_result>
6367
+ (match_operand:VALLF 1 "register_operand" "w,w")
6368
+ (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
6372
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
6373
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
6374
+ fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6375
+ fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
6376
[(set_attr "simd_type" "simd_fcmp")
6377
(set_attr "simd_mode" "<MODE>")]
6381
+;; Note we can also handle what would be fac(le|lt) by
6382
+;; generating fac(ge|gt).
6384
+(define_insn "*aarch64_fac<optab><mode>"
6385
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
6386
+ (neg:<V_cmp_result>
6387
+ (FAC_COMPARISONS:<V_cmp_result>
6388
+ (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
6389
+ (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
6392
+ "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6393
+ [(set_attr "simd_type" "simd_fcmp")
6394
+ (set_attr "simd_mode" "<MODE>")]
6399
(define_insn "aarch64_addp<mode>"
6400
@@ -3105,19 +3453,6 @@
6401
(set_attr "simd_mode" "DI")]
6406
-(define_expand "aarch64_<maxmin><mode>"
6407
- [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
6408
- (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
6409
- (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
6412
- emit_insn (gen_<maxmin><mode>3 (operands[0], operands[1], operands[2]));
6417
(define_insn "aarch64_<fmaxmin><mode>"
6418
[(set (match_operand:VDQF 0 "register_operand" "=w")
6419
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
6420
@@ -3140,16 +3475,6 @@
6421
(set_attr "simd_mode" "<MODE>")]
6424
-(define_expand "aarch64_sqrt<mode>"
6425
- [(match_operand:VDQF 0 "register_operand" "=w")
6426
- (match_operand:VDQF 1 "register_operand" "w")]
6429
- emit_insn (gen_sqrt<mode>2 (operands[0], operands[1]));
6434
;; Patterns for vector struct loads and stores.
6436
(define_insn "vec_load_lanesoi<mode>"
6437
@@ -3714,3 +4039,25 @@
6438
"ld1r\\t{%0.<Vtype>}, %1"
6439
[(set_attr "simd_type" "simd_load1r")
6440
(set_attr "simd_mode" "<MODE>")])
6442
+(define_insn "aarch64_frecpe<mode>"
6443
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
6444
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
6447
+ "frecpe\\t%0.<Vtype>, %1.<Vtype>"
6448
+ [(set_attr "simd_type" "simd_frecpe")
6449
+ (set_attr "simd_mode" "<MODE>")]
6452
+(define_insn "aarch64_frecps<mode>"
6453
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
6454
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
6455
+ (match_operand:VDQF 2 "register_operand" "w")]
6458
+ "frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
6459
+ [(set_attr "simd_type" "simd_frecps")
6460
+ (set_attr "simd_mode" "<MODE>")]
6463
--- a/src/gcc/config/aarch64/predicates.md
6464
+++ b/src/gcc/config/aarch64/predicates.md
6466
(ior (match_operand 0 "register_operand")
6467
(match_test "op == const0_rtx"))))
6469
+(define_predicate "aarch64_reg_or_fp_zero"
6470
+ (and (match_code "reg,subreg,const_double")
6471
+ (ior (match_operand 0 "register_operand")
6472
+ (match_test "aarch64_float_const_zero_rtx_p (op)"))))
6474
(define_predicate "aarch64_reg_zero_or_m1_or_1"
6475
(and (match_code "reg,subreg,const_int")
6476
(ior (match_operand 0 "register_operand")
6477
--- a/src/gcc/config/aarch64/aarch64-elf.h
6478
+++ b/src/gcc/config/aarch64/aarch64-elf.h
6481
#define ASM_COMMENT_START "//"
6483
-#define REGISTER_PREFIX ""
6484
#define LOCAL_LABEL_PREFIX "."
6485
#define USER_LABEL_PREFIX ""
6487
--- a/src/gcc/config/aarch64/arm_neon.h
6488
+++ b/src/gcc/config/aarch64/arm_neon.h
6489
@@ -4468,17 +4468,6 @@
6493
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6494
-vabs_f32 (float32x2_t a)
6496
- float32x2_t result;
6497
- __asm__ ("fabs %0.2s,%1.2s"
6500
- : /* No clobbers */);
6504
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6505
vabs_s8 (int8x8_t a)
6507
@@ -4512,28 +4501,6 @@
6511
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6512
-vabsq_f32 (float32x4_t a)
6514
- float32x4_t result;
6515
- __asm__ ("fabs %0.4s,%1.4s"
6518
- : /* No clobbers */);
6522
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6523
-vabsq_f64 (float64x2_t a)
6525
- float64x2_t result;
6526
- __asm__ ("fabs %0.2d,%1.2d"
6529
- : /* No clobbers */);
6533
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6534
vabsq_s8 (int8x16_t a)
6536
@@ -4578,50 +4545,6 @@
6540
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6541
-vacged_f64 (float64_t a, float64_t b)
6544
- __asm__ ("facge %d0,%d1,%d2"
6547
- : /* No clobbers */);
6551
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6552
-vacges_f32 (float32_t a, float32_t b)
6555
- __asm__ ("facge %s0,%s1,%s2"
6558
- : /* No clobbers */);
6562
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6563
-vacgtd_f64 (float64_t a, float64_t b)
6566
- __asm__ ("facgt %d0,%d1,%d2"
6569
- : /* No clobbers */);
6573
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6574
-vacgts_f32 (float32_t a, float32_t b)
6577
- __asm__ ("facgt %s0,%s1,%s2"
6580
- : /* No clobbers */);
6584
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
6585
vaddlv_s8 (int8x8_t a)
6587
@@ -5095,358 +5018,6 @@
6591
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6592
-vcage_f32 (float32x2_t a, float32x2_t b)
6594
- uint32x2_t result;
6595
- __asm__ ("facge %0.2s, %1.2s, %2.2s"
6598
- : /* No clobbers */);
6602
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6603
-vcageq_f32 (float32x4_t a, float32x4_t b)
6605
- uint32x4_t result;
6606
- __asm__ ("facge %0.4s, %1.4s, %2.4s"
6609
- : /* No clobbers */);
6613
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6614
-vcageq_f64 (float64x2_t a, float64x2_t b)
6616
- uint64x2_t result;
6617
- __asm__ ("facge %0.2d, %1.2d, %2.2d"
6620
- : /* No clobbers */);
6624
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6625
-vcagt_f32 (float32x2_t a, float32x2_t b)
6627
- uint32x2_t result;
6628
- __asm__ ("facgt %0.2s, %1.2s, %2.2s"
6631
- : /* No clobbers */);
6635
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6636
-vcagtq_f32 (float32x4_t a, float32x4_t b)
6638
- uint32x4_t result;
6639
- __asm__ ("facgt %0.4s, %1.4s, %2.4s"
6642
- : /* No clobbers */);
6646
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6647
-vcagtq_f64 (float64x2_t a, float64x2_t b)
6649
- uint64x2_t result;
6650
- __asm__ ("facgt %0.2d, %1.2d, %2.2d"
6653
- : /* No clobbers */);
6657
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6658
-vcale_f32 (float32x2_t a, float32x2_t b)
6660
- uint32x2_t result;
6661
- __asm__ ("facge %0.2s, %2.2s, %1.2s"
6664
- : /* No clobbers */);
6668
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6669
-vcaleq_f32 (float32x4_t a, float32x4_t b)
6671
- uint32x4_t result;
6672
- __asm__ ("facge %0.4s, %2.4s, %1.4s"
6675
- : /* No clobbers */);
6679
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6680
-vcaleq_f64 (float64x2_t a, float64x2_t b)
6682
- uint64x2_t result;
6683
- __asm__ ("facge %0.2d, %2.2d, %1.2d"
6686
- : /* No clobbers */);
6690
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6691
-vcalt_f32 (float32x2_t a, float32x2_t b)
6693
- uint32x2_t result;
6694
- __asm__ ("facgt %0.2s, %2.2s, %1.2s"
6697
- : /* No clobbers */);
6701
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6702
-vcaltq_f32 (float32x4_t a, float32x4_t b)
6704
- uint32x4_t result;
6705
- __asm__ ("facgt %0.4s, %2.4s, %1.4s"
6708
- : /* No clobbers */);
6712
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6713
-vcaltq_f64 (float64x2_t a, float64x2_t b)
6715
- uint64x2_t result;
6716
- __asm__ ("facgt %0.2d, %2.2d, %1.2d"
6719
- : /* No clobbers */);
6723
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6724
-vceq_f32 (float32x2_t a, float32x2_t b)
6726
- uint32x2_t result;
6727
- __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
6730
- : /* No clobbers */);
6734
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6735
-vceq_f64 (float64x1_t a, float64x1_t b)
6737
- uint64x1_t result;
6738
- __asm__ ("fcmeq %d0, %d1, %d2"
6741
- : /* No clobbers */);
6745
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6746
-vceqd_f64 (float64_t a, float64_t b)
6749
- __asm__ ("fcmeq %d0,%d1,%d2"
6752
- : /* No clobbers */);
6756
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6757
-vceqq_f32 (float32x4_t a, float32x4_t b)
6759
- uint32x4_t result;
6760
- __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
6763
- : /* No clobbers */);
6767
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6768
-vceqq_f64 (float64x2_t a, float64x2_t b)
6770
- uint64x2_t result;
6771
- __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
6774
- : /* No clobbers */);
6778
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6779
-vceqs_f32 (float32_t a, float32_t b)
6782
- __asm__ ("fcmeq %s0,%s1,%s2"
6785
- : /* No clobbers */);
6789
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6790
-vceqzd_f64 (float64_t a)
6793
- __asm__ ("fcmeq %d0,%d1,#0"
6796
- : /* No clobbers */);
6800
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6801
-vceqzs_f32 (float32_t a)
6804
- __asm__ ("fcmeq %s0,%s1,#0"
6807
- : /* No clobbers */);
6811
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6812
-vcge_f32 (float32x2_t a, float32x2_t b)
6814
- uint32x2_t result;
6815
- __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
6818
- : /* No clobbers */);
6822
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6823
-vcge_f64 (float64x1_t a, float64x1_t b)
6825
- uint64x1_t result;
6826
- __asm__ ("fcmge %d0, %d1, %d2"
6829
- : /* No clobbers */);
6833
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6834
-vcgeq_f32 (float32x4_t a, float32x4_t b)
6836
- uint32x4_t result;
6837
- __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
6840
- : /* No clobbers */);
6844
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6845
-vcgeq_f64 (float64x2_t a, float64x2_t b)
6847
- uint64x2_t result;
6848
- __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
6851
- : /* No clobbers */);
6855
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6856
-vcgt_f32 (float32x2_t a, float32x2_t b)
6858
- uint32x2_t result;
6859
- __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
6862
- : /* No clobbers */);
6866
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6867
-vcgt_f64 (float64x1_t a, float64x1_t b)
6869
- uint64x1_t result;
6870
- __asm__ ("fcmgt %d0, %d1, %d2"
6873
- : /* No clobbers */);
6877
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6878
-vcgtq_f32 (float32x4_t a, float32x4_t b)
6880
- uint32x4_t result;
6881
- __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
6884
- : /* No clobbers */);
6888
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6889
-vcgtq_f64 (float64x2_t a, float64x2_t b)
6891
- uint64x2_t result;
6892
- __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
6895
- : /* No clobbers */);
6899
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6900
-vcle_f32 (float32x2_t a, float32x2_t b)
6902
- uint32x2_t result;
6903
- __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
6906
- : /* No clobbers */);
6910
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6911
-vcle_f64 (float64x1_t a, float64x1_t b)
6913
- uint64x1_t result;
6914
- __asm__ ("fcmge %d0, %d2, %d1"
6917
- : /* No clobbers */);
6921
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6922
-vcleq_f32 (float32x4_t a, float32x4_t b)
6924
- uint32x4_t result;
6925
- __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
6928
- : /* No clobbers */);
6932
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6933
-vcleq_f64 (float64x2_t a, float64x2_t b)
6935
- uint64x2_t result;
6936
- __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
6939
- : /* No clobbers */);
6943
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6944
vcls_s8 (int8x8_t a)
6946
@@ -5513,50 +5084,6 @@
6950
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6951
-vclt_f32 (float32x2_t a, float32x2_t b)
6953
- uint32x2_t result;
6954
- __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
6957
- : /* No clobbers */);
6961
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6962
-vclt_f64 (float64x1_t a, float64x1_t b)
6964
- uint64x1_t result;
6965
- __asm__ ("fcmgt %d0, %d2, %d1"
6968
- : /* No clobbers */);
6972
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6973
-vcltq_f32 (float32x4_t a, float32x4_t b)
6975
- uint32x4_t result;
6976
- __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
6979
- : /* No clobbers */);
6983
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6984
-vcltq_f64 (float64x2_t a, float64x2_t b)
6986
- uint64x2_t result;
6987
- __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
6990
- : /* No clobbers */);
6994
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6995
vclz_s8 (int8x8_t a)
6997
@@ -5915,100 +5442,12 @@
6999
/* vcvt_f32_f16 not supported */
7001
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7002
-vcvt_f32_f64 (float64x2_t a)
7004
- float32x2_t result;
7005
- __asm__ ("fcvtn %0.2s,%1.2d"
7008
- : /* No clobbers */);
7012
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7013
-vcvt_f32_s32 (int32x2_t a)
7015
- float32x2_t result;
7016
- __asm__ ("scvtf %0.2s, %1.2s"
7019
- : /* No clobbers */);
7023
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7024
-vcvt_f32_u32 (uint32x2_t a)
7026
- float32x2_t result;
7027
- __asm__ ("ucvtf %0.2s, %1.2s"
7030
- : /* No clobbers */);
7034
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7035
-vcvt_f64_f32 (float32x2_t a)
7037
- float64x2_t result;
7038
- __asm__ ("fcvtl %0.2d,%1.2s"
7041
- : /* No clobbers */);
7045
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
7046
-vcvt_f64_s64 (uint64x1_t a)
7048
- float64x1_t result;
7049
- __asm__ ("scvtf %d0, %d1"
7052
- : /* No clobbers */);
7056
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
7057
-vcvt_f64_u64 (uint64x1_t a)
7059
- float64x1_t result;
7060
- __asm__ ("ucvtf %d0, %d1"
7063
- : /* No clobbers */);
7067
/* vcvt_high_f16_f32 not supported */
7069
/* vcvt_high_f32_f16 not supported */
7071
static float32x2_t vdup_n_f32 (float32_t);
7073
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7074
-vcvt_high_f32_f64 (float32x2_t a, float64x2_t b)
7076
- float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f));
7077
- __asm__ ("fcvtn2 %0.4s,%2.2d"
7080
- : /* No clobbers */);
7084
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7085
-vcvt_high_f64_f32 (float32x4_t a)
7087
- float64x2_t result;
7088
- __asm__ ("fcvtl2 %0.2d,%1.4s"
7091
- : /* No clobbers */);
7095
#define vcvt_n_f32_s32(a, b) \
7098
@@ -6057,160 +5496,6 @@
7102
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7103
-vcvt_s32_f32 (float32x2_t a)
7106
- __asm__ ("fcvtzs %0.2s, %1.2s"
7109
- : /* No clobbers */);
7113
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7114
-vcvt_u32_f32 (float32x2_t a)
7116
- uint32x2_t result;
7117
- __asm__ ("fcvtzu %0.2s, %1.2s"
7120
- : /* No clobbers */);
7124
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7125
-vcvta_s32_f32 (float32x2_t a)
7128
- __asm__ ("fcvtas %0.2s, %1.2s"
7131
- : /* No clobbers */);
7135
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7136
-vcvta_u32_f32 (float32x2_t a)
7138
- uint32x2_t result;
7139
- __asm__ ("fcvtau %0.2s, %1.2s"
7142
- : /* No clobbers */);
7146
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7147
-vcvtad_s64_f64 (float64_t a)
7150
- __asm__ ("fcvtas %d0,%d1"
7153
- : /* No clobbers */);
7157
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7158
-vcvtad_u64_f64 (float64_t a)
7161
- __asm__ ("fcvtau %d0,%d1"
7164
- : /* No clobbers */);
7168
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7169
-vcvtaq_s32_f32 (float32x4_t a)
7172
- __asm__ ("fcvtas %0.4s, %1.4s"
7175
- : /* No clobbers */);
7179
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7180
-vcvtaq_s64_f64 (float64x2_t a)
7183
- __asm__ ("fcvtas %0.2d, %1.2d"
7186
- : /* No clobbers */);
7190
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7191
-vcvtaq_u32_f32 (float32x4_t a)
7193
- uint32x4_t result;
7194
- __asm__ ("fcvtau %0.4s, %1.4s"
7197
- : /* No clobbers */);
7201
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7202
-vcvtaq_u64_f64 (float64x2_t a)
7204
- uint64x2_t result;
7205
- __asm__ ("fcvtau %0.2d, %1.2d"
7208
- : /* No clobbers */);
7212
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7213
-vcvtas_s64_f64 (float32_t a)
7216
- __asm__ ("fcvtas %s0,%s1"
7219
- : /* No clobbers */);
7223
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7224
-vcvtas_u64_f64 (float32_t a)
7227
- __asm__ ("fcvtau %s0,%s1"
7230
- : /* No clobbers */);
7234
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
7235
-vcvtd_f64_s64 (int64_t a)
7238
- __asm__ ("scvtf %d0,%d1"
7241
- : /* No clobbers */);
7245
-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
7246
-vcvtd_f64_u64 (uint64_t a)
7249
- __asm__ ("ucvtf %d0,%d1"
7252
- : /* No clobbers */);
7256
#define vcvtd_n_f64_s64(a, b) \
7259
@@ -6259,402 +5544,6 @@
7263
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7264
-vcvtd_s64_f64 (float64_t a)
7267
- __asm__ ("fcvtzs %d0,%d1"
7270
- : /* No clobbers */);
7274
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7275
-vcvtd_u64_f64 (float64_t a)
7278
- __asm__ ("fcvtzu %d0,%d1"
7281
- : /* No clobbers */);
7285
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7286
-vcvtm_s32_f32 (float32x2_t a)
7289
- __asm__ ("fcvtms %0.2s, %1.2s"
7292
- : /* No clobbers */);
7296
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7297
-vcvtm_u32_f32 (float32x2_t a)
7299
- uint32x2_t result;
7300
- __asm__ ("fcvtmu %0.2s, %1.2s"
7303
- : /* No clobbers */);
7307
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7308
-vcvtmd_s64_f64 (float64_t a)
7311
- __asm__ ("fcvtms %d0,%d1"
7314
- : /* No clobbers */);
7318
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7319
-vcvtmd_u64_f64 (float64_t a)
7322
- __asm__ ("fcvtmu %d0,%d1"
7325
- : /* No clobbers */);
7329
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7330
-vcvtmq_s32_f32 (float32x4_t a)
7333
- __asm__ ("fcvtms %0.4s, %1.4s"
7336
- : /* No clobbers */);
7340
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7341
-vcvtmq_s64_f64 (float64x2_t a)
7344
- __asm__ ("fcvtms %0.2d, %1.2d"
7347
- : /* No clobbers */);
7351
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7352
-vcvtmq_u32_f32 (float32x4_t a)
7354
- uint32x4_t result;
7355
- __asm__ ("fcvtmu %0.4s, %1.4s"
7358
- : /* No clobbers */);
7362
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7363
-vcvtmq_u64_f64 (float64x2_t a)
7365
- uint64x2_t result;
7366
- __asm__ ("fcvtmu %0.2d, %1.2d"
7369
- : /* No clobbers */);
7373
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7374
-vcvtms_s64_f64 (float32_t a)
7377
- __asm__ ("fcvtms %s0,%s1"
7380
- : /* No clobbers */);
7384
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7385
-vcvtms_u64_f64 (float32_t a)
7388
- __asm__ ("fcvtmu %s0,%s1"
7391
- : /* No clobbers */);
7395
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7396
-vcvtn_s32_f32 (float32x2_t a)
7399
- __asm__ ("fcvtns %0.2s, %1.2s"
7402
- : /* No clobbers */);
7406
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7407
-vcvtn_u32_f32 (float32x2_t a)
7409
- uint32x2_t result;
7410
- __asm__ ("fcvtnu %0.2s, %1.2s"
7413
- : /* No clobbers */);
7417
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7418
-vcvtnd_s64_f64 (float64_t a)
7421
- __asm__ ("fcvtns %d0,%d1"
7424
- : /* No clobbers */);
7428
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7429
-vcvtnd_u64_f64 (float64_t a)
7432
- __asm__ ("fcvtnu %d0,%d1"
7435
- : /* No clobbers */);
7439
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7440
-vcvtnq_s32_f32 (float32x4_t a)
7443
- __asm__ ("fcvtns %0.4s, %1.4s"
7446
- : /* No clobbers */);
7450
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7451
-vcvtnq_s64_f64 (float64x2_t a)
7454
- __asm__ ("fcvtns %0.2d, %1.2d"
7457
- : /* No clobbers */);
7461
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7462
-vcvtnq_u32_f32 (float32x4_t a)
7464
- uint32x4_t result;
7465
- __asm__ ("fcvtnu %0.4s, %1.4s"
7468
- : /* No clobbers */);
7472
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7473
-vcvtnq_u64_f64 (float64x2_t a)
7475
- uint64x2_t result;
7476
- __asm__ ("fcvtnu %0.2d, %1.2d"
7479
- : /* No clobbers */);
7483
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7484
-vcvtns_s64_f64 (float32_t a)
7487
- __asm__ ("fcvtns %s0,%s1"
7490
- : /* No clobbers */);
7494
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7495
-vcvtns_u64_f64 (float32_t a)
7498
- __asm__ ("fcvtnu %s0,%s1"
7501
- : /* No clobbers */);
7505
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7506
-vcvtp_s32_f32 (float32x2_t a)
7509
- __asm__ ("fcvtps %0.2s, %1.2s"
7512
- : /* No clobbers */);
7516
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7517
-vcvtp_u32_f32 (float32x2_t a)
7519
- uint32x2_t result;
7520
- __asm__ ("fcvtpu %0.2s, %1.2s"
7523
- : /* No clobbers */);
7527
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7528
-vcvtpd_s64_f64 (float64_t a)
7531
- __asm__ ("fcvtps %d0,%d1"
7534
- : /* No clobbers */);
7538
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7539
-vcvtpd_u64_f64 (float64_t a)
7542
- __asm__ ("fcvtpu %d0,%d1"
7545
- : /* No clobbers */);
7549
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7550
-vcvtpq_s32_f32 (float32x4_t a)
7553
- __asm__ ("fcvtps %0.4s, %1.4s"
7556
- : /* No clobbers */);
7560
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7561
-vcvtpq_s64_f64 (float64x2_t a)
7564
- __asm__ ("fcvtps %0.2d, %1.2d"
7567
- : /* No clobbers */);
7571
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7572
-vcvtpq_u32_f32 (float32x4_t a)
7574
- uint32x4_t result;
7575
- __asm__ ("fcvtpu %0.4s, %1.4s"
7578
- : /* No clobbers */);
7582
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7583
-vcvtpq_u64_f64 (float64x2_t a)
7585
- uint64x2_t result;
7586
- __asm__ ("fcvtpu %0.2d, %1.2d"
7589
- : /* No clobbers */);
7593
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7594
-vcvtps_s64_f64 (float32_t a)
7597
- __asm__ ("fcvtps %s0,%s1"
7600
- : /* No clobbers */);
7604
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7605
-vcvtps_u64_f64 (float32_t a)
7608
- __asm__ ("fcvtpu %s0,%s1"
7611
- : /* No clobbers */);
7615
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7616
-vcvtq_f32_s32 (int32x4_t a)
7618
- float32x4_t result;
7619
- __asm__ ("scvtf %0.4s, %1.4s"
7622
- : /* No clobbers */);
7626
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7627
-vcvtq_f32_u32 (uint32x4_t a)
7629
- float32x4_t result;
7630
- __asm__ ("ucvtf %0.4s, %1.4s"
7633
- : /* No clobbers */);
7637
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7638
-vcvtq_f64_s64 (int64x2_t a)
7640
- float64x2_t result;
7641
- __asm__ ("scvtf %0.2d, %1.2d"
7644
- : /* No clobbers */);
7648
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7649
-vcvtq_f64_u64 (uint64x2_t a)
7651
- float64x2_t result;
7652
- __asm__ ("ucvtf %0.2d, %1.2d"
7655
- : /* No clobbers */);
7659
#define vcvtq_n_f32_s32(a, b) \
7662
@@ -6751,72 +5640,6 @@
7666
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7667
-vcvtq_s32_f32 (float32x4_t a)
7670
- __asm__ ("fcvtzs %0.4s, %1.4s"
7673
- : /* No clobbers */);
7677
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7678
-vcvtq_s64_f64 (float64x2_t a)
7681
- __asm__ ("fcvtzs %0.2d, %1.2d"
7684
- : /* No clobbers */);
7688
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7689
-vcvtq_u32_f32 (float32x4_t a)
7691
- uint32x4_t result;
7692
- __asm__ ("fcvtzu %0.4s, %1.4s"
7695
- : /* No clobbers */);
7699
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7700
-vcvtq_u64_f64 (float64x2_t a)
7702
- uint64x2_t result;
7703
- __asm__ ("fcvtzu %0.2d, %1.2d"
7706
- : /* No clobbers */);
7710
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
7711
-vcvts_f64_s32 (int32_t a)
7714
- __asm__ ("scvtf %s0,%s1"
7717
- : /* No clobbers */);
7721
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
7722
-vcvts_f64_u32 (uint32_t a)
7725
- __asm__ ("ucvtf %s0,%s1"
7728
- : /* No clobbers */);
7732
#define vcvts_n_f32_s32(a, b) \
7735
@@ -6865,28 +5688,6 @@
7739
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7740
-vcvts_s64_f64 (float32_t a)
7743
- __asm__ ("fcvtzs %s0,%s1"
7746
- : /* No clobbers */);
7750
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7751
-vcvts_u64_f64 (float32_t a)
7754
- __asm__ ("fcvtzu %s0,%s1"
7757
- : /* No clobbers */);
7761
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7762
vcvtx_f32_f64 (float64x2_t a)
7764
@@ -14556,17 +13357,6 @@
7768
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7769
-vrecpe_f32 (float32x2_t a)
7771
- float32x2_t result;
7772
- __asm__ ("frecpe %0.2s,%1.2s"
7775
- : /* No clobbers */);
7779
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7780
vrecpe_u32 (uint32x2_t a)
7782
@@ -14578,39 +13368,6 @@
7786
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7787
-vrecped_f64 (float64_t a)
7790
- __asm__ ("frecpe %d0,%d1"
7793
- : /* No clobbers */);
7797
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7798
-vrecpeq_f32 (float32x4_t a)
7800
- float32x4_t result;
7801
- __asm__ ("frecpe %0.4s,%1.4s"
7804
- : /* No clobbers */);
7808
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7809
-vrecpeq_f64 (float64x2_t a)
7811
- float64x2_t result;
7812
- __asm__ ("frecpe %0.2d,%1.2d"
7815
- : /* No clobbers */);
7819
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7820
vrecpeq_u32 (uint32x4_t a)
7822
@@ -14622,94 +13379,6 @@
7826
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7827
-vrecpes_f32 (float32_t a)
7830
- __asm__ ("frecpe %s0,%s1"
7833
- : /* No clobbers */);
7837
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7838
-vrecps_f32 (float32x2_t a, float32x2_t b)
7840
- float32x2_t result;
7841
- __asm__ ("frecps %0.2s,%1.2s,%2.2s"
7844
- : /* No clobbers */);
7848
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7849
-vrecpsd_f64 (float64_t a, float64_t b)
7852
- __asm__ ("frecps %d0,%d1,%d2"
7855
- : /* No clobbers */);
7859
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7860
-vrecpsq_f32 (float32x4_t a, float32x4_t b)
7862
- float32x4_t result;
7863
- __asm__ ("frecps %0.4s,%1.4s,%2.4s"
7866
- : /* No clobbers */);
7870
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7871
-vrecpsq_f64 (float64x2_t a, float64x2_t b)
7873
- float64x2_t result;
7874
- __asm__ ("frecps %0.2d,%1.2d,%2.2d"
7877
- : /* No clobbers */);
7881
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7882
-vrecpss_f32 (float32_t a, float32_t b)
7885
- __asm__ ("frecps %s0,%s1,%s2"
7888
- : /* No clobbers */);
7892
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7893
-vrecpxd_f64 (float64_t a)
7896
- __asm__ ("frecpe %d0,%d1"
7899
- : /* No clobbers */);
7903
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7904
-vrecpxs_f32 (float32_t a)
7907
- __asm__ ("frecpe %s0,%s1"
7910
- : /* No clobbers */);
7914
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7915
vrev16_p8 (poly8x8_t a)
7917
@@ -15106,171 +13775,6 @@
7921
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7922
-vrnd_f32 (float32x2_t a)
7924
- float32x2_t result;
7925
- __asm__ ("frintz %0.2s,%1.2s"
7928
- : /* No clobbers */);
7932
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7933
-vrnda_f32 (float32x2_t a)
7935
- float32x2_t result;
7936
- __asm__ ("frinta %0.2s,%1.2s"
7939
- : /* No clobbers */);
7943
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7944
-vrndm_f32 (float32x2_t a)
7946
- float32x2_t result;
7947
- __asm__ ("frintm %0.2s,%1.2s"
7950
- : /* No clobbers */);
7954
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7955
-vrndn_f32 (float32x2_t a)
7957
- float32x2_t result;
7958
- __asm__ ("frintn %0.2s,%1.2s"
7961
- : /* No clobbers */);
7965
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7966
-vrndp_f32 (float32x2_t a)
7968
- float32x2_t result;
7969
- __asm__ ("frintp %0.2s,%1.2s"
7972
- : /* No clobbers */);
7976
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7977
-vrndq_f32 (float32x4_t a)
7979
- float32x4_t result;
7980
- __asm__ ("frintz %0.4s,%1.4s"
7983
- : /* No clobbers */);
7987
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7988
-vrndq_f64 (float64x2_t a)
7990
- float64x2_t result;
7991
- __asm__ ("frintz %0.2d,%1.2d"
7994
- : /* No clobbers */);
7998
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7999
-vrndqa_f32 (float32x4_t a)
8001
- float32x4_t result;
8002
- __asm__ ("frinta %0.4s,%1.4s"
8005
- : /* No clobbers */);
8009
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8010
-vrndqa_f64 (float64x2_t a)
8012
- float64x2_t result;
8013
- __asm__ ("frinta %0.2d,%1.2d"
8016
- : /* No clobbers */);
8020
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8021
-vrndqm_f32 (float32x4_t a)
8023
- float32x4_t result;
8024
- __asm__ ("frintm %0.4s,%1.4s"
8027
- : /* No clobbers */);
8031
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8032
-vrndqm_f64 (float64x2_t a)
8034
- float64x2_t result;
8035
- __asm__ ("frintm %0.2d,%1.2d"
8038
- : /* No clobbers */);
8042
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8043
-vrndqn_f32 (float32x4_t a)
8045
- float32x4_t result;
8046
- __asm__ ("frintn %0.4s,%1.4s"
8049
- : /* No clobbers */);
8053
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8054
-vrndqn_f64 (float64x2_t a)
8056
- float64x2_t result;
8057
- __asm__ ("frintn %0.2d,%1.2d"
8060
- : /* No clobbers */);
8064
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8065
-vrndqp_f32 (float32x4_t a)
8067
- float32x4_t result;
8068
- __asm__ ("frintp %0.4s,%1.4s"
8071
- : /* No clobbers */);
8075
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8076
-vrndqp_f64 (float64x2_t a)
8078
- float64x2_t result;
8079
- __asm__ ("frintp %0.2d,%1.2d"
8082
- : /* No clobbers */);
8086
#define vrshrn_high_n_s16(a, b, c) \
8089
@@ -19849,6 +18353,26 @@
8091
/* Start of optimal implementations in approved order. */
8095
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8096
+vabs_f32 (float32x2_t __a)
8098
+ return __builtin_aarch64_absv2sf (__a);
8101
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8102
+vabsq_f32 (float32x4_t __a)
8104
+ return __builtin_aarch64_absv4sf (__a);
8107
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8108
+vabsq_f64 (float64x2_t __a)
8110
+ return __builtin_aarch64_absv2df (__a);
8115
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8116
@@ -19863,8 +18387,145 @@
8121
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
8122
+vaddv_f32 (float32x2_t __a)
8124
+ float32x2_t t = __builtin_aarch64_addvv2sf (__a);
8125
+ return vget_lane_f32 (t, 0);
8128
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
8129
+vaddvq_f32 (float32x4_t __a)
8131
+ float32x4_t t = __builtin_aarch64_addvv4sf (__a);
8132
+ return vgetq_lane_f32 (t, 0);
8135
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
8136
+vaddvq_f64 (float64x2_t __a)
8138
+ float64x2_t t = __builtin_aarch64_addvv2df (__a);
8139
+ return vgetq_lane_f64 (t, 0);
8144
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8145
+vcages_f32 (float32_t __a, float32_t __b)
8147
+ return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
8150
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8151
+vcage_f32 (float32x2_t __a, float32x2_t __b)
8153
+ return vabs_f32 (__a) >= vabs_f32 (__b);
8156
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8157
+vcageq_f32 (float32x4_t __a, float32x4_t __b)
8159
+ return vabsq_f32 (__a) >= vabsq_f32 (__b);
8162
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
8163
+vcaged_f64 (float64_t __a, float64_t __b)
8165
+ return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
8168
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8169
+vcageq_f64 (float64x2_t __a, float64x2_t __b)
8171
+ return vabsq_f64 (__a) >= vabsq_f64 (__b);
8176
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8177
+vcagts_f32 (float32_t __a, float32_t __b)
8179
+ return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
8182
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8183
+vcagt_f32 (float32x2_t __a, float32x2_t __b)
8185
+ return vabs_f32 (__a) > vabs_f32 (__b);
8188
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8189
+vcagtq_f32 (float32x4_t __a, float32x4_t __b)
8191
+ return vabsq_f32 (__a) > vabsq_f32 (__b);
8194
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
8195
+vcagtd_f64 (float64_t __a, float64_t __b)
8197
+ return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
8200
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8201
+vcagtq_f64 (float64x2_t __a, float64x2_t __b)
8203
+ return vabsq_f64 (__a) > vabsq_f64 (__b);
8208
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8209
+vcale_f32 (float32x2_t __a, float32x2_t __b)
8211
+ return vabs_f32 (__a) <= vabs_f32 (__b);
8214
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8215
+vcaleq_f32 (float32x4_t __a, float32x4_t __b)
8217
+ return vabsq_f32 (__a) <= vabsq_f32 (__b);
8220
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8221
+vcaleq_f64 (float64x2_t __a, float64x2_t __b)
8223
+ return vabsq_f64 (__a) <= vabsq_f64 (__b);
8228
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8229
+vcalt_f32 (float32x2_t __a, float32x2_t __b)
8231
+ return vabs_f32 (__a) < vabs_f32 (__b);
8234
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8235
+vcaltq_f32 (float32x4_t __a, float32x4_t __b)
8237
+ return vabsq_f32 (__a) < vabsq_f32 (__b);
8240
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8241
+vcaltq_f64 (float64x2_t __a, float64x2_t __b)
8243
+ return vabsq_f64 (__a) < vabsq_f64 (__b);
8246
+/* vceq - vector. */
8248
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8249
+vceq_f32 (float32x2_t __a, float32x2_t __b)
8251
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
8254
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8255
+vceq_f64 (float64x1_t __a, float64x1_t __b)
8257
+ return __a == __b ? -1ll : 0ll;
8260
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8261
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
8263
@@ -19893,7 +18554,7 @@
8264
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8265
vceq_s64 (int64x1_t __a, int64x1_t __b)
8267
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
8268
+ return __a == __b ? -1ll : 0ll;
8271
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8272
@@ -19920,10 +18581,21 @@
8273
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8274
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
8276
- return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a,
8278
+ return __a == __b ? -1ll : 0ll;
8281
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8282
+vceqq_f32 (float32x4_t __a, float32x4_t __b)
8284
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
8287
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8288
+vceqq_f64 (float64x2_t __a, float64x2_t __b)
8290
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
8293
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8294
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
8296
@@ -19983,27 +18655,245 @@
8300
+/* vceq - scalar. */
8302
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8303
+vceqs_f32 (float32_t __a, float32_t __b)
8305
+ return __a == __b ? -1 : 0;
8308
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8309
vceqd_s64 (int64x1_t __a, int64x1_t __b)
8311
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
8312
+ return __a == __b ? -1ll : 0ll;
8315
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8316
vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
8318
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
8319
+ return __a == __b ? -1ll : 0ll;
8322
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
8323
+vceqd_f64 (float64_t __a, float64_t __b)
8325
+ return __a == __b ? -1ll : 0ll;
8328
+/* vceqz - vector. */
8330
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8331
+vceqz_f32 (float32x2_t __a)
8333
+ float32x2_t __b = {0.0f, 0.0f};
8334
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
8337
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8338
+vceqz_f64 (float64x1_t __a)
8340
+ return __a == 0.0 ? -1ll : 0ll;
8343
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8344
+vceqz_p8 (poly8x8_t __a)
8346
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8347
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
8351
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8352
+vceqz_s8 (int8x8_t __a)
8354
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8355
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
8358
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8359
+vceqz_s16 (int16x4_t __a)
8361
+ int16x4_t __b = {0, 0, 0, 0};
8362
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
8365
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8366
+vceqz_s32 (int32x2_t __a)
8368
+ int32x2_t __b = {0, 0};
8369
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
8372
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8373
+vceqz_s64 (int64x1_t __a)
8375
+ return __a == 0ll ? -1ll : 0ll;
8378
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8379
+vceqz_u8 (uint8x8_t __a)
8381
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8382
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
8386
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8387
+vceqz_u16 (uint16x4_t __a)
8389
+ uint16x4_t __b = {0, 0, 0, 0};
8390
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
8394
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8395
+vceqz_u32 (uint32x2_t __a)
8397
+ uint32x2_t __b = {0, 0};
8398
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
8402
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8403
+vceqz_u64 (uint64x1_t __a)
8405
+ return __a == 0ll ? -1ll : 0ll;
8408
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8409
+vceqzq_f32 (float32x4_t __a)
8411
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
8412
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
8415
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8416
+vceqzq_f64 (float64x2_t __a)
8418
+ float64x2_t __b = {0.0, 0.0};
8419
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
8422
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8423
+vceqzq_p8 (poly8x16_t __a)
8425
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
8426
+ 0, 0, 0, 0, 0, 0, 0, 0};
8427
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
8431
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8432
+vceqzq_s8 (int8x16_t __a)
8434
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
8435
+ 0, 0, 0, 0, 0, 0, 0, 0};
8436
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
8439
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8440
+vceqzq_s16 (int16x8_t __a)
8442
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8443
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
8446
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8447
+vceqzq_s32 (int32x4_t __a)
8449
+ int32x4_t __b = {0, 0, 0, 0};
8450
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
8453
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8454
+vceqzq_s64 (int64x2_t __a)
8456
+ int64x2_t __b = {0, 0};
8457
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
8460
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8461
+vceqzq_u8 (uint8x16_t __a)
8463
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
8464
+ 0, 0, 0, 0, 0, 0, 0, 0};
8465
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
8469
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8470
+vceqzq_u16 (uint16x8_t __a)
8472
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8473
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
8477
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8478
+vceqzq_u32 (uint32x4_t __a)
8480
+ uint32x4_t __b = {0, 0, 0, 0};
8481
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
8485
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8486
+vceqzq_u64 (uint64x2_t __a)
8488
+ uint64x2_t __b = {0, 0};
8489
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
8493
+/* vceqz - scalar. */
8495
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8496
+vceqzs_f32 (float32_t __a)
8498
+ return __a == 0.0f ? -1 : 0;
8501
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8502
vceqzd_s64 (int64x1_t __a)
8504
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0);
8505
+ return __a == 0 ? -1ll : 0ll;
8509
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8510
+vceqzd_u64 (int64x1_t __a)
8512
+ return __a == 0 ? -1ll : 0ll;
8515
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
8516
+vceqzd_f64 (float64_t __a)
8518
+ return __a == 0.0 ? -1ll : 0ll;
8521
+/* vcge - vector. */
8523
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8524
+vcge_f32 (float32x2_t __a, float32x2_t __b)
8526
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
8529
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8530
+vcge_f64 (float64x1_t __a, float64x1_t __b)
8532
+ return __a >= __b ? -1ll : 0ll;
8535
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8536
+vcge_p8 (poly8x8_t __a, poly8x8_t __b)
8538
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
8542
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8543
vcge_s8 (int8x8_t __a, int8x8_t __b)
8545
return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
8546
@@ -20024,38 +18914,56 @@
8547
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8548
vcge_s64 (int64x1_t __a, int64x1_t __b)
8550
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
8551
+ return __a >= __b ? -1ll : 0ll;
8554
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8555
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
8557
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
8558
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
8562
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8563
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
8565
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
8566
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
8570
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8571
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
8573
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
8574
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
8578
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8579
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
8581
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
8583
+ return __a >= __b ? -1ll : 0ll;
8586
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8587
+vcgeq_f32 (float32x4_t __a, float32x4_t __b)
8589
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
8592
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8593
+vcgeq_f64 (float64x2_t __a, float64x2_t __b)
8595
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
8598
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8599
+vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
8601
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
8605
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8606
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
8608
return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
8609
@@ -20082,53 +18990,270 @@
8610
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8611
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
8613
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
8614
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
8618
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8619
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
8621
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
8622
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
8626
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8627
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
8629
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
8630
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
8634
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8635
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
8637
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
8638
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
8642
+/* vcge - scalar. */
8644
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8645
+vcges_f32 (float32_t __a, float32_t __b)
8647
+ return __a >= __b ? -1 : 0;
8650
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8651
vcged_s64 (int64x1_t __a, int64x1_t __b)
8653
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
8654
+ return __a >= __b ? -1ll : 0ll;
8657
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8658
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
8660
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
8662
+ return __a >= __b ? -1ll : 0ll;
8665
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
8666
+vcged_f64 (float64_t __a, float64_t __b)
8668
+ return __a >= __b ? -1ll : 0ll;
8671
+/* vcgez - vector. */
8673
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8674
+vcgez_f32 (float32x2_t __a)
8676
+ float32x2_t __b = {0.0f, 0.0f};
8677
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
8680
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8681
+vcgez_f64 (float64x1_t __a)
8683
+ return __a >= 0.0 ? -1ll : 0ll;
8686
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8687
+vcgez_p8 (poly8x8_t __a)
8689
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8690
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
8694
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8695
+vcgez_s8 (int8x8_t __a)
8697
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8698
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
8701
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8702
+vcgez_s16 (int16x4_t __a)
8704
+ int16x4_t __b = {0, 0, 0, 0};
8705
+ return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
8708
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8709
+vcgez_s32 (int32x2_t __a)
8711
+ int32x2_t __b = {0, 0};
8712
+ return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
8715
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8716
+vcgez_s64 (int64x1_t __a)
8718
+ return __a >= 0ll ? -1ll : 0ll;
8721
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8722
+vcgez_u8 (uint8x8_t __a)
8724
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8725
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
8729
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8730
+vcgez_u16 (uint16x4_t __a)
8732
+ uint16x4_t __b = {0, 0, 0, 0};
8733
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
8737
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8738
+vcgez_u32 (uint32x2_t __a)
8740
+ uint32x2_t __b = {0, 0};
8741
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
8745
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8746
+vcgez_u64 (uint64x1_t __a)
8748
+ return __a >= 0ll ? -1ll : 0ll;
8751
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8752
+vcgezq_f32 (float32x4_t __a)
8754
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
8755
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
8758
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8759
+vcgezq_f64 (float64x2_t __a)
8761
+ float64x2_t __b = {0.0, 0.0};
8762
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
8765
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8766
+vcgezq_p8 (poly8x16_t __a)
8768
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
8769
+ 0, 0, 0, 0, 0, 0, 0, 0};
8770
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
8774
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8775
+vcgezq_s8 (int8x16_t __a)
8777
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
8778
+ 0, 0, 0, 0, 0, 0, 0, 0};
8779
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
8782
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8783
+vcgezq_s16 (int16x8_t __a)
8785
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8786
+ return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
8789
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8790
+vcgezq_s32 (int32x4_t __a)
8792
+ int32x4_t __b = {0, 0, 0, 0};
8793
+ return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
8796
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8797
+vcgezq_s64 (int64x2_t __a)
8799
+ int64x2_t __b = {0, 0};
8800
+ return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
8803
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8804
+vcgezq_u8 (uint8x16_t __a)
8806
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
8807
+ 0, 0, 0, 0, 0, 0, 0, 0};
8808
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
8812
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8813
+vcgezq_u16 (uint16x8_t __a)
8815
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
8816
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
8820
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8821
+vcgezq_u32 (uint32x4_t __a)
8823
+ uint32x4_t __b = {0, 0, 0, 0};
8824
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
8828
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8829
+vcgezq_u64 (uint64x2_t __a)
8831
+ uint64x2_t __b = {0, 0};
8832
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
8836
+/* vcgez - scalar. */
8838
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8839
+vcgezs_f32 (float32_t __a)
8841
+ return __a >= 0.0f ? -1 : 0;
8844
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8845
vcgezd_s64 (int64x1_t __a)
8847
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0);
8848
+ return __a >= 0 ? -1ll : 0ll;
8852
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8853
+vcgezd_u64 (int64x1_t __a)
8855
+ return __a >= 0 ? -1ll : 0ll;
8858
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
8859
+vcgezd_f64 (float64_t __a)
8861
+ return __a >= 0.0 ? -1ll : 0ll;
8864
+/* vcgt - vector. */
8866
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8867
+vcgt_f32 (float32x2_t __a, float32x2_t __b)
8869
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
8872
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8873
+vcgt_f64 (float64x1_t __a, float64x1_t __b)
8875
+ return __a > __b ? -1ll : 0ll;
8878
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8879
+vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
8881
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
8885
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8886
vcgt_s8 (int8x8_t __a, int8x8_t __b)
8888
return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
8889
@@ -20149,38 +19274,56 @@
8890
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8891
vcgt_s64 (int64x1_t __a, int64x1_t __b)
8893
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
8894
+ return __a > __b ? -1ll : 0ll;
8897
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8898
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
8900
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
8901
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
8905
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8906
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
8908
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
8909
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
8913
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8914
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
8916
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
8917
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
8921
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8922
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
8924
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
8926
+ return __a > __b ? -1ll : 0ll;
8929
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8930
+vcgtq_f32 (float32x4_t __a, float32x4_t __b)
8932
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
8935
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8936
+vcgtq_f64 (float64x2_t __a, float64x2_t __b)
8938
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
8941
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8942
+vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
8944
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
8948
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8949
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
8951
return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
8952
@@ -20207,53 +19350,270 @@
8953
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8954
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
8956
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
8957
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
8961
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8962
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
8964
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
8965
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
8969
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8970
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
8972
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
8973
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
8977
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8978
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
8980
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
8981
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
8985
+/* vcgt - scalar. */
8987
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
8988
+vcgts_f32 (float32_t __a, float32_t __b)
8990
+ return __a > __b ? -1 : 0;
8993
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8994
vcgtd_s64 (int64x1_t __a, int64x1_t __b)
8996
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
8997
+ return __a > __b ? -1ll : 0ll;
9000
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9001
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
9003
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
9005
+ return __a > __b ? -1ll : 0ll;
9008
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9009
+vcgtd_f64 (float64_t __a, float64_t __b)
9011
+ return __a > __b ? -1ll : 0ll;
9014
+/* vcgtz - vector. */
9016
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9017
+vcgtz_f32 (float32x2_t __a)
9019
+ float32x2_t __b = {0.0f, 0.0f};
9020
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
9023
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9024
+vcgtz_f64 (float64x1_t __a)
9026
+ return __a > 0.0 ? -1ll : 0ll;
9029
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9030
+vcgtz_p8 (poly8x8_t __a)
9032
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9033
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
9037
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9038
+vcgtz_s8 (int8x8_t __a)
9040
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9041
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
9044
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9045
+vcgtz_s16 (int16x4_t __a)
9047
+ int16x4_t __b = {0, 0, 0, 0};
9048
+ return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
9051
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9052
+vcgtz_s32 (int32x2_t __a)
9054
+ int32x2_t __b = {0, 0};
9055
+ return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
9058
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9059
+vcgtz_s64 (int64x1_t __a)
9061
+ return __a > 0ll ? -1ll : 0ll;
9064
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9065
+vcgtz_u8 (uint8x8_t __a)
9067
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9068
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
9072
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9073
+vcgtz_u16 (uint16x4_t __a)
9075
+ uint16x4_t __b = {0, 0, 0, 0};
9076
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
9080
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9081
+vcgtz_u32 (uint32x2_t __a)
9083
+ uint32x2_t __b = {0, 0};
9084
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
9088
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9089
+vcgtz_u64 (uint64x1_t __a)
9091
+ return __a > 0ll ? -1ll : 0ll;
9094
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9095
+vcgtzq_f32 (float32x4_t __a)
9097
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
9098
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
9101
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9102
+vcgtzq_f64 (float64x2_t __a)
9104
+ float64x2_t __b = {0.0, 0.0};
9105
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
9108
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9109
+vcgtzq_p8 (poly8x16_t __a)
9111
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9112
+ 0, 0, 0, 0, 0, 0, 0, 0};
9113
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
9117
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9118
+vcgtzq_s8 (int8x16_t __a)
9120
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9121
+ 0, 0, 0, 0, 0, 0, 0, 0};
9122
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
9125
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9126
+vcgtzq_s16 (int16x8_t __a)
9128
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9129
+ return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
9132
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9133
+vcgtzq_s32 (int32x4_t __a)
9135
+ int32x4_t __b = {0, 0, 0, 0};
9136
+ return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
9139
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9140
+vcgtzq_s64 (int64x2_t __a)
9142
+ int64x2_t __b = {0, 0};
9143
+ return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
9146
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9147
+vcgtzq_u8 (uint8x16_t __a)
9149
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9150
+ 0, 0, 0, 0, 0, 0, 0, 0};
9151
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
9155
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9156
+vcgtzq_u16 (uint16x8_t __a)
9158
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9159
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
9163
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9164
+vcgtzq_u32 (uint32x4_t __a)
9166
+ uint32x4_t __b = {0, 0, 0, 0};
9167
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
9171
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9172
+vcgtzq_u64 (uint64x2_t __a)
9174
+ uint64x2_t __b = {0, 0};
9175
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
9179
+/* vcgtz - scalar. */
9181
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9182
+vcgtzs_f32 (float32_t __a)
9184
+ return __a > 0.0f ? -1 : 0;
9187
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9188
vcgtzd_s64 (int64x1_t __a)
9190
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0);
9191
+ return __a > 0 ? -1ll : 0ll;
9195
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9196
+vcgtzd_u64 (int64x1_t __a)
9198
+ return __a > 0 ? -1ll : 0ll;
9201
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9202
+vcgtzd_f64 (float64_t __a)
9204
+ return __a > 0.0 ? -1ll : 0ll;
9207
+/* vcle - vector. */
9209
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9210
+vcle_f32 (float32x2_t __a, float32x2_t __b)
9212
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
9215
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9216
+vcle_f64 (float64x1_t __a, float64x1_t __b)
9218
+ return __a <= __b ? -1ll : 0ll;
9221
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9222
+vcle_p8 (poly8x8_t __a, poly8x8_t __b)
9224
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
9228
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9229
vcle_s8 (int8x8_t __a, int8x8_t __b)
9231
return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
9232
@@ -20274,38 +19634,56 @@
9233
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9234
vcle_s64 (int64x1_t __a, int64x1_t __b)
9236
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
9237
+ return __a <= __b ? -1ll : 0ll;
9240
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9241
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
9243
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
9244
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
9248
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9249
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
9251
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
9252
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
9256
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9257
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
9259
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
9260
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
9264
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9265
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
9267
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
9269
+ return __a <= __b ? -1ll : 0ll;
9272
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9273
+vcleq_f32 (float32x4_t __a, float32x4_t __b)
9275
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
9278
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9279
+vcleq_f64 (float64x2_t __a, float64x2_t __b)
9281
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
9284
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9285
+vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
9287
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
9291
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9292
vcleq_s8 (int8x16_t __a, int8x16_t __b)
9294
return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
9295
@@ -20332,46 +19710,213 @@
9296
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9297
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
9299
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
9300
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
9304
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9305
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
9307
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
9308
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
9312
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9313
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
9315
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
9316
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
9320
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9321
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
9323
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
9324
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
9328
+/* vcle - scalar. */
9330
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9331
+vcles_f32 (float32_t __a, float32_t __b)
9333
+ return __a <= __b ? -1 : 0;
9336
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9337
vcled_s64 (int64x1_t __a, int64x1_t __b)
9339
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
9340
+ return __a <= __b ? -1ll : 0ll;
9343
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9344
+vcled_u64 (uint64x1_t __a, uint64x1_t __b)
9346
+ return __a <= __b ? -1ll : 0ll;
9349
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9350
+vcled_f64 (float64_t __a, float64_t __b)
9352
+ return __a <= __b ? -1ll : 0ll;
9355
+/* vclez - vector. */
9357
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9358
+vclez_f32 (float32x2_t __a)
9360
+ float32x2_t __b = {0.0f, 0.0f};
9361
+ return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
9364
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9365
+vclez_f64 (float64x1_t __a)
9367
+ return __a <= 0.0 ? -1ll : 0ll;
9370
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9371
+vclez_p8 (poly8x8_t __a)
9373
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9374
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
9378
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9379
+vclez_s8 (int8x8_t __a)
9381
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9382
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
9385
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9386
+vclez_s16 (int16x4_t __a)
9388
+ int16x4_t __b = {0, 0, 0, 0};
9389
+ return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
9392
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9393
+vclez_s32 (int32x2_t __a)
9395
+ int32x2_t __b = {0, 0};
9396
+ return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
9399
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9400
+vclez_s64 (int64x1_t __a)
9402
+ return __a <= 0ll ? -1ll : 0ll;
9405
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9406
+vclez_u64 (uint64x1_t __a)
9408
+ return __a <= 0ll ? -1ll : 0ll;
9411
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9412
+vclezq_f32 (float32x4_t __a)
9414
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
9415
+ return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
9418
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9419
+vclezq_f64 (float64x2_t __a)
9421
+ float64x2_t __b = {0.0, 0.0};
9422
+ return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
9425
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9426
+vclezq_p8 (poly8x16_t __a)
9428
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9429
+ 0, 0, 0, 0, 0, 0, 0, 0};
9430
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
9434
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9435
+vclezq_s8 (int8x16_t __a)
9437
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9438
+ 0, 0, 0, 0, 0, 0, 0, 0};
9439
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
9442
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9443
+vclezq_s16 (int16x8_t __a)
9445
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9446
+ return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
9449
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9450
+vclezq_s32 (int32x4_t __a)
9452
+ int32x4_t __b = {0, 0, 0, 0};
9453
+ return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
9456
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9457
+vclezq_s64 (int64x2_t __a)
9459
+ int64x2_t __b = {0, 0};
9460
+ return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
9463
+/* vclez - scalar. */
9465
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9466
+vclezs_f32 (float32_t __a)
9468
+ return __a <= 0.0f ? -1 : 0;
9471
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9472
vclezd_s64 (int64x1_t __a)
9474
- return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0);
9475
+ return __a <= 0 ? -1ll : 0ll;
9479
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9480
+vclezd_u64 (int64x1_t __a)
9482
+ return __a <= 0 ? -1ll : 0ll;
9485
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9486
+vclezd_f64 (float64_t __a)
9488
+ return __a <= 0.0 ? -1ll : 0ll;
9491
+/* vclt - vector. */
9493
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9494
+vclt_f32 (float32x2_t __a, float32x2_t __b)
9496
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
9499
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9500
+vclt_f64 (float64x1_t __a, float64x1_t __b)
9502
+ return __a < __b ? -1ll : 0ll;
9505
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9506
+vclt_p8 (poly8x8_t __a, poly8x8_t __b)
9508
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
9512
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9513
vclt_s8 (int8x8_t __a, int8x8_t __b)
9515
return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
9516
@@ -20392,38 +19937,56 @@
9517
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9518
vclt_s64 (int64x1_t __a, int64x1_t __b)
9520
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
9521
+ return __a < __b ? -1ll : 0ll;
9524
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9525
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
9527
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
9528
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
9532
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9533
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
9535
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
9536
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
9540
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9541
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
9543
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
9544
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
9548
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9549
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
9551
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
9553
+ return __a < __b ? -1ll : 0ll;
9556
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9557
+vcltq_f32 (float32x4_t __a, float32x4_t __b)
9559
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
9562
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9563
+vcltq_f64 (float64x2_t __a, float64x2_t __b)
9565
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
9568
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9569
+vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
9571
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
9575
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9576
vcltq_s8 (int8x16_t __a, int8x16_t __b)
9578
return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
9579
@@ -20450,43 +20013,616 @@
9580
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9581
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
9583
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
9584
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
9588
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9589
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
9591
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
9592
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
9596
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9597
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
9599
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
9600
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
9604
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9605
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
9607
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
9608
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
9612
+/* vclt - scalar. */
9614
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9615
+vclts_f32 (float32_t __a, float32_t __b)
9617
+ return __a < __b ? -1 : 0;
9620
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9621
vcltd_s64 (int64x1_t __a, int64x1_t __b)
9623
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
9624
+ return __a < __b ? -1ll : 0ll;
9627
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9628
+vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
9630
+ return __a < __b ? -1ll : 0ll;
9633
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9634
+vcltd_f64 (float64_t __a, float64_t __b)
9636
+ return __a < __b ? -1ll : 0ll;
9639
+/* vcltz - vector. */
9641
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9642
+vcltz_f32 (float32x2_t __a)
9644
+ float32x2_t __b = {0.0f, 0.0f};
9645
+ return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
9648
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9649
+vcltz_f64 (float64x1_t __a)
9651
+ return __a < 0.0 ? -1ll : 0ll;
9654
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9655
+vcltz_p8 (poly8x8_t __a)
9657
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9658
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
9662
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9663
+vcltz_s8 (int8x8_t __a)
9665
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9666
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
9669
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9670
+vcltz_s16 (int16x4_t __a)
9672
+ int16x4_t __b = {0, 0, 0, 0};
9673
+ return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
9676
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9677
+vcltz_s32 (int32x2_t __a)
9679
+ int32x2_t __b = {0, 0};
9680
+ return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
9683
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9684
+vcltz_s64 (int64x1_t __a)
9686
+ return __a < 0ll ? -1ll : 0ll;
9689
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9690
+vcltzq_f32 (float32x4_t __a)
9692
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
9693
+ return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
9696
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9697
+vcltzq_f64 (float64x2_t __a)
9699
+ float64x2_t __b = {0.0, 0.0};
9700
+ return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
9703
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9704
+vcltzq_p8 (poly8x16_t __a)
9706
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9707
+ 0, 0, 0, 0, 0, 0, 0, 0};
9708
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
9712
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9713
+vcltzq_s8 (int8x16_t __a)
9715
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
9716
+ 0, 0, 0, 0, 0, 0, 0, 0};
9717
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
9720
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9721
+vcltzq_s16 (int16x8_t __a)
9723
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
9724
+ return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
9727
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9728
+vcltzq_s32 (int32x4_t __a)
9730
+ int32x4_t __b = {0, 0, 0, 0};
9731
+ return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
9734
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9735
+vcltzq_s64 (int64x2_t __a)
9737
+ int64x2_t __b = {0, 0};
9738
+ return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
9741
+/* vcltz - scalar. */
9743
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9744
+vcltzs_f32 (float32_t __a)
9746
+ return __a < 0.0f ? -1 : 0;
9749
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9750
vcltzd_s64 (int64x1_t __a)
9752
- return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
9753
+ return __a < 0 ? -1ll : 0ll;
9756
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9757
+vcltzd_u64 (int64x1_t __a)
9759
+ return __a < 0 ? -1ll : 0ll;
9762
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9763
+vcltzd_f64 (float64_t __a)
9765
+ return __a < 0.0 ? -1ll : 0ll;
9768
+/* vcvt (double -> float). */
9770
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9771
+vcvt_f32_f64 (float64x2_t __a)
9773
+ return __builtin_aarch64_float_truncate_lo_v2sf (__a);
9776
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9777
+vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
9779
+ return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
9782
+/* vcvt (float -> double). */
9784
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9785
+vcvt_f64_f32 (float32x2_t __a)
9788
+ return __builtin_aarch64_float_extend_lo_v2df (__a);
9791
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9792
+vcvt_high_f64_f32 (float32x4_t __a)
9794
+ return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
9797
+/* vcvt (<u>int -> float) */
9799
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9800
+vcvtd_f64_s64 (int64_t __a)
9802
+ return (float64_t) __a;
9805
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9806
+vcvtd_f64_u64 (uint64_t __a)
9808
+ return (float64_t) __a;
9811
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9812
+vcvts_f32_s32 (int32_t __a)
9814
+ return (float32_t) __a;
9817
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9818
+vcvts_f32_u32 (uint32_t __a)
9820
+ return (float32_t) __a;
9823
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9824
+vcvt_f32_s32 (int32x2_t __a)
9826
+ return __builtin_aarch64_floatv2siv2sf (__a);
9829
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9830
+vcvt_f32_u32 (uint32x2_t __a)
9832
+ return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
9835
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9836
+vcvtq_f32_s32 (int32x4_t __a)
9838
+ return __builtin_aarch64_floatv4siv4sf (__a);
9841
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9842
+vcvtq_f32_u32 (uint32x4_t __a)
9844
+ return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
9847
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9848
+vcvtq_f64_s64 (int64x2_t __a)
9850
+ return __builtin_aarch64_floatv2div2df (__a);
9853
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9854
+vcvtq_f64_u64 (uint64x2_t __a)
9856
+ return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
9859
+/* vcvt (float -> <u>int) */
9861
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
9862
+vcvtd_s64_f64 (float64_t __a)
9864
+ return (int64_t) __a;
9867
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9868
+vcvtd_u64_f64 (float64_t __a)
9870
+ return (uint64_t) __a;
9873
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
9874
+vcvts_s32_f32 (float32_t __a)
9876
+ return (int32_t) __a;
9879
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9880
+vcvts_u32_f32 (float32_t __a)
9882
+ return (uint32_t) __a;
9885
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9886
+vcvt_s32_f32 (float32x2_t __a)
9888
+ return __builtin_aarch64_lbtruncv2sfv2si (__a);
9891
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9892
+vcvt_u32_f32 (float32x2_t __a)
9894
+ /* TODO: This cast should go away when builtins have
9895
+ their correct types. */
9896
+ return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
9899
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9900
+vcvtq_s32_f32 (float32x4_t __a)
9902
+ return __builtin_aarch64_lbtruncv4sfv4si (__a);
9905
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9906
+vcvtq_u32_f32 (float32x4_t __a)
9908
+ /* TODO: This cast should go away when builtins have
9909
+ their correct types. */
9910
+ return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
9913
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9914
+vcvtq_s64_f64 (float64x2_t __a)
9916
+ return __builtin_aarch64_lbtruncv2dfv2di (__a);
9919
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9920
+vcvtq_u64_f64 (float64x2_t __a)
9922
+ /* TODO: This cast should go away when builtins have
9923
+ their correct types. */
9924
+ return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
9929
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
9930
+vcvtad_s64_f64 (float64_t __a)
9932
+ return __builtin_aarch64_lrounddfdi (__a);
9935
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9936
+vcvtad_u64_f64 (float64_t __a)
9938
+ return __builtin_aarch64_lroundudfdi (__a);
9941
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
9942
+vcvtas_s32_f32 (float32_t __a)
9944
+ return __builtin_aarch64_lroundsfsi (__a);
9947
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9948
+vcvtas_u32_f32 (float32_t __a)
9950
+ return __builtin_aarch64_lroundusfsi (__a);
9953
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9954
+vcvta_s32_f32 (float32x2_t __a)
9956
+ return __builtin_aarch64_lroundv2sfv2si (__a);
9959
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9960
+vcvta_u32_f32 (float32x2_t __a)
9962
+ /* TODO: This cast should go away when builtins have
9963
+ their correct types. */
9964
+ return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
9967
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9968
+vcvtaq_s32_f32 (float32x4_t __a)
9970
+ return __builtin_aarch64_lroundv4sfv4si (__a);
9973
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9974
+vcvtaq_u32_f32 (float32x4_t __a)
9976
+ /* TODO: This cast should go away when builtins have
9977
+ their correct types. */
9978
+ return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
9981
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9982
+vcvtaq_s64_f64 (float64x2_t __a)
9984
+ return __builtin_aarch64_lroundv2dfv2di (__a);
9987
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9988
+vcvtaq_u64_f64 (float64x2_t __a)
9990
+ /* TODO: This cast should go away when builtins have
9991
+ their correct types. */
9992
+ return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
9997
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
9998
+vcvtmd_s64_f64 (float64_t __a)
10000
+ return __builtin_lfloor (__a);
10003
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10004
+vcvtmd_u64_f64 (float64_t __a)
10006
+ return __builtin_aarch64_lfloorudfdi (__a);
10009
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10010
+vcvtms_s32_f32 (float32_t __a)
10012
+ return __builtin_ifloorf (__a);
10015
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10016
+vcvtms_u32_f32 (float32_t __a)
10018
+ return __builtin_aarch64_lfloorusfsi (__a);
10021
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10022
+vcvtm_s32_f32 (float32x2_t __a)
10024
+ return __builtin_aarch64_lfloorv2sfv2si (__a);
10027
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10028
+vcvtm_u32_f32 (float32x2_t __a)
10030
+ /* TODO: This cast should go away when builtins have
10031
+ their correct types. */
10032
+ return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
10035
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10036
+vcvtmq_s32_f32 (float32x4_t __a)
10038
+ return __builtin_aarch64_lfloorv4sfv4si (__a);
10041
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10042
+vcvtmq_u32_f32 (float32x4_t __a)
10044
+ /* TODO: This cast should go away when builtins have
10045
+ their correct types. */
10046
+ return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
10049
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10050
+vcvtmq_s64_f64 (float64x2_t __a)
10052
+ return __builtin_aarch64_lfloorv2dfv2di (__a);
10055
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10056
+vcvtmq_u64_f64 (float64x2_t __a)
10058
+ /* TODO: This cast should go away when builtins have
10059
+ their correct types. */
10060
+ return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
10065
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
10066
+vcvtnd_s64_f64 (float64_t __a)
10068
+ return __builtin_aarch64_lfrintndfdi (__a);
10071
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10072
+vcvtnd_u64_f64 (float64_t __a)
10074
+ return __builtin_aarch64_lfrintnudfdi (__a);
10077
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10078
+vcvtns_s32_f32 (float32_t __a)
10080
+ return __builtin_aarch64_lfrintnsfsi (__a);
10083
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10084
+vcvtns_u32_f32 (float32_t __a)
10086
+ return __builtin_aarch64_lfrintnusfsi (__a);
10089
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10090
+vcvtn_s32_f32 (float32x2_t __a)
10092
+ return __builtin_aarch64_lfrintnv2sfv2si (__a);
10095
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10096
+vcvtn_u32_f32 (float32x2_t __a)
10098
+ /* TODO: This cast should go away when builtins have
10099
+ their correct types. */
10100
+ return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
10103
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10104
+vcvtnq_s32_f32 (float32x4_t __a)
10106
+ return __builtin_aarch64_lfrintnv4sfv4si (__a);
10109
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10110
+vcvtnq_u32_f32 (float32x4_t __a)
10112
+ /* TODO: This cast should go away when builtins have
10113
+ their correct types. */
10114
+ return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
10117
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10118
+vcvtnq_s64_f64 (float64x2_t __a)
10120
+ return __builtin_aarch64_lfrintnv2dfv2di (__a);
10123
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10124
+vcvtnq_u64_f64 (float64x2_t __a)
10126
+ /* TODO: This cast should go away when builtins have
10127
+ their correct types. */
10128
+ return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
10133
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
10134
+vcvtpd_s64_f64 (float64_t __a)
10136
+ return __builtin_lceil (__a);
10139
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10140
+vcvtpd_u64_f64 (float64_t __a)
10142
+ return __builtin_aarch64_lceiludfdi (__a);
10145
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
10146
+vcvtps_s32_f32 (float32_t __a)
10148
+ return __builtin_iceilf (__a);
10151
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10152
+vcvtps_u32_f32 (float32_t __a)
10154
+ return __builtin_aarch64_lceilusfsi (__a);
10157
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10158
+vcvtp_s32_f32 (float32x2_t __a)
10160
+ return __builtin_aarch64_lceilv2sfv2si (__a);
10163
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10164
+vcvtp_u32_f32 (float32x2_t __a)
10166
+ /* TODO: This cast should go away when builtins have
10167
+ their correct types. */
10168
+ return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
10171
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10172
+vcvtpq_s32_f32 (float32x4_t __a)
10174
+ return __builtin_aarch64_lceilv4sfv4si (__a);
10177
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10178
+vcvtpq_u32_f32 (float32x4_t __a)
10180
+ /* TODO: This cast should go away when builtins have
10181
+ their correct types. */
10182
+ return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
10185
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10186
+vcvtpq_s64_f64 (float64x2_t __a)
10188
+ return __builtin_aarch64_lceilv2dfv2di (__a);
10191
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10192
+vcvtpq_u64_f64 (float64x2_t __a)
10194
+ /* TODO: This cast should go away when builtins have
10195
+ their correct types. */
10196
+ return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
10201
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
10202
@@ -23115,6 +23251,223 @@
10203
return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
10208
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10209
+vrecpes_f32 (float32_t __a)
10211
+ return __builtin_aarch64_frecpesf (__a);
10214
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10215
+vrecped_f64 (float64_t __a)
10217
+ return __builtin_aarch64_frecpedf (__a);
10220
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10221
+vrecpe_f32 (float32x2_t __a)
10223
+ return __builtin_aarch64_frecpev2sf (__a);
10226
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10227
+vrecpeq_f32 (float32x4_t __a)
10229
+ return __builtin_aarch64_frecpev4sf (__a);
10232
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10233
+vrecpeq_f64 (float64x2_t __a)
10235
+ return __builtin_aarch64_frecpev2df (__a);
10240
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10241
+vrecpss_f32 (float32_t __a, float32_t __b)
10243
+ return __builtin_aarch64_frecpssf (__a, __b);
10246
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10247
+vrecpsd_f64 (float64_t __a, float64_t __b)
10249
+ return __builtin_aarch64_frecpsdf (__a, __b);
10252
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10253
+vrecps_f32 (float32x2_t __a, float32x2_t __b)
10255
+ return __builtin_aarch64_frecpsv2sf (__a, __b);
10258
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10259
+vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
10261
+ return __builtin_aarch64_frecpsv4sf (__a, __b);
10264
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10265
+vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
10267
+ return __builtin_aarch64_frecpsv2df (__a, __b);
10272
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10273
+vrecpxs_f32 (float32_t __a)
10275
+ return __builtin_aarch64_frecpxsf (__a);
10278
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10279
+vrecpxd_f64 (float64_t __a)
10281
+ return __builtin_aarch64_frecpxdf (__a);
10286
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10287
+vrnd_f32 (float32x2_t __a)
10289
+ return __builtin_aarch64_btruncv2sf (__a);
10292
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10293
+vrndq_f32 (float32x4_t __a)
10295
+ return __builtin_aarch64_btruncv4sf (__a);
10298
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10299
+vrndq_f64 (float64x2_t __a)
10301
+ return __builtin_aarch64_btruncv2df (__a);
10306
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10307
+vrnda_f32 (float32x2_t __a)
10309
+ return __builtin_aarch64_roundv2sf (__a);
10312
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10313
+vrndaq_f32 (float32x4_t __a)
10315
+ return __builtin_aarch64_roundv4sf (__a);
10318
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10319
+vrndaq_f64 (float64x2_t __a)
10321
+ return __builtin_aarch64_roundv2df (__a);
10326
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10327
+vrndi_f32 (float32x2_t __a)
10329
+ return __builtin_aarch64_nearbyintv2sf (__a);
10332
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10333
+vrndiq_f32 (float32x4_t __a)
10335
+ return __builtin_aarch64_nearbyintv4sf (__a);
10338
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10339
+vrndiq_f64 (float64x2_t __a)
10341
+ return __builtin_aarch64_nearbyintv2df (__a);
10346
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10347
+vrndm_f32 (float32x2_t __a)
10349
+ return __builtin_aarch64_floorv2sf (__a);
10352
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10353
+vrndmq_f32 (float32x4_t __a)
10355
+ return __builtin_aarch64_floorv4sf (__a);
10358
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10359
+vrndmq_f64 (float64x2_t __a)
10361
+ return __builtin_aarch64_floorv2df (__a);
10366
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10367
+vrndn_f32 (float32x2_t __a)
10369
+ return __builtin_aarch64_frintnv2sf (__a);
10371
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10372
+vrndnq_f32 (float32x4_t __a)
10374
+ return __builtin_aarch64_frintnv4sf (__a);
10377
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10378
+vrndnq_f64 (float64x2_t __a)
10380
+ return __builtin_aarch64_frintnv2df (__a);
10385
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10386
+vrndp_f32 (float32x2_t __a)
10388
+ return __builtin_aarch64_ceilv2sf (__a);
10391
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10392
+vrndpq_f32 (float32x4_t __a)
10394
+ return __builtin_aarch64_ceilv4sf (__a);
10397
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10398
+vrndpq_f64 (float64x2_t __a)
10400
+ return __builtin_aarch64_ceilv2df (__a);
10405
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10406
+vrndx_f32 (float32x2_t __a)
10408
+ return __builtin_aarch64_rintv2sf (__a);
10411
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10412
+vrndxq_f32 (float32x4_t __a)
10414
+ return __builtin_aarch64_rintv4sf (__a);
10417
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10418
+vrndxq_f64 (float64x2_t __a)
10420
+ return __builtin_aarch64_rintv2df (__a);
10425
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10426
@@ -23458,109 +23811,109 @@
10427
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10428
vshl_n_s8 (int8x8_t __a, const int __b)
10430
- return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b);
10431
+ return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
10434
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10435
vshl_n_s16 (int16x4_t __a, const int __b)
10437
- return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b);
10438
+ return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
10441
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10442
vshl_n_s32 (int32x2_t __a, const int __b)
10444
- return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b);
10445
+ return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
10448
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10449
vshl_n_s64 (int64x1_t __a, const int __b)
10451
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
10452
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
10455
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10456
vshl_n_u8 (uint8x8_t __a, const int __b)
10458
- return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b);
10459
+ return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
10462
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10463
vshl_n_u16 (uint16x4_t __a, const int __b)
10465
- return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b);
10466
+ return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
10469
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10470
vshl_n_u32 (uint32x2_t __a, const int __b)
10472
- return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b);
10473
+ return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
10476
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10477
vshl_n_u64 (uint64x1_t __a, const int __b)
10479
- return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b);
10480
+ return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
10483
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10484
vshlq_n_s8 (int8x16_t __a, const int __b)
10486
- return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b);
10487
+ return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
10490
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10491
vshlq_n_s16 (int16x8_t __a, const int __b)
10493
- return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b);
10494
+ return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
10497
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10498
vshlq_n_s32 (int32x4_t __a, const int __b)
10500
- return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b);
10501
+ return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
10504
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10505
vshlq_n_s64 (int64x2_t __a, const int __b)
10507
- return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b);
10508
+ return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
10511
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10512
vshlq_n_u8 (uint8x16_t __a, const int __b)
10514
- return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b);
10515
+ return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
10518
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10519
vshlq_n_u16 (uint16x8_t __a, const int __b)
10521
- return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b);
10522
+ return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
10525
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10526
vshlq_n_u32 (uint32x4_t __a, const int __b)
10528
- return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b);
10529
+ return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
10532
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10533
vshlq_n_u64 (uint64x2_t __a, const int __b)
10535
- return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b);
10536
+ return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
10539
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10540
vshld_n_s64 (int64x1_t __a, const int __b)
10542
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
10543
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
10546
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10547
vshld_n_u64 (uint64x1_t __a, const int __b)
10549
- return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b);
10550
+ return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
10553
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10554
@@ -23748,109 +24101,109 @@
10555
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10556
vshr_n_s8 (int8x8_t __a, const int __b)
10558
- return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b);
10559
+ return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
10562
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10563
vshr_n_s16 (int16x4_t __a, const int __b)
10565
- return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b);
10566
+ return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
10569
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10570
vshr_n_s32 (int32x2_t __a, const int __b)
10572
- return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b);
10573
+ return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
10576
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10577
vshr_n_s64 (int64x1_t __a, const int __b)
10579
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
10580
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
10583
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10584
vshr_n_u8 (uint8x8_t __a, const int __b)
10586
- return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b);
10587
+ return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
10590
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10591
vshr_n_u16 (uint16x4_t __a, const int __b)
10593
- return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b);
10594
+ return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
10597
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10598
vshr_n_u32 (uint32x2_t __a, const int __b)
10600
- return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b);
10601
+ return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
10604
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10605
vshr_n_u64 (uint64x1_t __a, const int __b)
10607
- return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b);
10608
+ return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
10611
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10612
vshrq_n_s8 (int8x16_t __a, const int __b)
10614
- return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b);
10615
+ return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
10618
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10619
vshrq_n_s16 (int16x8_t __a, const int __b)
10621
- return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b);
10622
+ return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
10625
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10626
vshrq_n_s32 (int32x4_t __a, const int __b)
10628
- return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b);
10629
+ return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
10632
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10633
vshrq_n_s64 (int64x2_t __a, const int __b)
10635
- return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b);
10636
+ return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
10639
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10640
vshrq_n_u8 (uint8x16_t __a, const int __b)
10642
- return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b);
10643
+ return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
10646
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10647
vshrq_n_u16 (uint16x8_t __a, const int __b)
10649
- return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b);
10650
+ return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
10653
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10654
vshrq_n_u32 (uint32x4_t __a, const int __b)
10656
- return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b);
10657
+ return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
10660
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10661
vshrq_n_u64 (uint64x2_t __a, const int __b)
10663
- return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b);
10664
+ return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
10667
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10668
vshrd_n_s64 (int64x1_t __a, const int __b)
10670
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
10671
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
10674
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10675
vshrd_n_u64 (uint64x1_t __a, const int __b)
10677
- return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b);
10678
+ return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
10682
@@ -25320,7 +25673,7 @@
10683
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10684
vtst_s64 (int64x1_t __a, int64x1_t __b)
10686
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
10687
+ return (__a & __b) ? -1ll : 0ll;
10690
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10691
@@ -25347,8 +25700,7 @@
10692
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10693
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
10695
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
10696
- (int64x1_t) __b);
10697
+ return (__a & __b) ? -1ll : 0ll;
10700
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10701
@@ -25406,14 +25758,13 @@
10702
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10703
vtstd_s64 (int64x1_t __a, int64x1_t __b)
10705
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
10706
+ return (__a & __b) ? -1ll : 0ll;
10709
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10710
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
10712
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
10713
- (int64x1_t) __b);
10714
+ return (__a & __b) ? -1ll : 0ll;
10718
--- a/src/gcc/config/aarch64/aarch64.md
10719
+++ b/src/gcc/config/aarch64/aarch64.md
10721
(define_c_enum "unspec" [
10734
@@ -230,6 +234,9 @@
10744
@@ -763,19 +770,24 @@
10747
(define_insn "*mov<mode>_aarch64"
10748
- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,r,m, r,*w")
10749
- (match_operand:SHORT 1 "general_operand" " r,M,m,rZ,*w,r"))]
10750
+ [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w")
10751
+ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
10752
"(register_operand (operands[0], <MODE>mode)
10753
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
10757
+ movi\\t%0.<Vallxd>, %1
10758
ldr<size>\\t%w0, %1
10759
+ ldr\\t%<size>0, %1
10760
str<size>\\t%w1, %0
10761
+ str\\t%<size>1, %0
10762
umov\\t%w0, %1.<v>[0]
10763
- dup\\t%0.<Vallxd>, %w1"
10764
- [(set_attr "v8type" "move,alu,load1,store1,*,*")
10765
- (set_attr "simd_type" "*,*,*,*,simd_movgp,simd_dupgp")
10766
+ dup\\t%0.<Vallxd>, %w1
10767
+ dup\\t%0, %1.<v>[0]"
10768
+ [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
10769
+ (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
10770
+ (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
10771
(set_attr "mode" "<MODE>")
10772
(set_attr "simd_mode" "<MODE>")]
10774
@@ -797,26 +809,28 @@
10777
(define_insn "*movsi_aarch64"
10778
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m, *w, r,*w")
10779
- (match_operand:SI 1 "aarch64_mov_operand" " r,M,m,rZ,rZ,*w,*w"))]
10780
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,*w,m, m,*w, r,*w")
10781
+ (match_operand:SI 1 "aarch64_mov_operand" " r,M,m, m,rZ,*w,rZ,*w,*w"))]
10782
"(register_operand (operands[0], SImode)
10783
|| aarch64_reg_or_zero (operands[1], SImode))"
10794
- [(set_attr "v8type" "move,alu,load1,store1,fmov,fmov,fmov")
10795
+ [(set_attr "v8type" "move,alu,load1,load1,store1,store1,fmov,fmov,fmov")
10796
(set_attr "mode" "SI")
10797
- (set_attr "fp" "*,*,*,*,yes,yes,yes")]
10798
+ (set_attr "fp" "*,*,*,*,*,*,yes,yes,yes")]
10801
(define_insn "*movdi_aarch64"
10802
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,m, r, r, *w, r,*w,w")
10803
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m,rZ,Usa,Ush,rZ,*w,*w,Dd"))]
10804
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r, r, *w, r,*w,w")
10805
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,Usa,Ush,rZ,*w,*w,Dd"))]
10806
"(register_operand (operands[0], DImode)
10807
|| aarch64_reg_or_zero (operands[1], DImode))"
10809
@@ -825,16 +839,18 @@
10822
- [(set_attr "v8type" "move,move,move,alu,load1,store1,adr,adr,fmov,fmov,fmov,fmov")
10823
+ [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
10824
(set_attr "mode" "DI")
10825
- (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
10826
+ (set_attr "fp" "*,*,*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
10829
(define_insn "insv_imm<mode>"
10830
@@ -1149,13 +1165,14 @@
10833
(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
10834
- [(set (match_operand:GPI 0 "register_operand" "=r,r")
10835
- (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))]
10836
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,*w")
10837
+ (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))]
10840
uxt<SHORT:size>\t%<GPI:w>0, %w1
10841
- ldr<SHORT:size>\t%w0, %1"
10842
- [(set_attr "v8type" "extend,load1")
10843
+ ldr<SHORT:size>\t%w0, %1
10844
+ ldr\t%<SHORT:size>0, %1"
10845
+ [(set_attr "v8type" "extend,load1,load1")
10846
(set_attr "mode" "<GPI:MODE>")]
10849
@@ -1286,6 +1303,112 @@
10850
(set_attr "mode" "SI")]
10853
+(define_insn "*adds_mul_imm_<mode>"
10854
+ [(set (reg:CC_NZ CC_REGNUM)
10856
+ (plus:GPI (mult:GPI
10857
+ (match_operand:GPI 1 "register_operand" "r")
10858
+ (match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))
10859
+ (match_operand:GPI 3 "register_operand" "rk"))
10861
+ (set (match_operand:GPI 0 "register_operand" "=r")
10862
+ (plus:GPI (mult:GPI (match_dup 1) (match_dup 2))
10865
+ "adds\\t%<w>0, %<w>3, %<w>1, lsl %p2"
10866
+ [(set_attr "v8type" "alus_shift")
10867
+ (set_attr "mode" "<MODE>")]
10870
+(define_insn "*subs_mul_imm_<mode>"
10871
+ [(set (reg:CC_NZ CC_REGNUM)
10873
+ (minus:GPI (match_operand:GPI 1 "register_operand" "rk")
10875
+ (match_operand:GPI 2 "register_operand" "r")
10876
+ (match_operand:QI 3 "aarch64_pwr_2_<mode>" "n")))
10878
+ (set (match_operand:GPI 0 "register_operand" "=r")
10879
+ (minus:GPI (match_dup 1)
10880
+ (mult:GPI (match_dup 2) (match_dup 3))))]
10882
+ "subs\\t%<w>0, %<w>1, %<w>2, lsl %p3"
10883
+ [(set_attr "v8type" "alus_shift")
10884
+ (set_attr "mode" "<MODE>")]
10887
+(define_insn "*adds_<optab><ALLX:mode>_<GPI:mode>"
10888
+ [(set (reg:CC_NZ CC_REGNUM)
10891
+ (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r"))
10892
+ (match_operand:GPI 2 "register_operand" "r"))
10894
+ (set (match_operand:GPI 0 "register_operand" "=r")
10895
+ (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))]
10897
+ "adds\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>"
10898
+ [(set_attr "v8type" "alus_ext")
10899
+ (set_attr "mode" "<GPI:MODE>")]
10902
+(define_insn "*subs_<optab><ALLX:mode>_<GPI:mode>"
10903
+ [(set (reg:CC_NZ CC_REGNUM)
10905
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
10907
+ (match_operand:ALLX 2 "register_operand" "r")))
10909
+ (set (match_operand:GPI 0 "register_operand" "=r")
10910
+ (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))]
10912
+ "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>"
10913
+ [(set_attr "v8type" "alus_ext")
10914
+ (set_attr "mode" "<GPI:MODE>")]
10917
+(define_insn "*adds_<optab><mode>_multp2"
10918
+ [(set (reg:CC_NZ CC_REGNUM)
10920
+ (plus:GPI (ANY_EXTRACT:GPI
10921
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
10922
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
10923
+ (match_operand 3 "const_int_operand" "n")
10925
+ (match_operand:GPI 4 "register_operand" "r"))
10927
+ (set (match_operand:GPI 0 "register_operand" "=r")
10928
+ (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2))
10932
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
10933
+ "adds\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
10934
+ [(set_attr "v8type" "alus_ext")
10935
+ (set_attr "mode" "<MODE>")]
10938
+(define_insn "*subs_<optab><mode>_multp2"
10939
+ [(set (reg:CC_NZ CC_REGNUM)
10941
+ (minus:GPI (match_operand:GPI 4 "register_operand" "r")
10943
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
10944
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
10945
+ (match_operand 3 "const_int_operand" "n")
10948
+ (set (match_operand:GPI 0 "register_operand" "=r")
10949
+ (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI
10950
+ (mult:GPI (match_dup 1) (match_dup 2))
10952
+ (const_int 0))))]
10953
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
10954
+ "subs\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
10955
+ [(set_attr "v8type" "alus_ext")
10956
+ (set_attr "mode" "<MODE>")]
10959
(define_insn "*add<mode>3nr_compare0"
10960
[(set (reg:CC_NZ CC_REGNUM)
10962
@@ -1790,6 +1913,34 @@
10963
(set_attr "mode" "SI")]
10966
+(define_insn "*sub<mode>3_carryin"
10968
+ (match_operand:GPI 0 "register_operand" "=r")
10969
+ (minus:GPI (minus:GPI
10970
+ (match_operand:GPI 1 "register_operand" "r")
10971
+ (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
10972
+ (match_operand:GPI 2 "register_operand" "r")))]
10974
+ "sbc\\t%<w>0, %<w>1, %<w>2"
10975
+ [(set_attr "v8type" "adc")
10976
+ (set_attr "mode" "<MODE>")]
10979
+;; zero_extend version of the above
10980
+(define_insn "*subsi3_carryin_uxtw"
10982
+ (match_operand:DI 0 "register_operand" "=r")
10984
+ (minus:SI (minus:SI
10985
+ (match_operand:SI 1 "register_operand" "r")
10986
+ (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
10987
+ (match_operand:SI 2 "register_operand" "r"))))]
10989
+ "sbc\\t%w0, %w1, %w2"
10990
+ [(set_attr "v8type" "adc")
10991
+ (set_attr "mode" "SI")]
10994
(define_insn "*sub_uxt<mode>_multp2"
10995
[(set (match_operand:GPI 0 "register_operand" "=rk")
10996
(minus:GPI (match_operand:GPI 4 "register_operand" "r")
10997
@@ -1843,6 +1994,27 @@
10998
(set_attr "mode" "SI")]
11001
+(define_insn "*ngc<mode>"
11002
+ [(set (match_operand:GPI 0 "register_operand" "=r")
11003
+ (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
11004
+ (match_operand:GPI 1 "register_operand" "r")))]
11006
+ "ngc\\t%<w>0, %<w>1"
11007
+ [(set_attr "v8type" "adc")
11008
+ (set_attr "mode" "<MODE>")]
11011
+(define_insn "*ngcsi_uxtw"
11012
+ [(set (match_operand:DI 0 "register_operand" "=r")
11014
+ (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
11015
+ (match_operand:SI 1 "register_operand" "r"))))]
11018
+ [(set_attr "v8type" "adc")
11019
+ (set_attr "mode" "SI")]
11022
(define_insn "*neg<mode>2_compare0"
11023
[(set (reg:CC_NZ CC_REGNUM)
11024
(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
11025
@@ -1868,6 +2040,21 @@
11026
(set_attr "mode" "SI")]
11029
+(define_insn "*neg_<shift><mode>3_compare0"
11030
+ [(set (reg:CC_NZ CC_REGNUM)
11032
+ (neg:GPI (ASHIFT:GPI
11033
+ (match_operand:GPI 1 "register_operand" "r")
11034
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")))
11036
+ (set (match_operand:GPI 0 "register_operand" "=r")
11037
+ (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))]
11039
+ "negs\\t%<w>0, %<w>1, <shift> %2"
11040
+ [(set_attr "v8type" "alus_shift")
11041
+ (set_attr "mode" "<MODE>")]
11044
(define_insn "*neg_<shift>_<mode>2"
11045
[(set (match_operand:GPI 0 "register_operand" "=r")
11046
(neg:GPI (ASHIFT:GPI
11047
@@ -2157,6 +2344,18 @@
11048
(set_attr "mode" "<GPI:MODE>")]
11051
+(define_insn "*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>"
11052
+ [(set (reg:CC_SWP CC_REGNUM)
11053
+ (compare:CC_SWP (ashift:GPI
11055
+ (match_operand:ALLX 0 "register_operand" "r"))
11056
+ (match_operand 1 "aarch64_imm3" "Ui3"))
11057
+ (match_operand:GPI 2 "register_operand" "r")))]
11059
+ "cmp\\t%<GPI:w>2, %<GPI:w>0, <su>xt<ALLX:size> %1"
11060
+ [(set_attr "v8type" "alus_ext")
11061
+ (set_attr "mode" "<GPI:MODE>")]
11064
;; -------------------------------------------------------------------
11065
;; Store-flag and conditional select insns
11066
@@ -2210,7 +2409,7 @@
11067
(set_attr "mode" "SI")]
11070
-(define_insn "*cstore<mode>_neg"
11071
+(define_insn "cstore<mode>_neg"
11072
[(set (match_operand:ALLI 0 "register_operand" "=r")
11073
(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
11074
[(match_operand 2 "cc_register" "") (const_int 0)])))]
11075
@@ -2433,6 +2632,69 @@
11076
[(set_attr "v8type" "logic,logic_imm")
11077
(set_attr "mode" "SI")])
11079
+(define_insn "*and<mode>3_compare0"
11080
+ [(set (reg:CC_NZ CC_REGNUM)
11082
+ (and:GPI (match_operand:GPI 1 "register_operand" "%r,r")
11083
+ (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>"))
11085
+ (set (match_operand:GPI 0 "register_operand" "=r,r")
11086
+ (and:GPI (match_dup 1) (match_dup 2)))]
11088
+ "ands\\t%<w>0, %<w>1, %<w>2"
11089
+ [(set_attr "v8type" "logics,logics_imm")
11090
+ (set_attr "mode" "<MODE>")]
11093
+;; zero_extend version of above
11094
+(define_insn "*andsi3_compare0_uxtw"
11095
+ [(set (reg:CC_NZ CC_REGNUM)
11097
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
11098
+ (match_operand:SI 2 "aarch64_logical_operand" "r,K"))
11100
+ (set (match_operand:DI 0 "register_operand" "=r,r")
11101
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
11103
+ "ands\\t%w0, %w1, %w2"
11104
+ [(set_attr "v8type" "logics,logics_imm")
11105
+ (set_attr "mode" "SI")]
11108
+(define_insn "*and_<SHIFT:optab><mode>3_compare0"
11109
+ [(set (reg:CC_NZ CC_REGNUM)
11111
+ (and:GPI (SHIFT:GPI
11112
+ (match_operand:GPI 1 "register_operand" "r")
11113
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
11114
+ (match_operand:GPI 3 "register_operand" "r"))
11116
+ (set (match_operand:GPI 0 "register_operand" "=r")
11117
+ (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))]
11119
+ "ands\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
11120
+ [(set_attr "v8type" "logics_shift")
11121
+ (set_attr "mode" "<MODE>")]
11124
+;; zero_extend version of above
11125
+(define_insn "*and_<SHIFT:optab>si3_compare0_uxtw"
11126
+ [(set (reg:CC_NZ CC_REGNUM)
11128
+ (and:SI (SHIFT:SI
11129
+ (match_operand:SI 1 "register_operand" "r")
11130
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
11131
+ (match_operand:SI 3 "register_operand" "r"))
11133
+ (set (match_operand:DI 0 "register_operand" "=r")
11134
+ (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2))
11135
+ (match_dup 3))))]
11137
+ "ands\\t%w0, %w3, %w1, <SHIFT:shift> %2"
11138
+ [(set_attr "v8type" "logics_shift")
11139
+ (set_attr "mode" "SI")]
11142
(define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
11143
[(set (match_operand:GPI 0 "register_operand" "=r")
11144
(LOGICAL:GPI (SHIFT:GPI
11145
@@ -2703,6 +2965,62 @@
11146
(set_attr "mode" "<MODE>")]
11149
+(define_insn "*extr<mode>5_insn"
11150
+ [(set (match_operand:GPI 0 "register_operand" "=r")
11151
+ (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
11152
+ (match_operand 3 "const_int_operand" "n"))
11153
+ (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r")
11154
+ (match_operand 4 "const_int_operand" "n"))))]
11155
+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) &&
11156
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))"
11157
+ "extr\\t%<w>0, %<w>1, %<w>2, %4"
11158
+ [(set_attr "v8type" "shift")
11159
+ (set_attr "mode" "<MODE>")]
11162
+;; zero_extend version of the above
11163
+(define_insn "*extrsi5_insn_uxtw"
11164
+ [(set (match_operand:DI 0 "register_operand" "=r")
11166
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
11167
+ (match_operand 3 "const_int_operand" "n"))
11168
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
11169
+ (match_operand 4 "const_int_operand" "n")))))]
11170
+ "UINTVAL (operands[3]) < 32 &&
11171
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
11172
+ "extr\\t%w0, %w1, %w2, %4"
11173
+ [(set_attr "v8type" "shift")
11174
+ (set_attr "mode" "SI")]
11177
+(define_insn "*ror<mode>3_insn"
11178
+ [(set (match_operand:GPI 0 "register_operand" "=r")
11179
+ (rotate:GPI (match_operand:GPI 1 "register_operand" "r")
11180
+ (match_operand 2 "const_int_operand" "n")))]
11181
+ "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
11183
+ operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
11184
+ return "ror\\t%<w>0, %<w>1, %3";
11186
+ [(set_attr "v8type" "shift")
11187
+ (set_attr "mode" "<MODE>")]
11190
+;; zero_extend version of the above
11191
+(define_insn "*rorsi3_insn_uxtw"
11192
+ [(set (match_operand:DI 0 "register_operand" "=r")
11194
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
11195
+ (match_operand 2 "const_int_operand" "n"))))]
11196
+ "UINTVAL (operands[2]) < 32"
11198
+ operands[3] = GEN_INT (32 - UINTVAL (operands[2]));
11199
+ return "ror\\t%w0, %w1, %3";
11201
+ [(set_attr "v8type" "shift")
11202
+ (set_attr "mode" "SI")]
11205
(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
11206
[(set (match_operand:GPI 0 "register_operand" "=r")
11208
@@ -3089,6 +3407,27 @@
11209
(set_attr "mode" "<MODE>")]
11212
+(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
11213
+ [(set (match_operand:GPF 0 "register_operand" "=w")
11214
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
11217
+ "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
11218
+ [(set_attr "v8type" "frecp<FRECP:frecp_suffix>")
11219
+ (set_attr "mode" "<MODE>")]
11222
+(define_insn "aarch64_frecps<mode>"
11223
+ [(set (match_operand:GPF 0 "register_operand" "=w")
11224
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
11225
+ (match_operand:GPF 2 "register_operand" "w")]
11228
+ "frecps\\t%<s>0, %<s>1, %<s>2"
11229
+ [(set_attr "v8type" "frecps")
11230
+ (set_attr "mode" "<MODE>")]
11233
;; -------------------------------------------------------------------
11235
;; -------------------------------------------------------------------
11236
--- a/src/gcc/config/aarch64/aarch64-builtins.c
11237
+++ b/src/gcc/config/aarch64/aarch64-builtins.c
11239
#include "langhooks.h"
11240
#include "diagnostic-core.h"
11241
#include "optabs.h"
11242
+#include "gimple.h"
11244
enum aarch64_simd_builtin_type_mode
11258
+#define sf_UP T_SF
11262
@@ -128,123 +131,136 @@
11263
unsigned int fcode;
11264
} aarch64_simd_builtin_datum;
11266
-#define CF(N, X) CODE_FOR_aarch64_##N##X
11267
+#define CF0(N, X) CODE_FOR_aarch64_##N##X
11268
+#define CF1(N, X) CODE_FOR_##N##X##1
11269
+#define CF2(N, X) CODE_FOR_##N##X##2
11270
+#define CF3(N, X) CODE_FOR_##N##X##3
11271
+#define CF4(N, X) CODE_FOR_##N##X##4
11272
+#define CF10(N, X) CODE_FOR_##N##X
11274
-#define VAR1(T, N, A) \
11275
- {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0},
11276
-#define VAR2(T, N, A, B) \
11279
-#define VAR3(T, N, A, B, C) \
11280
- VAR2 (T, N, A, B) \
11282
-#define VAR4(T, N, A, B, C, D) \
11283
- VAR3 (T, N, A, B, C) \
11285
-#define VAR5(T, N, A, B, C, D, E) \
11286
- VAR4 (T, N, A, B, C, D) \
11288
-#define VAR6(T, N, A, B, C, D, E, F) \
11289
- VAR5 (T, N, A, B, C, D, E) \
11291
-#define VAR7(T, N, A, B, C, D, E, F, G) \
11292
- VAR6 (T, N, A, B, C, D, E, F) \
11294
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
11295
- VAR7 (T, N, A, B, C, D, E, F, G) \
11297
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
11298
- VAR8 (T, N, A, B, C, D, E, F, G, H) \
11300
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
11301
- VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
11303
-#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
11304
- VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
11306
-#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
11307
- VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
11309
+#define VAR1(T, N, MAP, A) \
11310
+ {#N, AARCH64_SIMD_##T, UP (A), CF##MAP (N, A), 0},
11311
+#define VAR2(T, N, MAP, A, B) \
11312
+ VAR1 (T, N, MAP, A) \
11313
+ VAR1 (T, N, MAP, B)
11314
+#define VAR3(T, N, MAP, A, B, C) \
11315
+ VAR2 (T, N, MAP, A, B) \
11316
+ VAR1 (T, N, MAP, C)
11317
+#define VAR4(T, N, MAP, A, B, C, D) \
11318
+ VAR3 (T, N, MAP, A, B, C) \
11319
+ VAR1 (T, N, MAP, D)
11320
+#define VAR5(T, N, MAP, A, B, C, D, E) \
11321
+ VAR4 (T, N, MAP, A, B, C, D) \
11322
+ VAR1 (T, N, MAP, E)
11323
+#define VAR6(T, N, MAP, A, B, C, D, E, F) \
11324
+ VAR5 (T, N, MAP, A, B, C, D, E) \
11325
+ VAR1 (T, N, MAP, F)
11326
+#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
11327
+ VAR6 (T, N, MAP, A, B, C, D, E, F) \
11328
+ VAR1 (T, N, MAP, G)
11329
+#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
11330
+ VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
11331
+ VAR1 (T, N, MAP, H)
11332
+#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
11333
+ VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
11334
+ VAR1 (T, N, MAP, I)
11335
+#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
11336
+ VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
11337
+ VAR1 (T, N, MAP, J)
11338
+#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
11339
+ VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
11340
+ VAR1 (T, N, MAP, K)
11341
+#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
11342
+ VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
11343
+ VAR1 (T, N, MAP, L)
11345
/* BUILTIN_<ITERATOR> macros should expand to cover the same range of
11346
modes as is given for each define_mode_iterator in
11347
config/aarch64/iterators.md. */
11349
-#define BUILTIN_DX(T, N) \
11350
- VAR2 (T, N, di, df)
11351
-#define BUILTIN_SDQ_I(T, N) \
11352
- VAR4 (T, N, qi, hi, si, di)
11353
-#define BUILTIN_SD_HSI(T, N) \
11354
- VAR2 (T, N, hi, si)
11355
-#define BUILTIN_V2F(T, N) \
11356
- VAR2 (T, N, v2sf, v2df)
11357
-#define BUILTIN_VALL(T, N) \
11358
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, v2sf, v4sf, v2df)
11359
-#define BUILTIN_VB(T, N) \
11360
- VAR2 (T, N, v8qi, v16qi)
11361
-#define BUILTIN_VD(T, N) \
11362
- VAR4 (T, N, v8qi, v4hi, v2si, v2sf)
11363
-#define BUILTIN_VDC(T, N) \
11364
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
11365
-#define BUILTIN_VDIC(T, N) \
11366
- VAR3 (T, N, v8qi, v4hi, v2si)
11367
-#define BUILTIN_VDN(T, N) \
11368
- VAR3 (T, N, v4hi, v2si, di)
11369
-#define BUILTIN_VDQ(T, N) \
11370
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
11371
-#define BUILTIN_VDQF(T, N) \
11372
- VAR3 (T, N, v2sf, v4sf, v2df)
11373
-#define BUILTIN_VDQHS(T, N) \
11374
- VAR4 (T, N, v4hi, v8hi, v2si, v4si)
11375
-#define BUILTIN_VDQIF(T, N) \
11376
- VAR9 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
11377
-#define BUILTIN_VDQM(T, N) \
11378
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
11379
-#define BUILTIN_VDQV(T, N) \
11380
- VAR5 (T, N, v8qi, v16qi, v4hi, v8hi, v4si)
11381
-#define BUILTIN_VDQ_BHSI(T, N) \
11382
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
11383
-#define BUILTIN_VDQ_I(T, N) \
11384
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
11385
-#define BUILTIN_VDW(T, N) \
11386
- VAR3 (T, N, v8qi, v4hi, v2si)
11387
-#define BUILTIN_VD_BHSI(T, N) \
11388
- VAR3 (T, N, v8qi, v4hi, v2si)
11389
-#define BUILTIN_VD_HSI(T, N) \
11390
- VAR2 (T, N, v4hi, v2si)
11391
-#define BUILTIN_VD_RE(T, N) \
11392
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
11393
-#define BUILTIN_VQ(T, N) \
11394
- VAR6 (T, N, v16qi, v8hi, v4si, v2di, v4sf, v2df)
11395
-#define BUILTIN_VQN(T, N) \
11396
- VAR3 (T, N, v8hi, v4si, v2di)
11397
-#define BUILTIN_VQW(T, N) \
11398
- VAR3 (T, N, v16qi, v8hi, v4si)
11399
-#define BUILTIN_VQ_HSI(T, N) \
11400
- VAR2 (T, N, v8hi, v4si)
11401
-#define BUILTIN_VQ_S(T, N) \
11402
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
11403
-#define BUILTIN_VSDQ_HSI(T, N) \
11404
- VAR6 (T, N, v4hi, v8hi, v2si, v4si, hi, si)
11405
-#define BUILTIN_VSDQ_I(T, N) \
11406
- VAR11 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
11407
-#define BUILTIN_VSDQ_I_BHSI(T, N) \
11408
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
11409
-#define BUILTIN_VSDQ_I_DI(T, N) \
11410
- VAR8 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
11411
-#define BUILTIN_VSD_HSI(T, N) \
11412
- VAR4 (T, N, v4hi, v2si, hi, si)
11413
-#define BUILTIN_VSQN_HSDI(T, N) \
11414
- VAR6 (T, N, v8hi, v4si, v2di, hi, si, di)
11415
-#define BUILTIN_VSTRUCT(T, N) \
11416
- VAR3 (T, N, oi, ci, xi)
11417
+#define BUILTIN_DX(T, N, MAP) \
11418
+ VAR2 (T, N, MAP, di, df)
11419
+#define BUILTIN_GPF(T, N, MAP) \
11420
+ VAR2 (T, N, MAP, sf, df)
11421
+#define BUILTIN_SDQ_I(T, N, MAP) \
11422
+ VAR4 (T, N, MAP, qi, hi, si, di)
11423
+#define BUILTIN_SD_HSI(T, N, MAP) \
11424
+ VAR2 (T, N, MAP, hi, si)
11425
+#define BUILTIN_V2F(T, N, MAP) \
11426
+ VAR2 (T, N, MAP, v2sf, v2df)
11427
+#define BUILTIN_VALL(T, N, MAP) \
11428
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
11429
+ v4si, v2di, v2sf, v4sf, v2df)
11430
+#define BUILTIN_VALLDI(T, N, MAP) \
11431
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
11432
+ v4si, v2di, v2sf, v4sf, v2df, di)
11433
+#define BUILTIN_VB(T, N, MAP) \
11434
+ VAR2 (T, N, MAP, v8qi, v16qi)
11435
+#define BUILTIN_VD(T, N, MAP) \
11436
+ VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
11437
+#define BUILTIN_VDC(T, N, MAP) \
11438
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
11439
+#define BUILTIN_VDIC(T, N, MAP) \
11440
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
11441
+#define BUILTIN_VDN(T, N, MAP) \
11442
+ VAR3 (T, N, MAP, v4hi, v2si, di)
11443
+#define BUILTIN_VDQ(T, N, MAP) \
11444
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
11445
+#define BUILTIN_VDQF(T, N, MAP) \
11446
+ VAR3 (T, N, MAP, v2sf, v4sf, v2df)
11447
+#define BUILTIN_VDQH(T, N, MAP) \
11448
+ VAR2 (T, N, MAP, v4hi, v8hi)
11449
+#define BUILTIN_VDQHS(T, N, MAP) \
11450
+ VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si)
11451
+#define BUILTIN_VDQIF(T, N, MAP) \
11452
+ VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
11453
+#define BUILTIN_VDQM(T, N, MAP) \
11454
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
11455
+#define BUILTIN_VDQV(T, N, MAP) \
11456
+ VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si)
11457
+#define BUILTIN_VDQ_BHSI(T, N, MAP) \
11458
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
11459
+#define BUILTIN_VDQ_I(T, N, MAP) \
11460
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
11461
+#define BUILTIN_VDW(T, N, MAP) \
11462
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
11463
+#define BUILTIN_VD_BHSI(T, N, MAP) \
11464
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
11465
+#define BUILTIN_VD_HSI(T, N, MAP) \
11466
+ VAR2 (T, N, MAP, v4hi, v2si)
11467
+#define BUILTIN_VD_RE(T, N, MAP) \
11468
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
11469
+#define BUILTIN_VQ(T, N, MAP) \
11470
+ VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
11471
+#define BUILTIN_VQN(T, N, MAP) \
11472
+ VAR3 (T, N, MAP, v8hi, v4si, v2di)
11473
+#define BUILTIN_VQW(T, N, MAP) \
11474
+ VAR3 (T, N, MAP, v16qi, v8hi, v4si)
11475
+#define BUILTIN_VQ_HSI(T, N, MAP) \
11476
+ VAR2 (T, N, MAP, v8hi, v4si)
11477
+#define BUILTIN_VQ_S(T, N, MAP) \
11478
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
11479
+#define BUILTIN_VSDQ_HSI(T, N, MAP) \
11480
+ VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si)
11481
+#define BUILTIN_VSDQ_I(T, N, MAP) \
11482
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
11483
+#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \
11484
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
11485
+#define BUILTIN_VSDQ_I_DI(T, N, MAP) \
11486
+ VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
11487
+#define BUILTIN_VSD_HSI(T, N, MAP) \
11488
+ VAR4 (T, N, MAP, v4hi, v2si, hi, si)
11489
+#define BUILTIN_VSQN_HSDI(T, N, MAP) \
11490
+ VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di)
11491
+#define BUILTIN_VSTRUCT(T, N, MAP) \
11492
+ VAR3 (T, N, MAP, oi, ci, xi)
11494
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
11495
#include "aarch64-simd-builtins.def"
11499
-#define VAR1(T, N, A) \
11500
+#define VAR1(T, N, MAP, A) \
11501
AARCH64_SIMD_BUILTIN_##N##A,
11503
enum aarch64_builtins
11504
@@ -257,53 +273,6 @@
11505
AARCH64_BUILTIN_MAX
11509
-#undef BUILTIN_SDQ_I
11510
-#undef BUILTIN_SD_HSI
11511
-#undef BUILTIN_V2F
11512
-#undef BUILTIN_VALL
11515
-#undef BUILTIN_VDC
11516
-#undef BUILTIN_VDIC
11517
-#undef BUILTIN_VDN
11518
-#undef BUILTIN_VDQ
11519
-#undef BUILTIN_VDQF
11520
-#undef BUILTIN_VDQHS
11521
-#undef BUILTIN_VDQIF
11522
-#undef BUILTIN_VDQM
11523
-#undef BUILTIN_VDQV
11524
-#undef BUILTIN_VDQ_BHSI
11525
-#undef BUILTIN_VDQ_I
11526
-#undef BUILTIN_VDW
11527
-#undef BUILTIN_VD_BHSI
11528
-#undef BUILTIN_VD_HSI
11529
-#undef BUILTIN_VD_RE
11531
-#undef BUILTIN_VQN
11532
-#undef BUILTIN_VQW
11533
-#undef BUILTIN_VQ_HSI
11534
-#undef BUILTIN_VQ_S
11535
-#undef BUILTIN_VSDQ_HSI
11536
-#undef BUILTIN_VSDQ_I
11537
-#undef BUILTIN_VSDQ_I_BHSI
11538
-#undef BUILTIN_VSDQ_I_DI
11539
-#undef BUILTIN_VSD_HSI
11540
-#undef BUILTIN_VSQN_HSDI
11541
-#undef BUILTIN_VSTRUCT
11555
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
11557
#define NUM_DREG_TYPES 6
11558
@@ -609,7 +578,7 @@
11560
"v8qi", "v4hi", "v2si", "v2sf", "di", "df",
11561
"v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
11562
- "ti", "ei", "oi", "xi", "si", "hi", "qi"
11563
+ "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
11567
@@ -1258,19 +1227,19 @@
11568
&& in_mode == N##Fmode && in_n == C)
11569
case BUILT_IN_FLOOR:
11570
case BUILT_IN_FLOORF:
11571
- return AARCH64_FIND_FRINT_VARIANT (frintm);
11572
+ return AARCH64_FIND_FRINT_VARIANT (floor);
11573
case BUILT_IN_CEIL:
11574
case BUILT_IN_CEILF:
11575
- return AARCH64_FIND_FRINT_VARIANT (frintp);
11576
+ return AARCH64_FIND_FRINT_VARIANT (ceil);
11577
case BUILT_IN_TRUNC:
11578
case BUILT_IN_TRUNCF:
11579
- return AARCH64_FIND_FRINT_VARIANT (frintz);
11580
+ return AARCH64_FIND_FRINT_VARIANT (btrunc);
11581
case BUILT_IN_ROUND:
11582
case BUILT_IN_ROUNDF:
11583
- return AARCH64_FIND_FRINT_VARIANT (frinta);
11584
+ return AARCH64_FIND_FRINT_VARIANT (round);
11585
case BUILT_IN_NEARBYINT:
11586
case BUILT_IN_NEARBYINTF:
11587
- return AARCH64_FIND_FRINT_VARIANT (frinti);
11588
+ return AARCH64_FIND_FRINT_VARIANT (nearbyint);
11589
case BUILT_IN_SQRT:
11590
case BUILT_IN_SQRTF:
11591
return AARCH64_FIND_FRINT_VARIANT (sqrt);
11592
@@ -1279,9 +1248,51 @@
11593
(out_mode == N##Imode && out_n == C \
11594
&& in_mode == N##Fmode && in_n == C)
11595
case BUILT_IN_LFLOOR:
11596
- return AARCH64_FIND_FRINT_VARIANT (fcvtms);
11597
+ case BUILT_IN_IFLOORF:
11599
+ tree new_tree = NULL_TREE;
11600
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
11602
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2dfv2di];
11603
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
11605
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv4sfv4si];
11606
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
11608
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2sfv2si];
11611
case BUILT_IN_LCEIL:
11612
- return AARCH64_FIND_FRINT_VARIANT (fcvtps);
11613
+ case BUILT_IN_ICEILF:
11615
+ tree new_tree = NULL_TREE;
11616
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
11618
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2dfv2di];
11619
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
11621
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv4sfv4si];
11622
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
11624
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2sfv2si];
11627
+ case BUILT_IN_LROUND:
11628
+ case BUILT_IN_IROUNDF:
11630
+ tree new_tree = NULL_TREE;
11631
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
11633
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2dfv2di];
11634
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
11636
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv4sfv4si];
11637
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
11639
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2sfv2si];
11646
@@ -1289,5 +1300,145 @@
11652
+#define VAR1(T, N, MAP, A) \
11653
+ case AARCH64_SIMD_BUILTIN_##N##A:
11656
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
11657
+ bool ignore ATTRIBUTE_UNUSED)
11659
+ int fcode = DECL_FUNCTION_CODE (fndecl);
11660
+ tree type = TREE_TYPE (TREE_TYPE (fndecl));
11664
+ BUILTIN_VDQF (UNOP, abs, 2)
11665
+ return fold_build1 (ABS_EXPR, type, args[0]);
11667
+ BUILTIN_VALLDI (BINOP, cmge, 0)
11668
+ return fold_build2 (GE_EXPR, type, args[0], args[1]);
11670
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
11671
+ return fold_build2 (GT_EXPR, type, args[0], args[1]);
11673
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
11674
+ return fold_build2 (EQ_EXPR, type, args[0], args[1]);
11676
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
11678
+ tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
11679
+ tree vec_zero_node = build_zero_cst (type);
11680
+ return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
11683
+ VAR1 (UNOP, floatv2si, 2, v2sf)
11684
+ VAR1 (UNOP, floatv4si, 2, v4sf)
11685
+ VAR1 (UNOP, floatv2di, 2, v2df)
11686
+ return fold_build1 (FLOAT_EXPR, type, args[0]);
11691
+ return NULL_TREE;
11695
+aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
11697
+ bool changed = false;
11698
+ gimple stmt = gsi_stmt (*gsi);
11699
+ tree call = gimple_call_fn (stmt);
11701
+ gimple new_stmt = NULL;
11704
+ fndecl = gimple_call_fndecl (stmt);
11707
+ int fcode = DECL_FUNCTION_CODE (fndecl);
11708
+ int nargs = gimple_call_num_args (stmt);
11709
+ tree *args = (nargs > 0
11710
+ ? gimple_call_arg_ptr (stmt, 0)
11711
+ : &error_mark_node);
11715
+ BUILTIN_VDQF (UNOP, addv, 0)
11716
+ new_stmt = gimple_build_assign_with_ops (
11718
+ gimple_call_lhs (stmt),
11730
+ gsi_replace (gsi, new_stmt, true);
11737
#undef AARCH64_CHECK_BUILTIN_MODE
11738
#undef AARCH64_FIND_FRINT_VARIANT
11740
+#undef BUILTIN_SDQ_I
11741
+#undef BUILTIN_SD_HSI
11742
+#undef BUILTIN_V2F
11743
+#undef BUILTIN_VALL
11746
+#undef BUILTIN_VDC
11747
+#undef BUILTIN_VDIC
11748
+#undef BUILTIN_VDN
11749
+#undef BUILTIN_VDQ
11750
+#undef BUILTIN_VDQF
11751
+#undef BUILTIN_VDQH
11752
+#undef BUILTIN_VDQHS
11753
+#undef BUILTIN_VDQIF
11754
+#undef BUILTIN_VDQM
11755
+#undef BUILTIN_VDQV
11756
+#undef BUILTIN_VDQ_BHSI
11757
+#undef BUILTIN_VDQ_I
11758
+#undef BUILTIN_VDW
11759
+#undef BUILTIN_VD_BHSI
11760
+#undef BUILTIN_VD_HSI
11761
+#undef BUILTIN_VD_RE
11763
+#undef BUILTIN_VQN
11764
+#undef BUILTIN_VQW
11765
+#undef BUILTIN_VQ_HSI
11766
+#undef BUILTIN_VQ_S
11767
+#undef BUILTIN_VSDQ_HSI
11768
+#undef BUILTIN_VSDQ_I
11769
+#undef BUILTIN_VSDQ_I_BHSI
11770
+#undef BUILTIN_VSDQ_I_DI
11771
+#undef BUILTIN_VSD_HSI
11772
+#undef BUILTIN_VSQN_HSDI
11773
+#undef BUILTIN_VSTRUCT
11792
--- a/src/gcc/config/aarch64/aarch64-protos.h
11793
+++ b/src/gcc/config/aarch64/aarch64-protos.h
11794
@@ -140,6 +140,7 @@
11795
bool aarch64_float_const_zero_rtx_p (rtx);
11796
bool aarch64_function_arg_regno_p (unsigned);
11797
bool aarch64_gen_movmemqi (rtx *);
11798
+bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
11799
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
11800
bool aarch64_is_long_call_p (rtx);
11801
bool aarch64_label_mentioned_p (rtx);
11802
@@ -151,6 +152,7 @@
11803
bool aarch64_regno_ok_for_index_p (int, bool);
11804
bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
11805
bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
11806
+bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
11807
bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
11808
bool aarch64_symbolic_address_p (rtx);
11809
bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
11810
@@ -177,6 +179,7 @@
11811
bool aarch64_simd_mem_operand_p (rtx);
11812
rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
11813
rtx aarch64_tls_get_addr (void);
11814
+tree aarch64_fold_builtin (tree, int, tree *, bool);
11815
unsigned aarch64_dbx_register_number (unsigned);
11816
unsigned aarch64_trampoline_size (void);
11817
void aarch64_asm_output_labelref (FILE *, const char *);
11818
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
11819
+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
11820
@@ -18,241 +18,329 @@
11821
along with GCC; see the file COPYING3. If not see
11822
<http://www.gnu.org/licenses/>. */
11824
-/* In the list below, the BUILTIN_<ITERATOR> macros should
11825
- correspond to the iterator used to construct the instruction's
11826
- patterns in aarch64-simd.md. A helpful idiom to follow when
11827
- adding new builtins is to add a line for each pattern in the md
11828
- file. Thus, ADDP, which has one pattern defined for the VD_BHSI
11829
- iterator, and one for DImode, has two entries below. */
11830
+/* In the list below, the BUILTIN_<ITERATOR> macros expand to create
11831
+ builtins for each of the modes described by <ITERATOR>. When adding
11832
+ new builtins to this list, a helpful idiom to follow is to add
11833
+ a line for each pattern in the md file. Thus, ADDP, which has one
11834
+ pattern defined for the VD_BHSI iterator, and one for DImode, has two
11837
- BUILTIN_VD_RE (CREATE, create)
11838
- BUILTIN_VQ_S (GETLANE, get_lane_signed)
11839
- BUILTIN_VDQ (GETLANE, get_lane_unsigned)
11840
- BUILTIN_VDQF (GETLANE, get_lane)
11841
- VAR1 (GETLANE, get_lane, di)
11842
- BUILTIN_VDC (COMBINE, combine)
11843
- BUILTIN_VB (BINOP, pmul)
11844
- BUILTIN_VDQF (UNOP, sqrt)
11845
- BUILTIN_VD_BHSI (BINOP, addp)
11846
- VAR1 (UNOP, addp, di)
11847
+ Parameter 1 is the 'type' of the intrinsic. This is used to
11848
+ describe the type modifiers (for example; unsigned) applied to
11849
+ each of the parameters to the intrinsic function.
11851
- BUILTIN_VD_RE (REINTERP, reinterpretdi)
11852
- BUILTIN_VDC (REINTERP, reinterpretv8qi)
11853
- BUILTIN_VDC (REINTERP, reinterpretv4hi)
11854
- BUILTIN_VDC (REINTERP, reinterpretv2si)
11855
- BUILTIN_VDC (REINTERP, reinterpretv2sf)
11856
- BUILTIN_VQ (REINTERP, reinterpretv16qi)
11857
- BUILTIN_VQ (REINTERP, reinterpretv8hi)
11858
- BUILTIN_VQ (REINTERP, reinterpretv4si)
11859
- BUILTIN_VQ (REINTERP, reinterpretv4sf)
11860
- BUILTIN_VQ (REINTERP, reinterpretv2di)
11861
- BUILTIN_VQ (REINTERP, reinterpretv2df)
11862
+ Parameter 2 is the name of the intrinsic. This is appended
11863
+ to `__builtin_aarch64_<name><mode>` to give the intrinsic name
11864
+ as exported to the front-ends.
11866
- BUILTIN_VDQ_I (BINOP, dup_lane)
11867
- BUILTIN_SDQ_I (BINOP, dup_lane)
11868
+ Parameter 3 describes how to map from the name to the CODE_FOR_
11869
+ macro holding the RTL pattern for the intrinsic. This mapping is:
11870
+ 0 - CODE_FOR_aarch64_<name><mode>
11871
+ 1-9 - CODE_FOR_<name><mode><1-9>
11872
+ 10 - CODE_FOR_<name><mode>. */
11874
+ BUILTIN_VD_RE (CREATE, create, 0)
11875
+ BUILTIN_VQ_S (GETLANE, get_lane_signed, 0)
11876
+ BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0)
11877
+ BUILTIN_VDQF (GETLANE, get_lane, 0)
11878
+ VAR1 (GETLANE, get_lane, 0, di)
11879
+ BUILTIN_VDC (COMBINE, combine, 0)
11880
+ BUILTIN_VB (BINOP, pmul, 0)
11881
+ BUILTIN_VDQF (UNOP, sqrt, 2)
11882
+ BUILTIN_VD_BHSI (BINOP, addp, 0)
11883
+ VAR1 (UNOP, addp, 0, di)
11885
+ BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
11886
+ BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
11887
+ BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
11888
+ BUILTIN_VDC (REINTERP, reinterpretv2si, 0)
11889
+ BUILTIN_VDC (REINTERP, reinterpretv2sf, 0)
11890
+ BUILTIN_VQ (REINTERP, reinterpretv16qi, 0)
11891
+ BUILTIN_VQ (REINTERP, reinterpretv8hi, 0)
11892
+ BUILTIN_VQ (REINTERP, reinterpretv4si, 0)
11893
+ BUILTIN_VQ (REINTERP, reinterpretv4sf, 0)
11894
+ BUILTIN_VQ (REINTERP, reinterpretv2di, 0)
11895
+ BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
11897
+ BUILTIN_VDQ_I (BINOP, dup_lane, 0)
11898
+ BUILTIN_SDQ_I (BINOP, dup_lane, 0)
11899
/* Implemented by aarch64_<sur>q<r>shl<mode>. */
11900
- BUILTIN_VSDQ_I (BINOP, sqshl)
11901
- BUILTIN_VSDQ_I (BINOP, uqshl)
11902
- BUILTIN_VSDQ_I (BINOP, sqrshl)
11903
- BUILTIN_VSDQ_I (BINOP, uqrshl)
11904
+ BUILTIN_VSDQ_I (BINOP, sqshl, 0)
11905
+ BUILTIN_VSDQ_I (BINOP, uqshl, 0)
11906
+ BUILTIN_VSDQ_I (BINOP, sqrshl, 0)
11907
+ BUILTIN_VSDQ_I (BINOP, uqrshl, 0)
11908
/* Implemented by aarch64_<su_optab><optab><mode>. */
11909
- BUILTIN_VSDQ_I (BINOP, sqadd)
11910
- BUILTIN_VSDQ_I (BINOP, uqadd)
11911
- BUILTIN_VSDQ_I (BINOP, sqsub)
11912
- BUILTIN_VSDQ_I (BINOP, uqsub)
11913
+ BUILTIN_VSDQ_I (BINOP, sqadd, 0)
11914
+ BUILTIN_VSDQ_I (BINOP, uqadd, 0)
11915
+ BUILTIN_VSDQ_I (BINOP, sqsub, 0)
11916
+ BUILTIN_VSDQ_I (BINOP, uqsub, 0)
11917
/* Implemented by aarch64_<sur>qadd<mode>. */
11918
- BUILTIN_VSDQ_I (BINOP, suqadd)
11919
- BUILTIN_VSDQ_I (BINOP, usqadd)
11920
+ BUILTIN_VSDQ_I (BINOP, suqadd, 0)
11921
+ BUILTIN_VSDQ_I (BINOP, usqadd, 0)
11923
/* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */
11924
- BUILTIN_VDC (GETLANE, get_dregoi)
11925
- BUILTIN_VDC (GETLANE, get_dregci)
11926
- BUILTIN_VDC (GETLANE, get_dregxi)
11927
+ BUILTIN_VDC (GETLANE, get_dregoi, 0)
11928
+ BUILTIN_VDC (GETLANE, get_dregci, 0)
11929
+ BUILTIN_VDC (GETLANE, get_dregxi, 0)
11930
/* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
11931
- BUILTIN_VQ (GETLANE, get_qregoi)
11932
- BUILTIN_VQ (GETLANE, get_qregci)
11933
- BUILTIN_VQ (GETLANE, get_qregxi)
11934
+ BUILTIN_VQ (GETLANE, get_qregoi, 0)
11935
+ BUILTIN_VQ (GETLANE, get_qregci, 0)
11936
+ BUILTIN_VQ (GETLANE, get_qregxi, 0)
11937
/* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
11938
- BUILTIN_VQ (SETLANE, set_qregoi)
11939
- BUILTIN_VQ (SETLANE, set_qregci)
11940
- BUILTIN_VQ (SETLANE, set_qregxi)
11941
+ BUILTIN_VQ (SETLANE, set_qregoi, 0)
11942
+ BUILTIN_VQ (SETLANE, set_qregci, 0)
11943
+ BUILTIN_VQ (SETLANE, set_qregxi, 0)
11944
/* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
11945
- BUILTIN_VDC (LOADSTRUCT, ld2)
11946
- BUILTIN_VDC (LOADSTRUCT, ld3)
11947
- BUILTIN_VDC (LOADSTRUCT, ld4)
11948
+ BUILTIN_VDC (LOADSTRUCT, ld2, 0)
11949
+ BUILTIN_VDC (LOADSTRUCT, ld3, 0)
11950
+ BUILTIN_VDC (LOADSTRUCT, ld4, 0)
11951
/* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */
11952
- BUILTIN_VQ (LOADSTRUCT, ld2)
11953
- BUILTIN_VQ (LOADSTRUCT, ld3)
11954
- BUILTIN_VQ (LOADSTRUCT, ld4)
11955
+ BUILTIN_VQ (LOADSTRUCT, ld2, 0)
11956
+ BUILTIN_VQ (LOADSTRUCT, ld3, 0)
11957
+ BUILTIN_VQ (LOADSTRUCT, ld4, 0)
11958
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
11959
- BUILTIN_VDC (STORESTRUCT, st2)
11960
- BUILTIN_VDC (STORESTRUCT, st3)
11961
- BUILTIN_VDC (STORESTRUCT, st4)
11962
+ BUILTIN_VDC (STORESTRUCT, st2, 0)
11963
+ BUILTIN_VDC (STORESTRUCT, st3, 0)
11964
+ BUILTIN_VDC (STORESTRUCT, st4, 0)
11965
/* Implemented by aarch64_st<VSTRUCT:nregs><VQ:mode>. */
11966
- BUILTIN_VQ (STORESTRUCT, st2)
11967
- BUILTIN_VQ (STORESTRUCT, st3)
11968
- BUILTIN_VQ (STORESTRUCT, st4)
11969
+ BUILTIN_VQ (STORESTRUCT, st2, 0)
11970
+ BUILTIN_VQ (STORESTRUCT, st3, 0)
11971
+ BUILTIN_VQ (STORESTRUCT, st4, 0)
11973
- BUILTIN_VQW (BINOP, saddl2)
11974
- BUILTIN_VQW (BINOP, uaddl2)
11975
- BUILTIN_VQW (BINOP, ssubl2)
11976
- BUILTIN_VQW (BINOP, usubl2)
11977
- BUILTIN_VQW (BINOP, saddw2)
11978
- BUILTIN_VQW (BINOP, uaddw2)
11979
- BUILTIN_VQW (BINOP, ssubw2)
11980
- BUILTIN_VQW (BINOP, usubw2)
11981
+ BUILTIN_VQW (BINOP, saddl2, 0)
11982
+ BUILTIN_VQW (BINOP, uaddl2, 0)
11983
+ BUILTIN_VQW (BINOP, ssubl2, 0)
11984
+ BUILTIN_VQW (BINOP, usubl2, 0)
11985
+ BUILTIN_VQW (BINOP, saddw2, 0)
11986
+ BUILTIN_VQW (BINOP, uaddw2, 0)
11987
+ BUILTIN_VQW (BINOP, ssubw2, 0)
11988
+ BUILTIN_VQW (BINOP, usubw2, 0)
11989
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>. */
11990
- BUILTIN_VDW (BINOP, saddl)
11991
- BUILTIN_VDW (BINOP, uaddl)
11992
- BUILTIN_VDW (BINOP, ssubl)
11993
- BUILTIN_VDW (BINOP, usubl)
11994
+ BUILTIN_VDW (BINOP, saddl, 0)
11995
+ BUILTIN_VDW (BINOP, uaddl, 0)
11996
+ BUILTIN_VDW (BINOP, ssubl, 0)
11997
+ BUILTIN_VDW (BINOP, usubl, 0)
11998
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>. */
11999
- BUILTIN_VDW (BINOP, saddw)
12000
- BUILTIN_VDW (BINOP, uaddw)
12001
- BUILTIN_VDW (BINOP, ssubw)
12002
- BUILTIN_VDW (BINOP, usubw)
12003
+ BUILTIN_VDW (BINOP, saddw, 0)
12004
+ BUILTIN_VDW (BINOP, uaddw, 0)
12005
+ BUILTIN_VDW (BINOP, ssubw, 0)
12006
+ BUILTIN_VDW (BINOP, usubw, 0)
12007
/* Implemented by aarch64_<sur>h<addsub><mode>. */
12008
- BUILTIN_VQ_S (BINOP, shadd)
12009
- BUILTIN_VQ_S (BINOP, uhadd)
12010
- BUILTIN_VQ_S (BINOP, srhadd)
12011
- BUILTIN_VQ_S (BINOP, urhadd)
12012
+ BUILTIN_VQ_S (BINOP, shadd, 0)
12013
+ BUILTIN_VQ_S (BINOP, uhadd, 0)
12014
+ BUILTIN_VQ_S (BINOP, srhadd, 0)
12015
+ BUILTIN_VQ_S (BINOP, urhadd, 0)
12016
/* Implemented by aarch64_<sur><addsub>hn<mode>. */
12017
- BUILTIN_VQN (BINOP, addhn)
12018
- BUILTIN_VQN (BINOP, raddhn)
12019
+ BUILTIN_VQN (BINOP, addhn, 0)
12020
+ BUILTIN_VQN (BINOP, raddhn, 0)
12021
/* Implemented by aarch64_<sur><addsub>hn2<mode>. */
12022
- BUILTIN_VQN (TERNOP, addhn2)
12023
- BUILTIN_VQN (TERNOP, raddhn2)
12024
+ BUILTIN_VQN (TERNOP, addhn2, 0)
12025
+ BUILTIN_VQN (TERNOP, raddhn2, 0)
12027
- BUILTIN_VSQN_HSDI (UNOP, sqmovun)
12028
+ BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
12029
/* Implemented by aarch64_<sur>qmovn<mode>. */
12030
- BUILTIN_VSQN_HSDI (UNOP, sqmovn)
12031
- BUILTIN_VSQN_HSDI (UNOP, uqmovn)
12032
+ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
12033
+ BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
12034
/* Implemented by aarch64_s<optab><mode>. */
12035
- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs)
12036
- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg)
12037
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
12038
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
12040
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane)
12041
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane)
12042
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq)
12043
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq)
12044
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2)
12045
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2)
12046
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane)
12047
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane)
12048
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq)
12049
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq)
12050
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n)
12051
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n)
12052
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
12053
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
12054
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0)
12055
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0)
12056
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0)
12057
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0)
12058
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0)
12059
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0)
12060
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0)
12061
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0)
12062
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0)
12063
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0)
12064
/* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */
12065
- BUILTIN_VSD_HSI (TERNOP, sqdmlal)
12066
- BUILTIN_VSD_HSI (TERNOP, sqdmlsl)
12067
+ BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0)
12068
+ BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0)
12069
/* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>. */
12070
- BUILTIN_VD_HSI (TERNOP, sqdmlal_n)
12071
- BUILTIN_VD_HSI (TERNOP, sqdmlsl_n)
12072
+ BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0)
12073
+ BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0)
12075
- BUILTIN_VSD_HSI (BINOP, sqdmull)
12076
- BUILTIN_VSD_HSI (TERNOP, sqdmull_lane)
12077
- BUILTIN_VD_HSI (TERNOP, sqdmull_laneq)
12078
- BUILTIN_VD_HSI (BINOP, sqdmull_n)
12079
- BUILTIN_VQ_HSI (BINOP, sqdmull2)
12080
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane)
12081
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq)
12082
- BUILTIN_VQ_HSI (BINOP, sqdmull2_n)
12083
+ BUILTIN_VSD_HSI (BINOP, sqdmull, 0)
12084
+ BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0)
12085
+ BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0)
12086
+ BUILTIN_VD_HSI (BINOP, sqdmull_n, 0)
12087
+ BUILTIN_VQ_HSI (BINOP, sqdmull2, 0)
12088
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0)
12089
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0)
12090
+ BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0)
12091
/* Implemented by aarch64_sq<r>dmulh<mode>. */
12092
- BUILTIN_VSDQ_HSI (BINOP, sqdmulh)
12093
- BUILTIN_VSDQ_HSI (BINOP, sqrdmulh)
12094
+ BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0)
12095
+ BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0)
12096
/* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */
12097
- BUILTIN_VDQHS (TERNOP, sqdmulh_lane)
12098
- BUILTIN_VDQHS (TERNOP, sqdmulh_laneq)
12099
- BUILTIN_VDQHS (TERNOP, sqrdmulh_lane)
12100
- BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq)
12101
- BUILTIN_SD_HSI (TERNOP, sqdmulh_lane)
12102
- BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane)
12103
+ BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0)
12104
+ BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0)
12105
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0)
12106
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0)
12107
+ BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0)
12108
+ BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0)
12110
- BUILTIN_VSDQ_I_DI (BINOP, sshl_n)
12111
- BUILTIN_VSDQ_I_DI (BINOP, ushl_n)
12112
+ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
12113
/* Implemented by aarch64_<sur>shl<mode>. */
12114
- BUILTIN_VSDQ_I_DI (BINOP, sshl)
12115
- BUILTIN_VSDQ_I_DI (BINOP, ushl)
12116
- BUILTIN_VSDQ_I_DI (BINOP, srshl)
12117
- BUILTIN_VSDQ_I_DI (BINOP, urshl)
12118
+ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
12119
+ BUILTIN_VSDQ_I_DI (BINOP, ushl, 0)
12120
+ BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
12121
+ BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
12123
- BUILTIN_VSDQ_I_DI (SHIFTIMM, sshr_n)
12124
- BUILTIN_VSDQ_I_DI (SHIFTIMM, ushr_n)
12125
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3)
12126
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
12127
/* Implemented by aarch64_<sur>shr_n<mode>. */
12128
- BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n)
12129
- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n)
12130
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
12131
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
12132
/* Implemented by aarch64_<sur>sra_n<mode>. */
12133
- BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n)
12134
- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n)
12135
- BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n)
12136
- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n)
12137
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0)
12138
+ BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0)
12139
+ BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0)
12140
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0)
12141
/* Implemented by aarch64_<sur>shll_n<mode>. */
12142
- BUILTIN_VDW (SHIFTIMM, sshll_n)
12143
- BUILTIN_VDW (SHIFTIMM, ushll_n)
12144
+ BUILTIN_VDW (SHIFTIMM, sshll_n, 0)
12145
+ BUILTIN_VDW (SHIFTIMM, ushll_n, 0)
12146
/* Implemented by aarch64_<sur>shll2_n<mode>. */
12147
- BUILTIN_VQW (SHIFTIMM, sshll2_n)
12148
- BUILTIN_VQW (SHIFTIMM, ushll2_n)
12149
+ BUILTIN_VQW (SHIFTIMM, sshll2_n, 0)
12150
+ BUILTIN_VQW (SHIFTIMM, ushll2_n, 0)
12151
/* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
12152
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n)
12153
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n)
12154
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n)
12155
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n)
12156
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n)
12157
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n)
12158
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0)
12159
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0)
12160
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0)
12161
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0)
12162
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0)
12163
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0)
12164
/* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
12165
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n)
12166
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n)
12167
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n)
12168
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n)
12169
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
12170
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0)
12171
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
12172
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0)
12173
/* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
12174
- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n)
12175
- BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n)
12176
- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n)
12177
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0)
12178
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
12179
+ BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
12181
/* Implemented by aarch64_cm<cmp><mode>. */
12182
- BUILTIN_VSDQ_I_DI (BINOP, cmeq)
12183
- BUILTIN_VSDQ_I_DI (BINOP, cmge)
12184
- BUILTIN_VSDQ_I_DI (BINOP, cmgt)
12185
- BUILTIN_VSDQ_I_DI (BINOP, cmle)
12186
- BUILTIN_VSDQ_I_DI (BINOP, cmlt)
12187
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
12188
+ BUILTIN_VALLDI (BINOP, cmge, 0)
12189
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
12190
+ BUILTIN_VALLDI (BINOP, cmle, 0)
12191
+ BUILTIN_VALLDI (BINOP, cmlt, 0)
12192
/* Implemented by aarch64_cm<cmp><mode>. */
12193
- BUILTIN_VSDQ_I_DI (BINOP, cmhs)
12194
- BUILTIN_VSDQ_I_DI (BINOP, cmhi)
12195
- BUILTIN_VSDQ_I_DI (BINOP, cmtst)
12196
+ BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
12197
+ BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
12198
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
12200
/* Implemented by aarch64_<fmaxmin><mode>. */
12201
- BUILTIN_VDQF (BINOP, fmax)
12202
- BUILTIN_VDQF (BINOP, fmin)
12203
- /* Implemented by aarch64_<maxmin><mode>. */
12204
- BUILTIN_VDQ_BHSI (BINOP, smax)
12205
- BUILTIN_VDQ_BHSI (BINOP, smin)
12206
- BUILTIN_VDQ_BHSI (BINOP, umax)
12207
- BUILTIN_VDQ_BHSI (BINOP, umin)
12208
+ BUILTIN_VDQF (BINOP, fmax, 0)
12209
+ BUILTIN_VDQF (BINOP, fmin, 0)
12211
- /* Implemented by aarch64_frint<frint_suffix><mode>. */
12212
- BUILTIN_VDQF (UNOP, frintz)
12213
- BUILTIN_VDQF (UNOP, frintp)
12214
- BUILTIN_VDQF (UNOP, frintm)
12215
- BUILTIN_VDQF (UNOP, frinti)
12216
- BUILTIN_VDQF (UNOP, frintx)
12217
- BUILTIN_VDQF (UNOP, frinta)
12218
+ /* Implemented by aarch64_addv<mode>. */
12219
+ BUILTIN_VDQF (UNOP, addv, 0)
12221
- /* Implemented by aarch64_fcvt<frint_suffix><su><mode>. */
12222
- BUILTIN_VDQF (UNOP, fcvtzs)
12223
- BUILTIN_VDQF (UNOP, fcvtzu)
12224
- BUILTIN_VDQF (UNOP, fcvtas)
12225
- BUILTIN_VDQF (UNOP, fcvtau)
12226
- BUILTIN_VDQF (UNOP, fcvtps)
12227
- BUILTIN_VDQF (UNOP, fcvtpu)
12228
- BUILTIN_VDQF (UNOP, fcvtms)
12229
- BUILTIN_VDQF (UNOP, fcvtmu)
12230
+ /* Implemented by <maxmin><mode>3. */
12231
+ BUILTIN_VDQ_BHSI (BINOP, smax, 3)
12232
+ BUILTIN_VDQ_BHSI (BINOP, smin, 3)
12233
+ BUILTIN_VDQ_BHSI (BINOP, umax, 3)
12234
+ BUILTIN_VDQ_BHSI (BINOP, umin, 3)
12236
+ /* Implemented by <frint_pattern><mode>2. */
12237
+ BUILTIN_VDQF (UNOP, btrunc, 2)
12238
+ BUILTIN_VDQF (UNOP, ceil, 2)
12239
+ BUILTIN_VDQF (UNOP, floor, 2)
12240
+ BUILTIN_VDQF (UNOP, nearbyint, 2)
12241
+ BUILTIN_VDQF (UNOP, rint, 2)
12242
+ BUILTIN_VDQF (UNOP, round, 2)
12243
+ BUILTIN_VDQF (UNOP, frintn, 2)
12245
+ /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
12246
+ VAR1 (UNOP, lbtruncv2sf, 2, v2si)
12247
+ VAR1 (UNOP, lbtruncv4sf, 2, v4si)
12248
+ VAR1 (UNOP, lbtruncv2df, 2, v2di)
12250
+ VAR1 (UNOP, lbtruncuv2sf, 2, v2si)
12251
+ VAR1 (UNOP, lbtruncuv4sf, 2, v4si)
12252
+ VAR1 (UNOP, lbtruncuv2df, 2, v2di)
12254
+ VAR1 (UNOP, lroundv2sf, 2, v2si)
12255
+ VAR1 (UNOP, lroundv4sf, 2, v4si)
12256
+ VAR1 (UNOP, lroundv2df, 2, v2di)
12257
+ /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
12258
+ VAR1 (UNOP, lroundsf, 2, si)
12259
+ VAR1 (UNOP, lrounddf, 2, di)
12261
+ VAR1 (UNOP, lrounduv2sf, 2, v2si)
12262
+ VAR1 (UNOP, lrounduv4sf, 2, v4si)
12263
+ VAR1 (UNOP, lrounduv2df, 2, v2di)
12264
+ VAR1 (UNOP, lroundusf, 2, si)
12265
+ VAR1 (UNOP, lroundudf, 2, di)
12267
+ VAR1 (UNOP, lceilv2sf, 2, v2si)
12268
+ VAR1 (UNOP, lceilv4sf, 2, v4si)
12269
+ VAR1 (UNOP, lceilv2df, 2, v2di)
12271
+ VAR1 (UNOP, lceiluv2sf, 2, v2si)
12272
+ VAR1 (UNOP, lceiluv4sf, 2, v4si)
12273
+ VAR1 (UNOP, lceiluv2df, 2, v2di)
12274
+ VAR1 (UNOP, lceilusf, 2, si)
12275
+ VAR1 (UNOP, lceiludf, 2, di)
12277
+ VAR1 (UNOP, lfloorv2sf, 2, v2si)
12278
+ VAR1 (UNOP, lfloorv4sf, 2, v4si)
12279
+ VAR1 (UNOP, lfloorv2df, 2, v2di)
12281
+ VAR1 (UNOP, lflooruv2sf, 2, v2si)
12282
+ VAR1 (UNOP, lflooruv4sf, 2, v4si)
12283
+ VAR1 (UNOP, lflooruv2df, 2, v2di)
12284
+ VAR1 (UNOP, lfloorusf, 2, si)
12285
+ VAR1 (UNOP, lfloorudf, 2, di)
12287
+ VAR1 (UNOP, lfrintnv2sf, 2, v2si)
12288
+ VAR1 (UNOP, lfrintnv4sf, 2, v4si)
12289
+ VAR1 (UNOP, lfrintnv2df, 2, v2di)
12290
+ VAR1 (UNOP, lfrintnsf, 2, si)
12291
+ VAR1 (UNOP, lfrintndf, 2, di)
12293
+ VAR1 (UNOP, lfrintnuv2sf, 2, v2si)
12294
+ VAR1 (UNOP, lfrintnuv4sf, 2, v4si)
12295
+ VAR1 (UNOP, lfrintnuv2df, 2, v2di)
12296
+ VAR1 (UNOP, lfrintnusf, 2, si)
12297
+ VAR1 (UNOP, lfrintnudf, 2, di)
12299
+ /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
12300
+ VAR1 (UNOP, floatv2si, 2, v2sf)
12301
+ VAR1 (UNOP, floatv4si, 2, v4sf)
12302
+ VAR1 (UNOP, floatv2di, 2, v2df)
12304
+ VAR1 (UNOP, floatunsv2si, 2, v2sf)
12305
+ VAR1 (UNOP, floatunsv4si, 2, v4sf)
12306
+ VAR1 (UNOP, floatunsv2di, 2, v2df)
12309
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
12310
- BUILTIN_VALL (BINOP, zip1)
12311
- BUILTIN_VALL (BINOP, zip2)
12312
- BUILTIN_VALL (BINOP, uzp1)
12313
- BUILTIN_VALL (BINOP, uzp2)
12314
- BUILTIN_VALL (BINOP, trn1)
12315
- BUILTIN_VALL (BINOP, trn2)
12316
+ BUILTIN_VALL (BINOP, zip1, 0)
12317
+ BUILTIN_VALL (BINOP, zip2, 0)
12318
+ BUILTIN_VALL (BINOP, uzp1, 0)
12319
+ BUILTIN_VALL (BINOP, uzp2, 0)
12320
+ BUILTIN_VALL (BINOP, trn1, 0)
12321
+ BUILTIN_VALL (BINOP, trn2, 0)
12323
+ /* Implemented by
12324
+ aarch64_frecp<FRECP:frecp_suffix><mode>. */
12325
+ BUILTIN_GPF (UNOP, frecpe, 0)
12326
+ BUILTIN_GPF (BINOP, frecps, 0)
12327
+ BUILTIN_GPF (UNOP, frecpx, 0)
12329
+ BUILTIN_VDQF (UNOP, frecpe, 0)
12330
+ BUILTIN_VDQF (BINOP, frecps, 0)
12332
+ BUILTIN_VDQF (UNOP, abs, 2)
12334
+ VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
12335
+ VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
12337
+ VAR1 (UNOP, float_extend_lo_, 0, v2df)
12338
+ VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
12339
--- a/src/gcc/config/aarch64/constraints.md
12340
+++ b/src/gcc/config/aarch64/constraints.md
12341
@@ -152,6 +152,22 @@
12343
NULL, NULL) != 0")))
12345
+(define_constraint "Dh"
12347
+ A constraint that matches an immediate operand valid for\
12348
+ AdvSIMD scalar move in HImode."
12349
+ (and (match_code "const_int")
12350
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
12353
+(define_constraint "Dq"
12355
+ A constraint that matches an immediate operand valid for\
12356
+ AdvSIMD scalar move in QImode."
12357
+ (and (match_code "const_int")
12358
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
12361
(define_constraint "Dl"
12363
A constraint that matches vector of immediates for left shifts."
12364
--- a/src/gcc/config/aarch64/aarch64.c
12365
+++ b/src/gcc/config/aarch64/aarch64.c
12366
@@ -3087,7 +3087,8 @@
12367
if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
12369
&& (code == EQ || code == NE || code == LT || code == GE)
12370
- && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
12371
+ && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
12372
+ || GET_CODE (x) == NEG))
12375
/* A compare with a shifted operand. Because of canonicalization,
12376
@@ -3349,7 +3350,7 @@
12377
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
12380
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
12381
+ asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
12385
@@ -3362,8 +3363,7 @@
12386
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
12389
- asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
12390
- REGNO (x) - V0_REGNUM + (code - 'S'));
12391
+ asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
12395
@@ -3383,20 +3383,19 @@
12396
if (x == const0_rtx
12397
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
12399
- asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
12400
+ asm_fprintf (f, "%czr", code);
12404
if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
12406
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
12407
- REGNO (x) - R0_REGNUM);
12408
+ asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
12412
if (REG_P (x) && REGNO (x) == SP_REGNUM)
12414
- asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
12415
+ asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
12419
@@ -6409,6 +6408,21 @@
12420
return gen_rtx_CONST_VECTOR (mode, v);
12423
+/* Check OP is a legal scalar immediate for the MOVI instruction. */
12426
+aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
12428
+ enum machine_mode vmode;
12430
+ gcc_assert (!VECTOR_MODE_P (mode));
12431
+ vmode = aarch64_preferred_simd_mode (mode);
12432
+ rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
12433
+ int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
12434
+ NULL, NULL, NULL, NULL);
12438
/* Construct and return a PARALLEL RTX vector. */
12440
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
12441
@@ -7860,6 +7874,9 @@
12442
#undef TARGET_EXPAND_BUILTIN_VA_START
12443
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
12445
+#undef TARGET_FOLD_BUILTIN
12446
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
12448
#undef TARGET_FUNCTION_ARG
12449
#define TARGET_FUNCTION_ARG aarch64_function_arg
12451
@@ -7881,6 +7898,9 @@
12452
#undef TARGET_FRAME_POINTER_REQUIRED
12453
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
12455
+#undef TARGET_GIMPLE_FOLD_BUILTIN
12456
+#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
12458
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
12459
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
12461
--- a/src/gcc/config/aarch64/iterators.md
12462
+++ b/src/gcc/config/aarch64/iterators.md
12464
;; Vector Float modes.
12465
(define_mode_iterator VDQF [V2SF V4SF V2DF])
12467
+;; All Float modes.
12468
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
12470
;; Vector Float modes with 2 elements.
12471
(define_mode_iterator V2F [V2SF V2DF])
12473
@@ -213,13 +216,6 @@
12474
UNSPEC_URSHL ; Used in aarch64-simd.md.
12475
UNSPEC_SQRSHL ; Used in aarch64-simd.md.
12476
UNSPEC_UQRSHL ; Used in aarch64-simd.md.
12477
- UNSPEC_CMEQ ; Used in aarch64-simd.md.
12478
- UNSPEC_CMLE ; Used in aarch64-simd.md.
12479
- UNSPEC_CMLT ; Used in aarch64-simd.md.
12480
- UNSPEC_CMGE ; Used in aarch64-simd.md.
12481
- UNSPEC_CMGT ; Used in aarch64-simd.md.
12482
- UNSPEC_CMHS ; Used in aarch64-simd.md.
12483
- UNSPEC_CMHI ; Used in aarch64-simd.md.
12484
UNSPEC_SSLI ; Used in aarch64-simd.md.
12485
UNSPEC_USLI ; Used in aarch64-simd.md.
12486
UNSPEC_SSRI ; Used in aarch64-simd.md.
12487
@@ -227,10 +223,8 @@
12488
UNSPEC_SSHLL ; Used in aarch64-simd.md.
12489
UNSPEC_USHLL ; Used in aarch64-simd.md.
12490
UNSPEC_ADDP ; Used in aarch64-simd.md.
12491
- UNSPEC_CMTST ; Used in aarch64-simd.md.
12492
UNSPEC_FMAX ; Used in aarch64-simd.md.
12493
UNSPEC_FMIN ; Used in aarch64-simd.md.
12494
- UNSPEC_BSL ; Used in aarch64-simd.md.
12495
UNSPEC_TBL ; Used in vector permute patterns.
12496
UNSPEC_CONCAT ; Used in vector permute patterns.
12497
UNSPEC_ZIP1 ; Used in vector permute patterns.
12498
@@ -249,8 +243,12 @@
12499
;; 32-bit version and "%x0" in the 64-bit version.
12500
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
12502
+;; For constraints used in scalar immediate vector moves
12503
+(define_mode_attr hq [(HI "h") (QI "q")])
12505
;; For scalar usage of vector/FP registers
12506
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
12507
+ (SF "s") (DF "d")
12508
(V8QI "") (V16QI "")
12509
(V4HI "") (V8HI "")
12510
(V2SI "") (V4SI "")
12511
@@ -305,7 +303,8 @@
12512
(V4SF ".4s") (V2DF ".2d")
12519
;; Register suffix narrowed modes for VQN.
12520
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
12521
@@ -444,7 +443,8 @@
12522
(V2SI "V2SI") (V4SI "V4SI")
12523
(DI "DI") (V2DI "V2DI")
12524
(V2SF "V2SI") (V4SF "V4SI")
12526
+ (V2DF "V2DI") (DF "DI")
12529
;; Lower case mode of results of comparison operations.
12530
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
12531
@@ -452,7 +452,8 @@
12532
(V2SI "v2si") (V4SI "v4si")
12533
(DI "di") (V2DI "v2di")
12534
(V2SF "v2si") (V4SF "v4si")
12536
+ (V2DF "v2di") (DF "di")
12539
;; Vm for lane instructions is restricted to FP_LO_REGS.
12540
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
12541
@@ -528,6 +529,9 @@
12542
;; Iterator for integer conversions
12543
(define_code_iterator FIXUORS [fix unsigned_fix])
12545
+;; Iterator for float conversions
12546
+(define_code_iterator FLOATUORS [float unsigned_float])
12548
;; Code iterator for variants of vector max and min.
12549
(define_code_iterator MAXMIN [smax smin umax umin])
12551
@@ -543,6 +547,15 @@
12552
;; Code iterator for signed variants of vector saturating binary ops.
12553
(define_code_iterator SBINQOPS [ss_plus ss_minus])
12555
+;; Comparison operators for <F>CM.
12556
+(define_code_iterator COMPARISONS [lt le eq ge gt])
12558
+;; Unsigned comparison operators.
12559
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
12561
+;; Unsigned comparison operators.
12562
+(define_code_iterator FAC_COMPARISONS [lt le ge gt])
12564
;; -------------------------------------------------------------------
12566
;; -------------------------------------------------------------------
12567
@@ -555,6 +568,10 @@
12568
(zero_extend "zero_extend")
12569
(sign_extract "extv")
12570
(zero_extract "extzv")
12572
+ (unsigned_fix "fixuns")
12574
+ (unsigned_float "floatuns")
12578
@@ -571,12 +588,37 @@
12591
+;; For comparison operators we use the FCM* and CM* instructions.
12592
+;; As there are no CMLE or CMLT instructions which act on 3 vector
12593
+;; operands, we must use CMGE or CMGT and swap the order of the
12594
+;; source operands.
12596
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
12597
+ (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
12598
+(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
12599
+ (ltu "2") (leu "2") (geu "1") (gtu "1")])
12600
+(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
12601
+ (ltu "1") (leu "1") (geu "2") (gtu "2")])
12603
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
12604
+ (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
12606
+(define_code_attr fix_trunc_optab [(fix "fix_trunc")
12607
+ (unsigned_fix "fixuns_trunc")])
12609
;; Optab prefix for sign/zero-extending operations
12610
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
12611
(div "") (udiv "u")
12612
(fix "") (unsigned_fix "u")
12613
+ (float "s") (unsigned_float "u")
12614
(ss_plus "s") (us_plus "u")
12615
(ss_minus "s") (us_minus "u")])
12617
@@ -680,21 +722,19 @@
12618
UNSPEC_SQSHRN UNSPEC_UQSHRN
12619
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
12621
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
12622
- UNSPEC_CMLE UNSPEC_CMLT])
12624
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
12626
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
12627
UNSPEC_TRN1 UNSPEC_TRN2
12628
UNSPEC_UZP1 UNSPEC_UZP2])
12630
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
12631
- UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
12632
+ UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
12635
(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
12637
+ UNSPEC_FRINTA UNSPEC_FRINTN])
12639
+(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
12641
;; -------------------------------------------------------------------
12642
;; Int Iterators Attributes.
12643
;; -------------------------------------------------------------------
12644
@@ -768,12 +808,6 @@
12645
(UNSPEC_RADDHN2 "add")
12646
(UNSPEC_RSUBHN2 "sub")])
12648
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
12649
- (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
12650
- (UNSPEC_CMEQ "eq")
12651
- (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
12652
- (UNSPEC_CMTST "tst")])
12654
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
12655
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
12657
@@ -783,15 +817,18 @@
12658
(UNSPEC_FRINTM "floor")
12659
(UNSPEC_FRINTI "nearbyint")
12660
(UNSPEC_FRINTX "rint")
12661
- (UNSPEC_FRINTA "round")])
12662
+ (UNSPEC_FRINTA "round")
12663
+ (UNSPEC_FRINTN "frintn")])
12665
;; frint suffix for floating-point rounding instructions.
12666
(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p")
12667
(UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i")
12668
- (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")])
12669
+ (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")
12670
+ (UNSPEC_FRINTN "n")])
12672
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
12673
- (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
12674
+ (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")
12675
+ (UNSPEC_FRINTN "frintn")])
12677
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
12678
(UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
12679
@@ -800,3 +837,5 @@
12680
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
12681
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
12682
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
12684
+(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
12685
--- a/src/gcc/config/aarch64/aarch64.h
12686
+++ b/src/gcc/config/aarch64/aarch64.h
12687
@@ -709,6 +709,8 @@
12689
#define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y)
12691
+#define REVERSIBLE_CC_MODE(MODE) 1
12693
#define REVERSE_CONDITION(CODE, MODE) \
12694
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
12695
? reverse_condition_maybe_unordered (CODE) \
12696
--- a/src/gcc/config/arm/arm-tables.opt
12697
+++ b/src/gcc/config/arm/arm-tables.opt
12698
@@ -250,6 +250,9 @@
12699
Enum(processor_type) String(cortex-a15) Value(cortexa15)
12702
+Enum(processor_type) String(cortex-a53) Value(cortexa53)
12705
Enum(processor_type) String(cortex-r4) Value(cortexr4)
12708
@@ -259,6 +262,9 @@
12709
Enum(processor_type) String(cortex-r5) Value(cortexr5)
12712
+Enum(processor_type) String(cortex-r7) Value(cortexr7)
12715
Enum(processor_type) String(cortex-m4) Value(cortexm4)
12718
--- a/src/gcc/config/arm/arm.c
12719
+++ b/src/gcc/config/arm/arm.c
12720
@@ -173,6 +173,7 @@
12721
static tree arm_builtin_decl (unsigned, bool);
12722
static void emit_constant_insn (rtx cond, rtx pattern);
12723
static rtx emit_set_insn (rtx, rtx);
12724
+static rtx emit_multi_reg_push (unsigned long);
12725
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
12727
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
12728
@@ -620,6 +621,13 @@
12729
#undef TARGET_CLASS_LIKELY_SPILLED_P
12730
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
12732
+#undef TARGET_VECTORIZE_BUILTINS
12733
+#define TARGET_VECTORIZE_BUILTINS
12735
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
12736
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
12737
+ arm_builtin_vectorized_function
12739
#undef TARGET_VECTOR_ALIGNMENT
12740
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
12742
@@ -839,6 +847,10 @@
12743
int arm_arch_arm_hwdiv;
12744
int arm_arch_thumb_hwdiv;
12746
+/* Nonzero if we should use Neon to handle 64-bits operations rather
12747
+ than core registers. */
12748
+int prefer_neon_for_64bits = 0;
12750
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
12751
we must report the mode of the memory reference from
12752
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
12753
@@ -936,6 +948,7 @@
12754
false, /* Prefer LDRD/STRD. */
12755
{true, true}, /* Prefer non short circuit. */
12756
&arm_default_vec_cost, /* Vectorizer costs. */
12757
+ false /* Prefer Neon for 64-bits bitops. */
12760
const struct tune_params arm_fastmul_tune =
12761
@@ -950,6 +963,7 @@
12762
false, /* Prefer LDRD/STRD. */
12763
{true, true}, /* Prefer non short circuit. */
12764
&arm_default_vec_cost, /* Vectorizer costs. */
12765
+ false /* Prefer Neon for 64-bits bitops. */
12768
/* StrongARM has early execution of branches, so a sequence that is worth
12769
@@ -967,6 +981,7 @@
12770
false, /* Prefer LDRD/STRD. */
12771
{true, true}, /* Prefer non short circuit. */
12772
&arm_default_vec_cost, /* Vectorizer costs. */
12773
+ false /* Prefer Neon for 64-bits bitops. */
12776
const struct tune_params arm_xscale_tune =
12777
@@ -981,6 +996,7 @@
12778
false, /* Prefer LDRD/STRD. */
12779
{true, true}, /* Prefer non short circuit. */
12780
&arm_default_vec_cost, /* Vectorizer costs. */
12781
+ false /* Prefer Neon for 64-bits bitops. */
12784
const struct tune_params arm_9e_tune =
12785
@@ -995,6 +1011,7 @@
12786
false, /* Prefer LDRD/STRD. */
12787
{true, true}, /* Prefer non short circuit. */
12788
&arm_default_vec_cost, /* Vectorizer costs. */
12789
+ false /* Prefer Neon for 64-bits bitops. */
12792
const struct tune_params arm_v6t2_tune =
12793
@@ -1009,6 +1026,7 @@
12794
false, /* Prefer LDRD/STRD. */
12795
{true, true}, /* Prefer non short circuit. */
12796
&arm_default_vec_cost, /* Vectorizer costs. */
12797
+ false /* Prefer Neon for 64-bits bitops. */
12800
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
12801
@@ -1024,6 +1042,7 @@
12802
false, /* Prefer LDRD/STRD. */
12803
{true, true}, /* Prefer non short circuit. */
12804
&arm_default_vec_cost, /* Vectorizer costs. */
12805
+ false /* Prefer Neon for 64-bits bitops. */
12808
const struct tune_params arm_cortex_a15_tune =
12809
@@ -1038,6 +1057,7 @@
12810
true, /* Prefer LDRD/STRD. */
12811
{true, true}, /* Prefer non short circuit. */
12812
&arm_default_vec_cost, /* Vectorizer costs. */
12813
+ false /* Prefer Neon for 64-bits bitops. */
12816
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
12817
@@ -1055,6 +1075,7 @@
12818
false, /* Prefer LDRD/STRD. */
12819
{false, false}, /* Prefer non short circuit. */
12820
&arm_default_vec_cost, /* Vectorizer costs. */
12821
+ false /* Prefer Neon for 64-bits bitops. */
12824
const struct tune_params arm_cortex_a9_tune =
12825
@@ -1069,6 +1090,7 @@
12826
false, /* Prefer LDRD/STRD. */
12827
{true, true}, /* Prefer non short circuit. */
12828
&arm_default_vec_cost, /* Vectorizer costs. */
12829
+ false /* Prefer Neon for 64-bits bitops. */
12832
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
12833
@@ -1085,6 +1107,7 @@
12834
false, /* Prefer LDRD/STRD. */
12835
{false, false}, /* Prefer non short circuit. */
12836
&arm_default_vec_cost, /* Vectorizer costs. */
12837
+ false /* Prefer Neon for 64-bits bitops. */
12840
const struct tune_params arm_fa726te_tune =
12841
@@ -1099,6 +1122,7 @@
12842
false, /* Prefer LDRD/STRD. */
12843
{true, true}, /* Prefer non short circuit. */
12844
&arm_default_vec_cost, /* Vectorizer costs. */
12845
+ false /* Prefer Neon for 64-bits bitops. */
12849
@@ -2129,6 +2153,12 @@
12850
global_options.x_param_values,
12851
global_options_set.x_param_values);
12853
+ /* Use Neon to perform 64-bits operations rather than core
12855
+ prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
12856
+ if (use_neon_for_64bits == 1)
12857
+ prefer_neon_for_64bits = true;
12859
/* Use the alternative scheduling-pressure algorithm by default. */
12860
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
12861
global_options.x_param_values,
12862
@@ -2382,6 +2412,10 @@
12863
if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
12866
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
12867
+ && !optimize_function_for_size_p (cfun))
12870
offsets = arm_get_frame_offsets ();
12871
stack_adjust = offsets->outgoing_args - offsets->saved_regs;
12873
@@ -2617,6 +2651,9 @@
12878
+ return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
12879
+ && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
12881
return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
12883
@@ -12603,6 +12640,277 @@
12887
+/* Helper for gen_operands_ldrd_strd. Returns true iff the memory
12888
+ operand ADDR is an immediate offset from the base register and is
12889
+ not volatile, in which case it sets BASE and OFFSET
12892
+mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
12894
+ /* TODO: Handle more general memory operand patterns, such as
12895
+ PRE_DEC and PRE_INC. */
12897
+ /* Convert a subreg of mem into mem itself. */
12898
+ if (GET_CODE (addr) == SUBREG)
12899
+ addr = alter_subreg (&addr, true);
12901
+ gcc_assert (MEM_P (addr));
12903
+ /* Don't modify volatile memory accesses. */
12904
+ if (MEM_VOLATILE_P (addr))
12907
+ *offset = const0_rtx;
12909
+ addr = XEXP (addr, 0);
12910
+ if (REG_P (addr))
12915
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
12917
+ *base = XEXP (addr, 0);
12918
+ *offset = XEXP (addr, 1);
12919
+ return (REG_P (*base) && CONST_INT_P (*offset));
12925
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
12927
+/* Called from a peephole2 to replace two word-size accesses with a
12928
+ single LDRD/STRD instruction. Returns true iff we can generate a
12929
+ new instruction sequence. That is, both accesses use the same base
12930
+ register and the gap between constant offsets is 4. This function
12931
+ may reorder its operands to match ldrd/strd RTL templates.
12932
+ OPERANDS are the operands found by the peephole matcher;
12933
+ OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
12934
+ corresponding memory operands. LOAD indicaates whether the access
12935
+ is load or store. CONST_STORE indicates a store of constant
12936
+ integer values held in OPERANDS[4,5] and assumes that the pattern
12937
+ is of length 4 insn, for the purpose of checking dead registers.
12938
+ COMMUTE indicates that register operands may be reordered. */
12940
+gen_operands_ldrd_strd (rtx *operands, bool load,
12941
+ bool const_store, bool commute)
12944
+ HOST_WIDE_INT offsets[2], offset;
12945
+ rtx base = NULL_RTX;
12946
+ rtx cur_base, cur_offset, tmp;
12948
+ HARD_REG_SET regset;
12950
+ gcc_assert (!const_store || !load);
12951
+ /* Check that the memory references are immediate offsets from the
12952
+ same base register. Extract the base register, the destination
12953
+ registers, and the corresponding memory offsets. */
12954
+ for (i = 0; i < nops; i++)
12956
+ if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
12961
+ else if (REGNO (base) != REGNO (cur_base))
12964
+ offsets[i] = INTVAL (cur_offset);
12965
+ if (GET_CODE (operands[i]) == SUBREG)
12967
+ tmp = SUBREG_REG (operands[i]);
12968
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
12969
+ operands[i] = tmp;
12973
+ /* Make sure there is no dependency between the individual loads. */
12974
+ if (load && REGNO (operands[0]) == REGNO (base))
12975
+ return false; /* RAW */
12977
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
12978
+ return false; /* WAW */
12980
+ /* If the same input register is used in both stores
12981
+ when storing different constants, try to find a free register.
12982
+ For example, the code
12987
+ can be transformed into
12989
+ strd r1, r0, [r2]
12990
+ in Thumb mode assuming that r1 is free. */
12992
+ && REGNO (operands[0]) == REGNO (operands[1])
12993
+ && INTVAL (operands[4]) != INTVAL (operands[5]))
12995
+ if (TARGET_THUMB2)
12997
+ CLEAR_HARD_REG_SET (regset);
12998
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
12999
+ if (tmp == NULL_RTX)
13002
+ /* Use the new register in the first load to ensure that
13003
+ if the original input register is not dead after peephole,
13004
+ then it will have the correct constant value. */
13005
+ operands[0] = tmp;
13007
+ else if (TARGET_ARM)
13010
+ int regno = REGNO (operands[0]);
13011
+ if (!peep2_reg_dead_p (4, operands[0]))
13013
+ /* When the input register is even and is not dead after the
13014
+ pattern, it has to hold the second constant but we cannot
13015
+ form a legal STRD in ARM mode with this register as the second
13017
+ if (regno % 2 == 0)
13020
+ /* Is regno-1 free? */
13021
+ SET_HARD_REG_SET (regset);
13022
+ CLEAR_HARD_REG_BIT(regset, regno - 1);
13023
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
13024
+ if (tmp == NULL_RTX)
13027
+ operands[0] = tmp;
13031
+ /* Find a DImode register. */
13032
+ CLEAR_HARD_REG_SET (regset);
13033
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
13034
+ if (tmp != NULL_RTX)
13036
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13037
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13041
+ /* Can we use the input register to form a DI register? */
13042
+ SET_HARD_REG_SET (regset);
13043
+ CLEAR_HARD_REG_BIT(regset,
13044
+ regno % 2 == 0 ? regno + 1 : regno - 1);
13045
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
13046
+ if (tmp == NULL_RTX)
13048
+ operands[regno % 2 == 1 ? 0 : 1] = tmp;
13052
+ gcc_assert (operands[0] != NULL_RTX);
13053
+ gcc_assert (operands[1] != NULL_RTX);
13054
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
13055
+ gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
13059
+ /* Make sure the instructions are ordered with lower memory access first. */
13060
+ if (offsets[0] > offsets[1])
13062
+ gap = offsets[0] - offsets[1];
13063
+ offset = offsets[1];
13065
+ /* Swap the instructions such that lower memory is accessed first. */
13066
+ SWAP_RTX (operands[0], operands[1]);
13067
+ SWAP_RTX (operands[2], operands[3]);
13069
+ SWAP_RTX (operands[4], operands[5]);
13073
+ gap = offsets[1] - offsets[0];
13074
+ offset = offsets[0];
13077
+ /* Make sure accesses are to consecutive memory locations. */
13081
+ /* Make sure we generate legal instructions. */
13082
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13086
+ /* In Thumb state, where registers are almost unconstrained, there
13087
+ is little hope to fix it. */
13088
+ if (TARGET_THUMB2)
13091
+ if (load && commute)
13093
+ /* Try reordering registers. */
13094
+ SWAP_RTX (operands[0], operands[1]);
13095
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13102
+ /* If input registers are dead after this pattern, they can be
13103
+ reordered or replaced by other registers that are free in the
13104
+ current pattern. */
13105
+ if (!peep2_reg_dead_p (4, operands[0])
13106
+ || !peep2_reg_dead_p (4, operands[1]))
13109
+ /* Try to reorder the input registers. */
13110
+ /* For example, the code
13115
+ can be transformed into
13120
+ if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
13123
+ SWAP_RTX (operands[0], operands[1]);
13127
+ /* Try to find a free DI register. */
13128
+ CLEAR_HARD_REG_SET (regset);
13129
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[0]));
13130
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[1]));
13133
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
13134
+ if (tmp == NULL_RTX)
13137
+ /* DREG must be an even-numbered register in DImode.
13138
+ Split it into SI registers. */
13139
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13140
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13141
+ gcc_assert (operands[0] != NULL_RTX);
13142
+ gcc_assert (operands[1] != NULL_RTX);
13143
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
13144
+ gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
13146
+ return (operands_ok_ldrd_strd (operands[0], operands[1],
13159
/* Print a symbolic form of X to the debug file, F. */
13161
@@ -14794,7 +15102,8 @@
13163
/* Constraints should ensure this. */
13164
gcc_assert (code0 == MEM && code1 == REG);
13165
- gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13166
+ gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
13167
+ || (TARGET_ARM && TARGET_LDRD));
13169
switch (GET_CODE (XEXP (operands[0], 0)))
13171
@@ -16387,6 +16696,148 @@
13175
+/* STRD in ARM mode requires consecutive registers. This function emits STRD
13176
+ whenever possible, otherwise it emits single-word stores. The first store
13177
+ also allocates stack space for all saved registers, using writeback with
13178
+ post-addressing mode. All other stores use offset addressing. If no STRD
13179
+ can be emitted, this function emits a sequence of single-word stores,
13180
+ and not an STM as before, because single-word stores provide more freedom
13181
+ scheduling and can be turned into an STM by peephole optimizations. */
13183
+arm_emit_strd_push (unsigned long saved_regs_mask)
13185
+ int num_regs = 0;
13186
+ int i, j, dwarf_index = 0;
13188
+ rtx dwarf = NULL_RTX;
13189
+ rtx insn = NULL_RTX;
13192
+ /* TODO: A more efficient code can be emitted by changing the
13193
+ layout, e.g., first push all pairs that can use STRD to keep the
13194
+ stack aligned, and then push all other registers. */
13195
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
13196
+ if (saved_regs_mask & (1 << i))
13199
+ gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
13200
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
13201
+ gcc_assert (num_regs > 0);
13203
+ /* Create sequence for DWARF info. */
13204
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
13206
+ /* For dwarf info, we generate explicit stack update. */
13207
+ tmp = gen_rtx_SET (VOIDmode,
13208
+ stack_pointer_rtx,
13209
+ plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
13210
+ RTX_FRAME_RELATED_P (tmp) = 1;
13211
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
13213
+ /* Save registers. */
13214
+ offset = - 4 * num_regs;
13216
+ while (j <= LAST_ARM_REGNUM)
13217
+ if (saved_regs_mask & (1 << j))
13220
+ && (saved_regs_mask & (1 << (j + 1))))
13222
+ /* Current register and previous register form register pair for
13223
+ which STRD can be generated. */
13226
+ /* Allocate stack space for all saved registers. */
13227
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
13228
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
13229
+ mem = gen_frame_mem (DImode, tmp);
13232
+ else if (offset > 0)
13233
+ mem = gen_frame_mem (DImode,
13234
+ plus_constant (Pmode,
13235
+ stack_pointer_rtx,
13238
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
13240
+ tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
13241
+ RTX_FRAME_RELATED_P (tmp) = 1;
13242
+ tmp = emit_insn (tmp);
13244
+ /* Record the first store insn. */
13245
+ if (dwarf_index == 1)
13248
+ /* Generate dwarf info. */
13249
+ mem = gen_frame_mem (SImode,
13250
+ plus_constant (Pmode,
13251
+ stack_pointer_rtx,
13253
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
13254
+ RTX_FRAME_RELATED_P (tmp) = 1;
13255
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
13257
+ mem = gen_frame_mem (SImode,
13258
+ plus_constant (Pmode,
13259
+ stack_pointer_rtx,
13261
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
13262
+ RTX_FRAME_RELATED_P (tmp) = 1;
13263
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
13270
+ /* Emit a single word store. */
13273
+ /* Allocate stack space for all saved registers. */
13274
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
13275
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
13276
+ mem = gen_frame_mem (SImode, tmp);
13279
+ else if (offset > 0)
13280
+ mem = gen_frame_mem (SImode,
13281
+ plus_constant (Pmode,
13282
+ stack_pointer_rtx,
13285
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
13287
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
13288
+ RTX_FRAME_RELATED_P (tmp) = 1;
13289
+ tmp = emit_insn (tmp);
13291
+ /* Record the first store insn. */
13292
+ if (dwarf_index == 1)
13295
+ /* Generate dwarf info. */
13296
+ mem = gen_frame_mem (SImode,
13297
+ plus_constant(Pmode,
13298
+ stack_pointer_rtx,
13300
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
13301
+ RTX_FRAME_RELATED_P (tmp) = 1;
13302
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
13311
+ /* Attach dwarf info to the first insn we generate. */
13312
+ gcc_assert (insn != NULL_RTX);
13313
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13314
+ RTX_FRAME_RELATED_P (insn) = 1;
13317
/* Generate and emit an insn that we will recognize as a push_multi.
13318
Unfortunately, since this insn does not reflect very well the actual
13319
semantics of the operation, we need to annotate the insn for the benefit
13320
@@ -16586,6 +17037,17 @@
13321
if (saved_regs_mask & (1 << i))
13323
reg = gen_rtx_REG (SImode, i);
13324
+ if ((num_regs == 1) && emit_update && !return_in_pc)
13326
+ /* Emit single load with writeback. */
13327
+ tmp = gen_frame_mem (SImode,
13328
+ gen_rtx_POST_INC (Pmode,
13329
+ stack_pointer_rtx));
13330
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
13331
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
13335
tmp = gen_rtx_SET (VOIDmode,
13338
@@ -16817,6 +17279,129 @@
13342
+/* LDRD in ARM mode needs consecutive registers as operands. This function
13343
+ emits LDRD whenever possible, otherwise it emits single-word loads. It uses
13344
+ offset addressing and then generates one separate stack udpate. This provides
13345
+ more scheduling freedom, compared to writeback on every load. However,
13346
+ if the function returns using load into PC directly
13347
+ (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
13348
+ before the last load. TODO: Add a peephole optimization to recognize
13349
+ the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
13350
+ peephole optimization to merge the load at stack-offset zero
13351
+ with the stack update instruction using load with writeback
13352
+ in post-index addressing mode. */
13354
+arm_emit_ldrd_pop (unsigned long saved_regs_mask)
13358
+ rtx par = NULL_RTX;
13359
+ rtx dwarf = NULL_RTX;
13362
+ /* Restore saved registers. */
13363
+ gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
13365
+ while (j <= LAST_ARM_REGNUM)
13366
+ if (saved_regs_mask & (1 << j))
13369
+ && (saved_regs_mask & (1 << (j + 1)))
13370
+ && (j + 1) != PC_REGNUM)
13372
+ /* Current register and next register form register pair for which
13373
+ LDRD can be generated. PC is always the last register popped, and
13374
+ we handle it separately. */
13376
+ mem = gen_frame_mem (DImode,
13377
+ plus_constant (Pmode,
13378
+ stack_pointer_rtx,
13381
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
13383
+ tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
13384
+ RTX_FRAME_RELATED_P (tmp) = 1;
13385
+ tmp = emit_insn (tmp);
13387
+ /* Generate dwarf info. */
13389
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
13390
+ gen_rtx_REG (SImode, j),
13392
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
13393
+ gen_rtx_REG (SImode, j + 1),
13396
+ REG_NOTES (tmp) = dwarf;
13401
+ else if (j != PC_REGNUM)
13403
+ /* Emit a single word load. */
13405
+ mem = gen_frame_mem (SImode,
13406
+ plus_constant (Pmode,
13407
+ stack_pointer_rtx,
13410
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
13412
+ tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
13413
+ RTX_FRAME_RELATED_P (tmp) = 1;
13414
+ tmp = emit_insn (tmp);
13416
+ /* Generate dwarf info. */
13417
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
13418
+ gen_rtx_REG (SImode, j),
13424
+ else /* j == PC_REGNUM */
13430
+ /* Update the stack. */
13433
+ tmp = gen_rtx_SET (Pmode,
13434
+ stack_pointer_rtx,
13435
+ plus_constant (Pmode,
13436
+ stack_pointer_rtx,
13438
+ RTX_FRAME_RELATED_P (tmp) = 1;
13443
+ if (saved_regs_mask & (1 << PC_REGNUM))
13445
+ /* Only PC is to be popped. */
13446
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
13447
+ XVECEXP (par, 0, 0) = ret_rtx;
13448
+ tmp = gen_rtx_SET (SImode,
13449
+ gen_rtx_REG (SImode, PC_REGNUM),
13450
+ gen_frame_mem (SImode,
13451
+ gen_rtx_POST_INC (SImode,
13452
+ stack_pointer_rtx)));
13453
+ RTX_FRAME_RELATED_P (tmp) = 1;
13454
+ XVECEXP (par, 0, 1) = tmp;
13455
+ par = emit_jump_insn (par);
13457
+ /* Generate dwarf info. */
13458
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
13459
+ gen_rtx_REG (SImode, PC_REGNUM),
13461
+ REG_NOTES (par) = dwarf;
13465
/* Calculate the size of the return value that is passed in registers. */
13467
arm_size_return_regs (void)
13468
@@ -17026,9 +17611,10 @@
13469
/* If it is safe to use r3, then do so. This sometimes
13470
generates better code on Thumb-2 by avoiding the need to
13471
use 32-bit push/pop instructions. */
13472
- if (! any_sibcall_uses_r3 ()
13473
+ if (! any_sibcall_uses_r3 ()
13474
&& arm_size_return_regs () <= 12
13475
- && (offsets->saved_regs_mask & (1 << 3)) == 0)
13476
+ && (offsets->saved_regs_mask & (1 << 3)) == 0
13477
+ && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
13481
@@ -17460,6 +18046,12 @@
13483
thumb2_emit_strd_push (live_regs_mask);
13485
+ else if (TARGET_ARM
13486
+ && !TARGET_APCS_FRAME
13487
+ && !IS_INTERRUPT (func_type))
13489
+ arm_emit_strd_push (live_regs_mask);
13493
insn = emit_multi_reg_push (live_regs_mask);
13494
@@ -19339,6 +19931,7 @@
13502
@@ -19356,14 +19949,15 @@
13503
#define TYPE_MODE_BIT(X) (1 << (X))
13505
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
13506
- | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
13507
- | TYPE_MODE_BIT (T_DI))
13508
+ | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
13509
+ | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
13510
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
13511
| TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
13512
| TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
13514
#define v8qi_UP T_V8QI
13515
#define v4hi_UP T_V4HI
13516
+#define v4hf_UP T_V4HF
13517
#define v2si_UP T_V2SI
13518
#define v2sf_UP T_V2SF
13520
@@ -19399,6 +19993,8 @@
13524
+ NEON_FLOAT_WIDEN,
13525
+ NEON_FLOAT_NARROW,
13529
@@ -19459,7 +20055,8 @@
13530
VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
13531
{#N, NEON_##T, UP (J), CF (N, J), 0}
13533
-/* The mode entries in the following table correspond to the "key" type of the
13534
+/* The NEON builtin data can be found in arm_neon_builtins.def.
13535
+ The mode entries in the following table correspond to the "key" type of the
13536
instruction variant, i.e. equivalent to that which would be specified after
13537
the assembler mnemonic, which usually refers to the last vector operand.
13538
(Signed/unsigned/polynomial types are not differentiated between though, and
13539
@@ -19469,196 +20066,7 @@
13541
static neon_builtin_datum neon_builtin_data[] =
13543
- VAR10 (BINOP, vadd,
13544
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13545
- VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
13546
- VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
13547
- VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13548
- VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13549
- VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
13550
- VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13551
- VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13552
- VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
13553
- VAR2 (TERNOP, vfma, v2sf, v4sf),
13554
- VAR2 (TERNOP, vfms, v2sf, v4sf),
13555
- VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13556
- VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
13557
- VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
13558
- VAR2 (TERNOP, vqdmlal, v4hi, v2si),
13559
- VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
13560
- VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
13561
- VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
13562
- VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
13563
- VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
13564
- VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
13565
- VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
13566
- VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
13567
- VAR2 (BINOP, vqdmull, v4hi, v2si),
13568
- VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13569
- VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13570
- VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13571
- VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
13572
- VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
13573
- VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
13574
- VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13575
- VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13576
- VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13577
- VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
13578
- VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13579
- VAR10 (BINOP, vsub,
13580
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13581
- VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
13582
- VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
13583
- VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13584
- VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13585
- VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
13586
- VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13587
- VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13588
- VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13589
- VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13590
- VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13591
- VAR2 (BINOP, vcage, v2sf, v4sf),
13592
- VAR2 (BINOP, vcagt, v2sf, v4sf),
13593
- VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13594
- VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13595
- VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
13596
- VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13597
- VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
13598
- VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13599
- VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13600
- VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
13601
- VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13602
- VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13603
- VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
13604
- VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
13605
- VAR2 (BINOP, vrecps, v2sf, v4sf),
13606
- VAR2 (BINOP, vrsqrts, v2sf, v4sf),
13607
- VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13608
- VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
13609
- VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13610
- VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13611
- VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13612
- VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13613
- VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13614
- VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13615
- VAR2 (UNOP, vcnt, v8qi, v16qi),
13616
- VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
13617
- VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
13618
- VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
13619
- /* FIXME: vget_lane supports more variants than this! */
13620
- VAR10 (GETLANE, vget_lane,
13621
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13622
- VAR10 (SETLANE, vset_lane,
13623
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13624
- VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
13625
- VAR10 (DUP, vdup_n,
13626
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13627
- VAR10 (DUPLANE, vdup_lane,
13628
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13629
- VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
13630
- VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
13631
- VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
13632
- VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
13633
- VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
13634
- VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
13635
- VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
13636
- VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13637
- VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13638
- VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
13639
- VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
13640
- VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13641
- VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
13642
- VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
13643
- VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13644
- VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13645
- VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
13646
- VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
13647
- VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13648
- VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
13649
- VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
13650
- VAR10 (BINOP, vext,
13651
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13652
- VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13653
- VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
13654
- VAR2 (UNOP, vrev16, v8qi, v16qi),
13655
- VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
13656
- VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
13657
- VAR10 (SELECT, vbsl,
13658
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13659
- VAR2 (RINT, vrintn, v2sf, v4sf),
13660
- VAR2 (RINT, vrinta, v2sf, v4sf),
13661
- VAR2 (RINT, vrintp, v2sf, v4sf),
13662
- VAR2 (RINT, vrintm, v2sf, v4sf),
13663
- VAR2 (RINT, vrintz, v2sf, v4sf),
13664
- VAR2 (RINT, vrintx, v2sf, v4sf),
13665
- VAR1 (VTBL, vtbl1, v8qi),
13666
- VAR1 (VTBL, vtbl2, v8qi),
13667
- VAR1 (VTBL, vtbl3, v8qi),
13668
- VAR1 (VTBL, vtbl4, v8qi),
13669
- VAR1 (VTBX, vtbx1, v8qi),
13670
- VAR1 (VTBX, vtbx2, v8qi),
13671
- VAR1 (VTBX, vtbx3, v8qi),
13672
- VAR1 (VTBX, vtbx4, v8qi),
13673
- VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13674
- VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13675
- VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
13676
- VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
13677
- VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
13678
- VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
13679
- VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
13680
- VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
13681
- VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
13682
- VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
13683
- VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
13684
- VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
13685
- VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
13686
- VAR10 (LOAD1, vld1,
13687
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13688
- VAR10 (LOAD1LANE, vld1_lane,
13689
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13690
- VAR10 (LOAD1, vld1_dup,
13691
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13692
- VAR10 (STORE1, vst1,
13693
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13694
- VAR10 (STORE1LANE, vst1_lane,
13695
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13696
- VAR9 (LOADSTRUCT,
13697
- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
13698
- VAR7 (LOADSTRUCTLANE, vld2_lane,
13699
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13700
- VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
13701
- VAR9 (STORESTRUCT, vst2,
13702
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
13703
- VAR7 (STORESTRUCTLANE, vst2_lane,
13704
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13705
- VAR9 (LOADSTRUCT,
13706
- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
13707
- VAR7 (LOADSTRUCTLANE, vld3_lane,
13708
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13709
- VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
13710
- VAR9 (STORESTRUCT, vst3,
13711
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
13712
- VAR7 (STORESTRUCTLANE, vst3_lane,
13713
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13714
- VAR9 (LOADSTRUCT, vld4,
13715
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
13716
- VAR7 (LOADSTRUCTLANE, vld4_lane,
13717
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13718
- VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
13719
- VAR9 (STORESTRUCT, vst4,
13720
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
13721
- VAR7 (STORESTRUCTLANE, vst4_lane,
13722
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
13723
- VAR10 (LOGICBINOP, vand,
13724
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13725
- VAR10 (LOGICBINOP, vorr,
13726
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13727
- VAR10 (BINOP, veor,
13728
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13729
- VAR10 (LOGICBINOP, vbic,
13730
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
13731
- VAR10 (LOGICBINOP, vorn,
13732
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
13733
+#include "arm_neon_builtins.def"
13737
@@ -19673,9 +20081,36 @@
13741
-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
13742
- symbolic names defined here (which would require too much duplication).
13744
+#define CF(N,X) ARM_BUILTIN_NEON_##N##X
13745
+#define VAR1(T, N, A) \
13747
+#define VAR2(T, N, A, B) \
13748
+ VAR1 (T, N, A), \
13750
+#define VAR3(T, N, A, B, C) \
13751
+ VAR2 (T, N, A, B), \
13753
+#define VAR4(T, N, A, B, C, D) \
13754
+ VAR3 (T, N, A, B, C), \
13756
+#define VAR5(T, N, A, B, C, D, E) \
13757
+ VAR4 (T, N, A, B, C, D), \
13759
+#define VAR6(T, N, A, B, C, D, E, F) \
13760
+ VAR5 (T, N, A, B, C, D, E), \
13762
+#define VAR7(T, N, A, B, C, D, E, F, G) \
13763
+ VAR6 (T, N, A, B, C, D, E, F), \
13765
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
13766
+ VAR7 (T, N, A, B, C, D, E, F, G), \
13768
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
13769
+ VAR8 (T, N, A, B, C, D, E, F, G, H), \
13771
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
13772
+ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
13776
ARM_BUILTIN_GETWCGR0,
13777
@@ -19924,11 +20359,25 @@
13779
ARM_BUILTIN_WMERGE,
13781
- ARM_BUILTIN_NEON_BASE,
13782
+#include "arm_neon_builtins.def"
13784
- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
13788
+#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
13802
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
13805
@@ -19939,6 +20388,7 @@
13807
tree neon_intQI_type_node;
13808
tree neon_intHI_type_node;
13809
+ tree neon_floatHF_type_node;
13810
tree neon_polyQI_type_node;
13811
tree neon_polyHI_type_node;
13812
tree neon_intSI_type_node;
13813
@@ -19965,6 +20415,7 @@
13815
tree V8QI_type_node;
13816
tree V4HI_type_node;
13817
+ tree V4HF_type_node;
13818
tree V2SI_type_node;
13819
tree V2SF_type_node;
13820
tree V16QI_type_node;
13821
@@ -20019,6 +20470,9 @@
13822
neon_float_type_node = make_node (REAL_TYPE);
13823
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
13824
layout_type (neon_float_type_node);
13825
+ neon_floatHF_type_node = make_node (REAL_TYPE);
13826
+ TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
13827
+ layout_type (neon_floatHF_type_node);
13829
/* Define typedefs which exactly correspond to the modes we are basing vector
13830
types on. If you change these names you'll need to change
13831
@@ -20027,6 +20481,8 @@
13832
"__builtin_neon_qi");
13833
(*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
13834
"__builtin_neon_hi");
13835
+ (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
13836
+ "__builtin_neon_hf");
13837
(*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
13838
"__builtin_neon_si");
13839
(*lang_hooks.types.register_builtin_type) (neon_float_type_node,
13840
@@ -20068,6 +20524,8 @@
13841
build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
13843
build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
13845
+ build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
13847
build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
13849
@@ -20190,7 +20648,7 @@
13850
neon_builtin_datum *d = &neon_builtin_data[i];
13852
const char* const modenames[] = {
13853
- "v8qi", "v4hi", "v2si", "v2sf", "di",
13854
+ "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
13855
"v16qi", "v8hi", "v4si", "v4sf", "v2di",
13858
@@ -20393,8 +20851,9 @@
13859
case NEON_REINTERP:
13861
/* We iterate over 5 doubleword types, then 5 quadword
13863
- int rhs = d->mode % 5;
13864
+ types. V4HF is not a type used in reinterpret, so we translate
13865
+ d->mode to the correct index in reinterp_ftype_dreg. */
13866
+ int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
13867
switch (insn_data[d->code].operand[0].mode)
13869
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
13870
@@ -20411,7 +20870,38 @@
13874
+ case NEON_FLOAT_WIDEN:
13876
+ tree eltype = NULL_TREE;
13877
+ tree return_type = NULL_TREE;
13879
+ switch (insn_data[d->code].operand[1].mode)
13882
+ eltype = V4HF_type_node;
13883
+ return_type = V4SF_type_node;
13885
+ default: gcc_unreachable ();
13887
+ ftype = build_function_type_list (return_type, eltype, NULL);
13890
+ case NEON_FLOAT_NARROW:
13892
+ tree eltype = NULL_TREE;
13893
+ tree return_type = NULL_TREE;
13895
+ switch (insn_data[d->code].operand[1].mode)
13898
+ eltype = V4SF_type_node;
13899
+ return_type = V4HF_type_node;
13901
+ default: gcc_unreachable ();
13903
+ ftype = build_function_type_list (return_type, eltype, NULL);
13907
gcc_unreachable ();
13909
@@ -21408,6 +21898,8 @@
13913
+ case NEON_FLOAT_WIDEN:
13914
+ case NEON_FLOAT_NARROW:
13915
case NEON_REINTERP:
13916
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
13917
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
13918
@@ -21605,7 +22097,7 @@
13922
- int fcode = DECL_FUNCTION_CODE (fndecl);
13923
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13925
enum machine_mode tmode;
13926
enum machine_mode mode0;
13927
@@ -23771,6 +24263,8 @@
13930
thumb2_emit_ldrd_pop (saved_regs_mask);
13931
+ else if (TARGET_ARM && !IS_INTERRUPT (func_type))
13932
+ arm_emit_ldrd_pop (saved_regs_mask);
13934
arm_emit_multi_reg_pop (saved_regs_mask);
13936
@@ -25039,7 +25533,7 @@
13938
/* Neon also supports V2SImode, etc. listed in the clause below. */
13939
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
13940
- || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
13941
+ || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
13944
if ((TARGET_NEON || TARGET_IWMMXT)
13945
@@ -25854,6 +26348,7 @@
13953
@@ -25882,6 +26377,7 @@
13954
{ V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
13955
{ V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
13956
{ V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
13957
+ { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
13958
{ V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
13959
{ V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
13960
{ V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
13961
@@ -25980,6 +26476,60 @@
13962
return !TARGET_THUMB1;
13966
+arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
13968
+ enum machine_mode in_mode, out_mode;
13971
+ if (TREE_CODE (type_out) != VECTOR_TYPE
13972
+ || TREE_CODE (type_in) != VECTOR_TYPE
13973
+ || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
13974
+ return NULL_TREE;
13976
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
13977
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
13978
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
13979
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
13981
+/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
13982
+ decl of the vectorized builtin for the appropriate vector mode.
13983
+ NULL_TREE is returned if no such builtin is available. */
13984
+#undef ARM_CHECK_BUILTIN_MODE
13985
+#define ARM_CHECK_BUILTIN_MODE(C) \
13986
+ (out_mode == SFmode && out_n == C \
13987
+ && in_mode == SFmode && in_n == C)
13989
+#undef ARM_FIND_VRINT_VARIANT
13990
+#define ARM_FIND_VRINT_VARIANT(N) \
13991
+ (ARM_CHECK_BUILTIN_MODE (2) \
13992
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
13993
+ : (ARM_CHECK_BUILTIN_MODE (4) \
13994
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
13997
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
13999
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
14002
+ case BUILT_IN_FLOORF:
14003
+ return ARM_FIND_VRINT_VARIANT (vrintm);
14004
+ case BUILT_IN_CEILF:
14005
+ return ARM_FIND_VRINT_VARIANT (vrintp);
14006
+ case BUILT_IN_TRUNCF:
14007
+ return ARM_FIND_VRINT_VARIANT (vrintz);
14008
+ case BUILT_IN_ROUNDF:
14009
+ return ARM_FIND_VRINT_VARIANT (vrinta);
14011
+ return NULL_TREE;
14014
+ return NULL_TREE;
14016
+#undef ARM_CHECK_BUILTIN_MODE
14017
+#undef ARM_FIND_VRINT_VARIANT
14019
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
14020
static HOST_WIDE_INT
14021
arm_vector_alignment (const_tree type)
14022
@@ -26210,40 +26760,72 @@
14023
emit_insn (gen_memory_barrier ());
14026
-/* Emit the load-exclusive and store-exclusive instructions. */
14027
+/* Emit the load-exclusive and store-exclusive instructions.
14028
+ Use acquire and release versions if necessary. */
14031
-arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
14032
+arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
14034
rtx (*gen) (rtx, rtx);
14039
- case QImode: gen = gen_arm_load_exclusiveqi; break;
14040
- case HImode: gen = gen_arm_load_exclusivehi; break;
14041
- case SImode: gen = gen_arm_load_exclusivesi; break;
14042
- case DImode: gen = gen_arm_load_exclusivedi; break;
14044
- gcc_unreachable ();
14047
+ case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
14048
+ case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
14049
+ case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
14050
+ case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
14052
+ gcc_unreachable ();
14059
+ case QImode: gen = gen_arm_load_exclusiveqi; break;
14060
+ case HImode: gen = gen_arm_load_exclusivehi; break;
14061
+ case SImode: gen = gen_arm_load_exclusivesi; break;
14062
+ case DImode: gen = gen_arm_load_exclusivedi; break;
14064
+ gcc_unreachable ();
14068
emit_insn (gen (rval, mem));
14072
-arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
14073
+arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
14074
+ rtx mem, bool rel)
14076
rtx (*gen) (rtx, rtx, rtx);
14081
- case QImode: gen = gen_arm_store_exclusiveqi; break;
14082
- case HImode: gen = gen_arm_store_exclusivehi; break;
14083
- case SImode: gen = gen_arm_store_exclusivesi; break;
14084
- case DImode: gen = gen_arm_store_exclusivedi; break;
14086
- gcc_unreachable ();
14089
+ case QImode: gen = gen_arm_store_release_exclusiveqi; break;
14090
+ case HImode: gen = gen_arm_store_release_exclusivehi; break;
14091
+ case SImode: gen = gen_arm_store_release_exclusivesi; break;
14092
+ case DImode: gen = gen_arm_store_release_exclusivedi; break;
14094
+ gcc_unreachable ();
14101
+ case QImode: gen = gen_arm_store_exclusiveqi; break;
14102
+ case HImode: gen = gen_arm_store_exclusivehi; break;
14103
+ case SImode: gen = gen_arm_store_exclusivesi; break;
14104
+ case DImode: gen = gen_arm_store_exclusivedi; break;
14106
+ gcc_unreachable ();
14110
emit_insn (gen (bval, rval, mem));
14112
@@ -26278,6 +26860,15 @@
14113
mod_f = operands[7];
14114
mode = GET_MODE (mem);
14116
+ /* Normally the succ memory model must be stronger than fail, but in the
14117
+ unlikely event of fail being ACQUIRE and succ being RELEASE we need to
14118
+ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
14120
+ if (TARGET_HAVE_LDACQ
14121
+ && INTVAL (mod_f) == MEMMODEL_ACQUIRE
14122
+ && INTVAL (mod_s) == MEMMODEL_RELEASE)
14123
+ mod_s = GEN_INT (MEMMODEL_ACQ_REL);
14128
@@ -26352,8 +26943,20 @@
14129
scratch = operands[7];
14130
mode = GET_MODE (mem);
14132
- arm_pre_atomic_barrier (mod_s);
14133
+ bool use_acquire = TARGET_HAVE_LDACQ
14134
+ && !(mod_s == MEMMODEL_RELAXED
14135
+ || mod_s == MEMMODEL_CONSUME
14136
+ || mod_s == MEMMODEL_RELEASE);
14138
+ bool use_release = TARGET_HAVE_LDACQ
14139
+ && !(mod_s == MEMMODEL_RELAXED
14140
+ || mod_s == MEMMODEL_CONSUME
14141
+ || mod_s == MEMMODEL_ACQUIRE);
14143
+ /* Checks whether a barrier is needed and emits one accordingly. */
14144
+ if (!(use_acquire || use_release))
14145
+ arm_pre_atomic_barrier (mod_s);
14150
@@ -26362,7 +26965,7 @@
14152
label2 = gen_label_rtx ();
14154
- arm_emit_load_exclusive (mode, rval, mem);
14155
+ arm_emit_load_exclusive (mode, rval, mem, use_acquire);
14157
cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
14158
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14159
@@ -26370,7 +26973,7 @@
14160
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14161
emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
14163
- arm_emit_store_exclusive (mode, scratch, mem, newval);
14164
+ arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
14166
/* Weak or strong, we want EQ to be true for success, so that we
14167
match the flags that we got from the compare above. */
14168
@@ -26389,7 +26992,9 @@
14169
if (mod_f != MEMMODEL_RELAXED)
14170
emit_label (label2);
14172
- arm_post_atomic_barrier (mod_s);
14173
+ /* Checks whether a barrier is needed and emits one accordingly. */
14174
+ if (!(use_acquire || use_release))
14175
+ arm_post_atomic_barrier (mod_s);
14177
if (mod_f == MEMMODEL_RELAXED)
14178
emit_label (label2);
14179
@@ -26404,8 +27009,20 @@
14180
enum machine_mode wmode = (mode == DImode ? DImode : SImode);
14183
- arm_pre_atomic_barrier (model);
14184
+ bool use_acquire = TARGET_HAVE_LDACQ
14185
+ && !(model == MEMMODEL_RELAXED
14186
+ || model == MEMMODEL_CONSUME
14187
+ || model == MEMMODEL_RELEASE);
14189
+ bool use_release = TARGET_HAVE_LDACQ
14190
+ && !(model == MEMMODEL_RELAXED
14191
+ || model == MEMMODEL_CONSUME
14192
+ || model == MEMMODEL_ACQUIRE);
14194
+ /* Checks whether a barrier is needed and emits one accordingly. */
14195
+ if (!(use_acquire || use_release))
14196
+ arm_pre_atomic_barrier (model);
14198
label = gen_label_rtx ();
14199
emit_label (label);
14201
@@ -26417,7 +27034,7 @@
14203
value = simplify_gen_subreg (wmode, value, mode, 0);
14205
- arm_emit_load_exclusive (mode, old_out, mem);
14206
+ arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
14210
@@ -26465,12 +27082,15 @@
14214
- arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
14215
+ arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
14218
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14219
emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
14221
- arm_post_atomic_barrier (model);
14222
+ /* Checks whether a barrier is needed and emits one accordingly. */
14223
+ if (!(use_acquire || use_release))
14224
+ arm_post_atomic_barrier (model);
14227
#define MAX_VECT_LEN 16
14228
--- a/src/gcc/config/arm/arm.h
14229
+++ b/src/gcc/config/arm/arm.h
14230
@@ -350,10 +350,16 @@
14231
#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \
14234
+/* Nonzero if this chip supports load-acquire and store-release. */
14235
+#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8)
14237
/* Nonzero if integer division instructions supported. */
14238
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
14239
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
14241
+/* Should NEON be used for 64-bits bitops. */
14242
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
14244
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
14245
then TARGET_AAPCS_BASED must be true -- but the converse does not
14246
hold. TARGET_BPABI implies the use of the BPABI runtime library,
14247
@@ -539,6 +545,10 @@
14248
/* Nonzero if chip supports integer division instruction in Thumb mode. */
14249
extern int arm_arch_thumb_hwdiv;
14251
+/* Nonzero if we should use Neon to handle 64-bits operations rather
14252
+ than core registers. */
14253
+extern int prefer_neon_for_64bits;
14255
#ifndef TARGET_DEFAULT
14256
#define TARGET_DEFAULT (MASK_APCS_FRAME)
14258
@@ -1040,7 +1050,7 @@
14259
/* Modes valid for Neon D registers. */
14260
#define VALID_NEON_DREG_MODE(MODE) \
14261
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
14262
- || (MODE) == V2SFmode || (MODE) == DImode)
14263
+ || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
14265
/* Modes valid for Neon Q registers. */
14266
#define VALID_NEON_QREG_MODE(MODE) \
14267
--- a/src/gcc/config/arm/unspecs.md
14268
+++ b/src/gcc/config/arm/unspecs.md
14270
; FPSCR rounding mode and signal inexactness.
14271
UNSPEC_VRINTA ; Represent a float to integral float rounding
14272
; towards nearest, ties away from zero.
14273
+ UNSPEC_RRX ; Rotate Right with Extend shifts register right
14274
+ ; by one place, with Carry flag shifted into bit[31].
14277
(define_c_enum "unspec" [
14278
@@ -139,6 +141,10 @@
14279
VUNSPEC_ATOMIC_OP ; Represent an atomic operation.
14280
VUNSPEC_LL ; Represent a load-register-exclusive.
14281
VUNSPEC_SC ; Represent a store-register-exclusive.
14282
+ VUNSPEC_LAX ; Represent a load-register-acquire-exclusive.
14283
+ VUNSPEC_SLX ; Represent a store-register-release-exclusive.
14284
+ VUNSPEC_LDA ; Represent a store-register-acquire.
14285
+ VUNSPEC_STL ; Represent a store-register-release.
14288
;; Enumerators for NEON unspecs.
14289
--- a/src/gcc/config/arm/arm-cores.def
14290
+++ b/src/gcc/config/arm/arm-cores.def
14291
@@ -129,9 +129,11 @@
14292
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
14293
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
14294
ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
14295
+ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex_a5)
14296
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
14297
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
14298
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
14299
+ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
14300
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
14301
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
14302
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
14303
--- a/src/gcc/config/arm/arm-tune.md
14304
+++ b/src/gcc/config/arm/arm-tune.md
14306
;; -*- buffer-read-only: t -*-
14307
;; Generated automatically by gentune.sh from arm-cores.def
14308
(define_attr "tune"
14309
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
14310
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexr4,cortexr4f,cortexr5,cortexr7,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
14311
(const (symbol_ref "((enum attr_tune) arm_tune)")))
14312
--- a/src/gcc/config/arm/arm-protos.h
14313
+++ b/src/gcc/config/arm/arm-protos.h
14315
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
14316
rtx (*) (rtx, rtx, rtx));
14317
extern rtx neon_make_constant (rtx);
14318
+extern tree arm_builtin_vectorized_function (tree, tree, tree);
14319
extern void neon_expand_vector_init (rtx, rtx);
14320
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
14321
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
14322
@@ -117,6 +118,7 @@
14323
extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
14324
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
14325
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
14326
+extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
14327
extern int arm_gen_movmemqi (rtx *);
14328
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
14329
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
14330
@@ -269,6 +271,8 @@
14331
bool logical_op_non_short_circuit[2];
14332
/* Vectorizer costs. */
14333
const struct cpu_vec_costs* vec_costs;
14334
+ /* Prefer Neon for 64-bit bitops. */
14335
+ bool prefer_neon_for_64bits;
14338
extern const struct tune_params *current_tune;
14339
--- a/src/gcc/config/arm/vfp.md
14340
+++ b/src/gcc/config/arm/vfp.md
14341
@@ -132,8 +132,8 @@
14344
(define_insn "*movdi_vfp"
14345
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,r,w,w, Uv")
14346
- (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
14347
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
14348
+ (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
14349
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8
14350
&& ( register_operand (operands[0], DImode)
14351
|| register_operand (operands[1], DImode))
14352
--- a/src/gcc/config/arm/neon.md
14353
+++ b/src/gcc/config/arm/neon.md
14354
@@ -487,7 +487,7 @@
14355
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
14356
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
14357
(set_attr "length" "*,8,8,*,8,8,8")
14358
- (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
14359
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
14362
(define_insn "*sub<mode>3_neon"
14363
@@ -524,7 +524,7 @@
14364
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
14365
(set_attr "conds" "*,clob,clob,clob,*")
14366
(set_attr "length" "*,8,8,8,*")
14367
- (set_attr "arch" "nota8,*,*,*,onlya8")]
14368
+ (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
14371
(define_insn "*mul<mode>3_neon"
14372
@@ -699,7 +699,7 @@
14374
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
14375
(set_attr "length" "*,*,8,8,*,*")
14376
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
14377
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
14380
;; The concrete forms of the Neon immediate-logic instructions are vbic and
14381
@@ -724,29 +724,6 @@
14382
[(set_attr "neon_type" "neon_int_1")]
14385
-(define_insn "anddi3_neon"
14386
- [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
14387
- (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
14388
- (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r,w,DL")))]
14391
- switch (which_alternative)
14393
- case 0: /* fall through */
14394
- case 4: return "vand\t%P0, %P1, %P2";
14395
- case 1: /* fall through */
14396
- case 5: return neon_output_logic_immediate ("vand", &operands[2],
14397
- DImode, 1, VALID_NEON_QREG_MODE (DImode));
14398
- case 2: return "#";
14399
- case 3: return "#";
14400
- default: gcc_unreachable ();
14403
- [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
14404
- (set_attr "length" "*,*,8,8,*,*")
14405
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
14408
(define_insn "orn<mode>3_neon"
14409
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
14410
(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
14411
@@ -840,7 +817,7 @@
14412
veor\t%P0, %P1, %P2"
14413
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
14414
(set_attr "length" "*,8,8,*")
14415
- (set_attr "arch" "nota8,*,*,onlya8")]
14416
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
14419
(define_insn "one_cmpl<mode>2"
14420
@@ -1162,7 +1139,7 @@
14424
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
14425
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
14426
(set_attr "opt" "*,*,speed,speed,*,*")]
14429
@@ -1263,7 +1240,7 @@
14433
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
14434
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
14435
(set_attr "opt" "*,*,speed,speed,*,*")]
14438
@@ -3281,6 +3258,24 @@
14439
(const_string "neon_fp_vadd_qqq_vabs_qq")))]
14442
+(define_insn "neon_vcvtv4sfv4hf"
14443
+ [(set (match_operand:V4SF 0 "s_register_operand" "=w")
14444
+ (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
14446
+ "TARGET_NEON && TARGET_FP16"
14447
+ "vcvt.f32.f16\t%q0, %P1"
14448
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
14451
+(define_insn "neon_vcvtv4hfv4sf"
14452
+ [(set (match_operand:V4HF 0 "s_register_operand" "=w")
14453
+ (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
14455
+ "TARGET_NEON && TARGET_FP16"
14456
+ "vcvt.f16.f32\t%P0, %q1"
14457
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
14460
(define_insn "neon_vcvt_n<mode>"
14461
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
14462
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
14463
@@ -5611,7 +5606,7 @@
14464
(match_operand:SI 3 "immediate_operand" "")]
14467
- emit_insn (gen_and<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
14468
+ emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
14472
--- a/src/gcc/config/arm/arm_neon_builtins.def
14473
+++ b/src/gcc/config/arm/arm_neon_builtins.def
14475
+/* NEON builtin definitions for ARM.
14476
+ Copyright (C) 2013
14477
+ Free Software Foundation, Inc.
14478
+ Contributed by ARM Ltd.
14480
+ This file is part of GCC.
14482
+ GCC is free software; you can redistribute it and/or modify it
14483
+ under the terms of the GNU General Public License as published
14484
+ by the Free Software Foundation; either version 3, or (at your
14485
+ option) any later version.
14487
+ GCC is distributed in the hope that it will be useful, but WITHOUT
14488
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14489
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14490
+ License for more details.
14492
+ You should have received a copy of the GNU General Public License
14493
+ along with GCC; see the file COPYING3. If not see
14494
+ <http://www.gnu.org/licenses/>. */
14496
+VAR10 (BINOP, vadd,
14497
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14498
+VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
14499
+VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
14500
+VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14501
+VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14502
+VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
14503
+VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14504
+VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14505
+VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
14506
+VAR2 (TERNOP, vfma, v2sf, v4sf),
14507
+VAR2 (TERNOP, vfms, v2sf, v4sf),
14508
+VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14509
+VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
14510
+VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
14511
+VAR2 (TERNOP, vqdmlal, v4hi, v2si),
14512
+VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
14513
+VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
14514
+VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
14515
+VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
14516
+VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
14517
+VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
14518
+VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
14519
+VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
14520
+VAR2 (BINOP, vqdmull, v4hi, v2si),
14521
+VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14522
+VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14523
+VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14524
+VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
14525
+VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
14526
+VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
14527
+VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14528
+VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14529
+VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14530
+VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
14531
+VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14532
+VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14533
+VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
14534
+VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
14535
+VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14536
+VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14537
+VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
14538
+VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14539
+VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14540
+VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14541
+VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14542
+VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14543
+VAR2 (BINOP, vcage, v2sf, v4sf),
14544
+VAR2 (BINOP, vcagt, v2sf, v4sf),
14545
+VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14546
+VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14547
+VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
14548
+VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14549
+VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
14550
+VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14551
+VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14552
+VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
14553
+VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14554
+VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14555
+VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
14556
+VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
14557
+VAR2 (BINOP, vrecps, v2sf, v4sf),
14558
+VAR2 (BINOP, vrsqrts, v2sf, v4sf),
14559
+VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14560
+VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
14561
+VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14562
+VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14563
+VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14564
+VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14565
+VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14566
+VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14567
+VAR2 (UNOP, vcnt, v8qi, v16qi),
14568
+VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
14569
+VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
14570
+VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
14571
+ /* FIXME: vget_lane supports more variants than this! */
14572
+VAR10 (GETLANE, vget_lane,
14573
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14574
+VAR10 (SETLANE, vset_lane,
14575
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14576
+VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
14577
+VAR10 (DUP, vdup_n,
14578
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14579
+VAR10 (DUPLANE, vdup_lane,
14580
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14581
+VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
14582
+VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
14583
+VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
14584
+VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
14585
+VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
14586
+VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
14587
+VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
14588
+VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14589
+VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14590
+VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
14591
+VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
14592
+VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14593
+VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
14594
+VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
14595
+VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14596
+VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14597
+VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
14598
+VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
14599
+VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14600
+VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
14601
+VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
14602
+VAR10 (BINOP, vext,
14603
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14604
+VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14605
+VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
14606
+VAR2 (UNOP, vrev16, v8qi, v16qi),
14607
+VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
14608
+VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
14609
+VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
14610
+VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
14611
+VAR10 (SELECT, vbsl,
14612
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14613
+VAR2 (RINT, vrintn, v2sf, v4sf),
14614
+VAR2 (RINT, vrinta, v2sf, v4sf),
14615
+VAR2 (RINT, vrintp, v2sf, v4sf),
14616
+VAR2 (RINT, vrintm, v2sf, v4sf),
14617
+VAR2 (RINT, vrintz, v2sf, v4sf),
14618
+VAR2 (RINT, vrintx, v2sf, v4sf),
14619
+VAR1 (VTBL, vtbl1, v8qi),
14620
+VAR1 (VTBL, vtbl2, v8qi),
14621
+VAR1 (VTBL, vtbl3, v8qi),
14622
+VAR1 (VTBL, vtbl4, v8qi),
14623
+VAR1 (VTBX, vtbx1, v8qi),
14624
+VAR1 (VTBX, vtbx2, v8qi),
14625
+VAR1 (VTBX, vtbx3, v8qi),
14626
+VAR1 (VTBX, vtbx4, v8qi),
14627
+VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14628
+VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14629
+VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
14630
+VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
14631
+VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
14632
+VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
14633
+VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
14634
+VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
14635
+VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
14636
+VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
14637
+VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
14638
+VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
14639
+VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
14640
+VAR10 (LOAD1, vld1,
14641
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14642
+VAR10 (LOAD1LANE, vld1_lane,
14643
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14644
+VAR10 (LOAD1, vld1_dup,
14645
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14646
+VAR10 (STORE1, vst1,
14647
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14648
+VAR10 (STORE1LANE, vst1_lane,
14649
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14651
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
14652
+VAR7 (LOADSTRUCTLANE, vld2_lane,
14653
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14654
+VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
14655
+VAR9 (STORESTRUCT, vst2,
14656
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
14657
+VAR7 (STORESTRUCTLANE, vst2_lane,
14658
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14660
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
14661
+VAR7 (LOADSTRUCTLANE, vld3_lane,
14662
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14663
+VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
14664
+VAR9 (STORESTRUCT, vst3,
14665
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
14666
+VAR7 (STORESTRUCTLANE, vst3_lane,
14667
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14668
+VAR9 (LOADSTRUCT, vld4,
14669
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
14670
+VAR7 (LOADSTRUCTLANE, vld4_lane,
14671
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14672
+VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
14673
+VAR9 (STORESTRUCT, vst4,
14674
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
14675
+VAR7 (STORESTRUCTLANE, vst4_lane,
14676
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
14677
+VAR10 (LOGICBINOP, vand,
14678
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14679
+VAR10 (LOGICBINOP, vorr,
14680
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14681
+VAR10 (BINOP, veor,
14682
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14683
+VAR10 (LOGICBINOP, vbic,
14684
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
14685
+VAR10 (LOGICBINOP, vorn,
14686
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
14687
--- a/src/gcc/config/arm/neon.ml
14688
+++ b/src/gcc/config/arm/neon.ml
14690
<http://www.gnu.org/licenses/>. *)
14692
(* Shorthand types for vector elements. *)
14693
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
14694
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
14695
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
14696
| Cast of elts * elts | NoElts
14699
| T_uint16x4 | T_uint16x8
14700
| T_uint32x2 | T_uint32x4
14701
| T_uint64x1 | T_uint64x2
14703
| T_float32x2 | T_float32x4
14704
| T_poly8x8 | T_poly8x16
14705
| T_poly16x4 | T_poly16x8
14706
@@ -46,11 +47,13 @@
14707
| T_uint8 | T_uint16
14708
| T_uint32 | T_uint64
14709
| T_poly8 | T_poly16
14710
- | T_float32 | T_arrayof of int * vectype
14711
+ | T_float16 | T_float32
14712
+ | T_arrayof of int * vectype
14713
| T_ptrto of vectype | T_const of vectype
14715
| T_intHI | T_intSI
14716
- | T_intDI | T_floatSF
14717
+ | T_intDI | T_floatHF
14720
(* The meanings of the following are:
14721
TImode : "Tetra", two registers (four words).
14723
| Arity3 of vectype * vectype * vectype * vectype
14724
| Arity4 of vectype * vectype * vectype * vectype * vectype
14726
-type vecmode = V8QI | V4HI | V2SI | V2SF | DI
14727
+type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
14728
| V16QI | V8HI | V4SI | V4SF | V2DI
14729
| QI | HI | SI | SF
14731
@@ -284,18 +287,22 @@
14733
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
14734
| Requires_feature of string
14735
+ (* Mark that the intrinsic requires a particular architecture version. *)
14736
| Requires_arch of int
14737
+ (* Mark that the intrinsic requires a particular bit in __ARM_FP to
14739
+ | Requires_FP_bit of int
14741
exception MixedMode of elts * elts
14743
let rec elt_width = function
14744
S8 | U8 | P8 | I8 | B8 -> 8
14745
- | S16 | U16 | P16 | I16 | B16 -> 16
14746
+ | S16 | U16 | P16 | I16 | B16 | F16 -> 16
14747
| S32 | F32 | U32 | I32 | B32 -> 32
14748
| S64 | U64 | I64 | B64 -> 64
14750
let wa = elt_width a and wb = elt_width b in
14751
- if wa = wb then wa else failwith "element width?"
14752
+ if wa = wb then wa else raise (MixedMode (a, b))
14753
| Cast (a, b) -> raise (MixedMode (a, b))
14754
| NoElts -> failwith "No elts"
14756
@@ -303,7 +310,7 @@
14757
S8 | S16 | S32 | S64 -> Signed
14758
| U8 | U16 | U32 | U64 -> Unsigned
14761
+ | F16 | F32 -> Float
14762
| I8 | I16 | I32 | I64 -> Int
14763
| B8 | B16 | B32 | B64 -> Bits
14764
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
14765
@@ -315,6 +322,7 @@
14766
| Signed, 16 -> S16
14767
| Signed, 32 -> S32
14768
| Signed, 64 -> S64
14769
+ | Float, 16 -> F16
14771
| Unsigned, 8 -> U8
14772
| Unsigned, 16 -> U16
14773
@@ -384,7 +392,12 @@
14775
scan ((Array.length operands) - 1)
14777
-let rec mode_of_elt elt shape =
14778
+(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
14779
+ SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
14780
+ for the given argument position, else determine which argument to return a
14781
+ mode for automatically. *)
14783
+let rec mode_of_elt ?argpos elt shape =
14784
let flt = match elt_class elt with
14785
Float | ConvClass(_, Float) -> true | _ -> false in
14787
@@ -394,7 +407,10 @@
14788
in match shape with
14789
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
14790
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
14791
- [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
14793
+ [| V8QI; V4HF; V2SF; DI |].(idx)
14795
+ [| V8QI; V4HI; V2SI; DI |].(idx)
14796
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
14797
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
14798
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
14799
@@ -404,7 +420,11 @@
14801
[| V8QI; V4HI; V2SI; DI |].(idx)
14802
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
14803
- | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
14804
+ | Use_operands ops ->
14805
+ begin match argpos with
14806
+ None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
14807
+ | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
14809
| _ -> failwith "invalid shape"
14811
(* Modify an element type dependent on the shape of the instruction and the
14812
@@ -454,10 +474,11 @@
14813
| U16 -> T_uint16x4
14814
| U32 -> T_uint32x2
14815
| U64 -> T_uint64x1
14816
+ | F16 -> T_float16x4
14817
| F32 -> T_float32x2
14819
| P16 -> T_poly16x4
14820
- | _ -> failwith "Bad elt type"
14821
+ | _ -> failwith "Bad elt type for Dreg"
14824
begin match elt with
14825
@@ -472,7 +493,7 @@
14826
| F32 -> T_float32x4
14828
| P16 -> T_poly16x8
14829
- | _ -> failwith "Bad elt type"
14830
+ | _ -> failwith "Bad elt type for Qreg"
14833
begin match elt with
14834
@@ -487,7 +508,7 @@
14838
- | _ -> failwith "Bad elt type"
14839
+ | _ -> failwith "Bad elt type for Corereg"
14843
@@ -506,7 +527,7 @@
14844
let vectype_size = function
14845
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
14846
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
14847
- | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
14848
+ | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
14849
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
14850
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
14851
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
14852
@@ -1217,6 +1238,10 @@
14853
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
14854
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
14855
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
14856
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
14857
+ Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
14858
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
14859
+ Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
14860
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
14861
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
14862
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
14863
@@ -1782,7 +1807,7 @@
14864
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
14865
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
14866
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
14867
- | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
14868
+ | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
14869
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
14870
| NoElts -> failwith "No elts"
14872
@@ -1809,6 +1834,7 @@
14873
| T_uint32x4 -> affix "uint32x4"
14874
| T_uint64x1 -> affix "uint64x1"
14875
| T_uint64x2 -> affix "uint64x2"
14876
+ | T_float16x4 -> affix "float16x4"
14877
| T_float32x2 -> affix "float32x2"
14878
| T_float32x4 -> affix "float32x4"
14879
| T_poly8x8 -> affix "poly8x8"
14880
@@ -1825,6 +1851,7 @@
14881
| T_uint64 -> affix "uint64"
14882
| T_poly8 -> affix "poly8"
14883
| T_poly16 -> affix "poly16"
14884
+ | T_float16 -> affix "float16"
14885
| T_float32 -> affix "float32"
14886
| T_immediate _ -> "const int"
14888
@@ -1832,6 +1859,7 @@
14889
| T_intHI -> "__builtin_neon_hi"
14890
| T_intSI -> "__builtin_neon_si"
14891
| T_intDI -> "__builtin_neon_di"
14892
+ | T_floatHF -> "__builtin_neon_hf"
14893
| T_floatSF -> "__builtin_neon_sf"
14894
| T_arrayof (num, base) ->
14895
let basename = name (fun x -> x) base in
14896
@@ -1853,10 +1881,10 @@
14897
| B_XImode -> "__builtin_neon_xi"
14899
let string_of_mode = function
14900
- V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
14901
- | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
14902
- | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
14904
+ V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
14905
+ | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
14906
+ | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
14907
+ | HI -> "hi" | SI -> "si" | SF -> "sf"
14909
(* Use uppercase chars for letters which form part of the intrinsic name, but
14910
should be omitted from the builtin name (the info is passed in an extra
14911
--- a/src/gcc/config/arm/constraints.md
14912
+++ b/src/gcc/config/arm/constraints.md
14914
;; The following register constraints have been used:
14915
;; - in ARM/Thumb-2 state: t, w, x, y, z
14916
;; - in Thumb state: h, b
14917
-;; - in both states: l, c, k
14918
+;; - in both states: l, c, k, q
14919
;; In ARM state, 'l' is an alias for 'r'
14920
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
14923
(define_register_constraint "k" "STACK_REG"
14924
"@internal The stack register.")
14926
+(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS"
14927
+ "@internal In ARM state with LDRD support, core registers, otherwise general registers.")
14929
(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS"
14931
Thumb only. The union of the low registers and the stack register.")
14932
@@ -248,6 +251,12 @@
14933
(and (match_code "const_int")
14934
(match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
14936
+(define_constraint "De"
14938
+ In ARM/Thumb-2 state a const_int that can be used by insn anddi."
14939
+ (and (match_code "const_int")
14940
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
14942
(define_constraint "Di"
14944
In ARM/Thumb-2 state a const_int or const_double where both the high
14945
--- a/src/gcc/config/arm/arm-arches.def
14946
+++ b/src/gcc/config/arm/arm-arches.def
14948
ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R)
14949
ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M)
14950
ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM)
14951
-ARM_ARCH("armv8-a", cortexa15, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
14952
+ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
14953
ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)
14954
ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
14955
--- a/src/gcc/config/arm/t-arm
14956
+++ b/src/gcc/config/arm/t-arm
14958
$(srcdir)/config/arm/cortex-a8-neon.md \
14959
$(srcdir)/config/arm/cortex-a9.md \
14960
$(srcdir)/config/arm/cortex-a9-neon.md \
14961
+ $(srcdir)/config/arm/cortex-a53.md \
14962
$(srcdir)/config/arm/cortex-m4-fpu.md \
14963
$(srcdir)/config/arm/cortex-m4.md \
14964
$(srcdir)/config/arm/cortex-r4f.md \
14966
$(srcdir)/config/arm/iwmmxt.md \
14967
$(srcdir)/config/arm/iwmmxt2.md \
14968
$(srcdir)/config/arm/ldmstm.md \
14969
+ $(srcdir)/config/arm/ldrdstrd.md \
14970
$(srcdir)/config/arm/marvell-f-iwmmxt.md \
14971
$(srcdir)/config/arm/neon.md \
14972
$(srcdir)/config/arm/predicates.md \
14974
$(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
14975
$(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
14976
intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \
14977
- $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def
14978
+ $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \
14979
+ $(srcdir)/config/arm/arm_neon_builtins.def
14981
arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \
14982
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H)
14983
--- a/src/gcc/config/arm/arm.opt
14984
+++ b/src/gcc/config/arm/arm.opt
14985
@@ -247,3 +247,7 @@
14987
Target Report Var(unaligned_access) Init(2)
14988
Enable unaligned word and halfword accesses to packed data.
14991
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
14992
+Use Neon to perform 64-bits operations rather than core registers.
14993
--- a/src/gcc/config/arm/ldrdstrd.md
14994
+++ b/src/gcc/config/arm/ldrdstrd.md
14996
+;; ARM ldrd/strd peephole optimizations.
14998
+;; Copyright (C) 2013 Free Software Foundation, Inc.
15000
+;; Written by Greta Yorsh <greta.yorsh@arm.com>
15002
+;; This file is part of GCC.
15004
+;; GCC is free software; you can redistribute it and/or modify it
15005
+;; under the terms of the GNU General Public License as published by
15006
+;; the Free Software Foundation; either version 3, or (at your option)
15007
+;; any later version.
15009
+;; GCC is distributed in the hope that it will be useful, but
15010
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
15011
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15012
+;; General Public License for more details.
15014
+;; You should have received a copy of the GNU General Public License
15015
+;; along with GCC; see the file COPYING3. If not see
15016
+;; <http://www.gnu.org/licenses/>.
15018
+;; The following peephole optimizations identify consecutive memory
15019
+;; accesses, and try to rearrange the operands to enable generation of
15022
+(define_peephole2 ; ldrd
15023
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
15024
+ (match_operand:SI 2 "memory_operand" ""))
15025
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
15026
+ (match_operand:SI 3 "memory_operand" ""))]
15028
+ && current_tune->prefer_ldrd_strd
15029
+ && !optimize_function_for_size_p (cfun)"
15032
+ if (!gen_operands_ldrd_strd (operands, true, false, false))
15034
+ else if (TARGET_ARM)
15036
+ /* In ARM state, the destination registers of LDRD/STRD must be
15037
+ consecutive. We emit DImode access. */
15038
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
15039
+ operands[2] = adjust_address (operands[2], DImode, 0);
15040
+ /* Emit [(set (match_dup 0) (match_dup 2))] */
15041
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
15044
+ else if (TARGET_THUMB2)
15046
+ /* Emit the pattern:
15047
+ [(parallel [(set (match_dup 0) (match_dup 2))
15048
+ (set (match_dup 1) (match_dup 3))])] */
15049
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]);
15050
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]);
15051
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
15056
+(define_peephole2 ; strd
15057
+ [(set (match_operand:SI 2 "memory_operand" "")
15058
+ (match_operand:SI 0 "arm_general_register_operand" ""))
15059
+ (set (match_operand:SI 3 "memory_operand" "")
15060
+ (match_operand:SI 1 "arm_general_register_operand" ""))]
15062
+ && current_tune->prefer_ldrd_strd
15063
+ && !optimize_function_for_size_p (cfun)"
15066
+ if (!gen_operands_ldrd_strd (operands, false, false, false))
15068
+ else if (TARGET_ARM)
15070
+ /* In ARM state, the destination registers of LDRD/STRD must be
15071
+ consecutive. We emit DImode access. */
15072
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
15073
+ operands[2] = adjust_address (operands[2], DImode, 0);
15074
+ /* Emit [(set (match_dup 2) (match_dup 0))] */
15075
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0]));
15078
+ else if (TARGET_THUMB2)
15080
+ /* Emit the pattern:
15081
+ [(parallel [(set (match_dup 2) (match_dup 0))
15082
+ (set (match_dup 3) (match_dup 1))])] */
15083
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
15084
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
15085
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
15090
+;; The following peepholes reorder registers to enable LDRD/STRD.
15091
+(define_peephole2 ; strd of constants
15092
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
15093
+ (match_operand:SI 4 "const_int_operand" ""))
15094
+ (set (match_operand:SI 2 "memory_operand" "")
15096
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
15097
+ (match_operand:SI 5 "const_int_operand" ""))
15098
+ (set (match_operand:SI 3 "memory_operand" "")
15101
+ && current_tune->prefer_ldrd_strd
15102
+ && !optimize_function_for_size_p (cfun)"
15105
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
15107
+ else if (TARGET_ARM)
15109
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
15110
+ operands[2] = adjust_address (operands[2], DImode, 0);
15111
+ /* Emit the pattern:
15112
+ [(set (match_dup 0) (match_dup 4))
15113
+ (set (match_dup 1) (match_dup 5))
15114
+ (set (match_dup 2) tmp)] */
15115
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
15116
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
15117
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
15120
+ else if (TARGET_THUMB2)
15122
+ /* Emit the pattern:
15123
+ [(set (match_dup 0) (match_dup 4))
15124
+ (set (match_dup 1) (match_dup 5))
15125
+ (parallel [(set (match_dup 2) (match_dup 0))
15126
+ (set (match_dup 3) (match_dup 1))])] */
15127
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
15128
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
15129
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
15130
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
15131
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
15136
+(define_peephole2 ; strd of constants
15137
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
15138
+ (match_operand:SI 4 "const_int_operand" ""))
15139
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
15140
+ (match_operand:SI 5 "const_int_operand" ""))
15141
+ (set (match_operand:SI 2 "memory_operand" "")
15143
+ (set (match_operand:SI 3 "memory_operand" "")
15146
+ && current_tune->prefer_ldrd_strd
15147
+ && !optimize_function_for_size_p (cfun)"
15150
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
15152
+ else if (TARGET_ARM)
15154
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
15155
+ operands[2] = adjust_address (operands[2], DImode, 0);
15156
+ /* Emit the pattern
15157
+ [(set (match_dup 0) (match_dup 4))
15158
+ (set (match_dup 1) (match_dup 5))
15159
+ (set (match_dup 2) tmp)] */
15160
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
15161
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
15162
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
15165
+ else if (TARGET_THUMB2)
15167
+ /* Emit the pattern:
15168
+ [(set (match_dup 0) (match_dup 4))
15169
+ (set (match_dup 1) (match_dup 5))
15170
+ (parallel [(set (match_dup 2) (match_dup 0))
15171
+ (set (match_dup 3) (match_dup 1))])] */
15172
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
15173
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
15174
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
15175
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
15176
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
15181
+;; The following two peephole optimizations are only relevant for ARM
15182
+;; mode where LDRD/STRD require consecutive registers.
15184
+(define_peephole2 ; swap the destination registers of two loads
15185
+ ; before a commutative operation.
15186
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
15187
+ (match_operand:SI 2 "memory_operand" ""))
15188
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
15189
+ (match_operand:SI 3 "memory_operand" ""))
15190
+ (set (match_operand:SI 4 "arm_general_register_operand" "")
15191
+ (match_operator:SI 5 "commutative_binary_operator"
15192
+ [(match_operand 6 "arm_general_register_operand" "")
15193
+ (match_operand 7 "arm_general_register_operand" "") ]))]
15194
+ "TARGET_LDRD && TARGET_ARM
15195
+ && current_tune->prefer_ldrd_strd
15196
+ && !optimize_function_for_size_p (cfun)
15197
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
15198
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
15199
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
15200
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
15201
+ [(set (match_dup 0) (match_dup 2))
15202
+ (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
15204
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
15210
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
15211
+ operands[2] = adjust_address (operands[2], DImode, 0);
15216
+(define_peephole2 ; swap the destination registers of two loads
15217
+ ; before a commutative operation that sets the flags.
15218
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
15219
+ (match_operand:SI 2 "memory_operand" ""))
15220
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
15221
+ (match_operand:SI 3 "memory_operand" ""))
15223
+ [(set (match_operand:SI 4 "arm_general_register_operand" "")
15224
+ (match_operator:SI 5 "commutative_binary_operator"
15225
+ [(match_operand 6 "arm_general_register_operand" "")
15226
+ (match_operand 7 "arm_general_register_operand" "") ]))
15227
+ (clobber (reg:CC CC_REGNUM))])]
15228
+ "TARGET_LDRD && TARGET_ARM
15229
+ && current_tune->prefer_ldrd_strd
15230
+ && !optimize_function_for_size_p (cfun)
15231
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
15232
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
15233
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
15234
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
15235
+ [(set (match_dup 0) (match_dup 2))
15237
+ [(set (match_dup 4)
15238
+ (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
15239
+ (clobber (reg:CC CC_REGNUM))])]
15241
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
15247
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
15248
+ operands[2] = adjust_address (operands[2], DImode, 0);
15253
+;; TODO: Handle LDRD/STRD with writeback:
15254
+;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
15255
+;; (b) Patterns may be followed by an update of the base address.
15256
--- a/src/gcc/config/arm/predicates.md
15257
+++ b/src/gcc/config/arm/predicates.md
15259
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
15262
+(define_predicate "imm_for_neon_inv_logic_operand"
15263
+ (match_code "const_vector")
15265
+ return (TARGET_NEON
15266
+ && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
15269
+(define_predicate "neon_inv_logic_op2"
15270
+ (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
15271
+ (match_operand 0 "s_register_operand")))
15273
;; Any hard register.
15274
(define_predicate "arm_hard_register_operand"
15276
@@ -145,6 +156,12 @@
15277
(ior (match_operand 0 "arm_rhs_operand")
15278
(match_operand 0 "arm_neg_immediate_operand")))
15280
+(define_predicate "arm_anddi_operand_neon"
15281
+ (ior (match_operand 0 "s_register_operand")
15282
+ (and (match_code "const_int")
15283
+ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
15284
+ (match_operand 0 "neon_inv_logic_op2")))
15286
(define_predicate "arm_adddi_operand"
15287
(ior (match_operand 0 "s_register_operand")
15288
(and (match_code "const_int")
15289
@@ -270,6 +287,18 @@
15290
(define_special_predicate "lt_ge_comparison_operator"
15291
(match_code "lt,ge"))
15293
+;; The vsel instruction only accepts the ARM condition codes listed below.
15294
+(define_special_predicate "arm_vsel_comparison_operator"
15295
+ (and (match_operand 0 "expandable_comparison_operator")
15296
+ (match_test "maybe_get_arm_condition_code (op) == ARM_GE
15297
+ || maybe_get_arm_condition_code (op) == ARM_GT
15298
+ || maybe_get_arm_condition_code (op) == ARM_EQ
15299
+ || maybe_get_arm_condition_code (op) == ARM_VS
15300
+ || maybe_get_arm_condition_code (op) == ARM_LT
15301
+ || maybe_get_arm_condition_code (op) == ARM_LE
15302
+ || maybe_get_arm_condition_code (op) == ARM_NE
15303
+ || maybe_get_arm_condition_code (op) == ARM_VC")))
15305
(define_special_predicate "noov_comparison_operator"
15306
(match_code "lt,ge,eq,ne"))
15308
@@ -513,21 +542,10 @@
15309
&& neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
15312
-(define_predicate "imm_for_neon_inv_logic_operand"
15313
- (match_code "const_vector")
15315
- return (TARGET_NEON
15316
- && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
15319
(define_predicate "neon_logic_op2"
15320
(ior (match_operand 0 "imm_for_neon_logic_operand")
15321
(match_operand 0 "s_register_operand")))
15323
-(define_predicate "neon_inv_logic_op2"
15324
- (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
15325
- (match_operand 0 "s_register_operand")))
15327
;; Predicates for named expanders that overlap multiple ISAs.
15329
(define_predicate "cmpdi_operand"
15330
--- a/src/gcc/config/arm/arm_neon.h
15331
+++ b/src/gcc/config/arm/arm_neon.h
15333
typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
15334
typedef __builtin_neon_di int64x1_t;
15335
typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
15336
+typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8)));
15337
typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
15338
typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
15339
typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
15340
@@ -6016,6 +6017,22 @@
15341
return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
15344
+#if ((__ARM_FP & 0x2) != 0)
15345
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15346
+vcvt_f16_f32 (float32x4_t __a)
15348
+ return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
15352
+#if ((__ARM_FP & 0x2) != 0)
15353
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15354
+vcvt_f32_f16 (float16x4_t __a)
15356
+ return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
15360
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15361
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
15363
--- a/src/gcc/config/arm/cortex-a53.md
15364
+++ b/src/gcc/config/arm/cortex-a53.md
15366
+;; ARM Cortex-A53 pipeline description
15367
+;; Copyright (C) 2013 Free Software Foundation, Inc.
15369
+;; Contributed by ARM Ltd.
15371
+;; This file is part of GCC.
15373
+;; GCC is free software; you can redistribute it and/or modify it
15374
+;; under the terms of the GNU General Public License as published by
15375
+;; the Free Software Foundation; either version 3, or (at your option)
15376
+;; any later version.
15378
+;; GCC is distributed in the hope that it will be useful, but
15379
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
15380
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15381
+;; General Public License for more details.
15383
+;; You should have received a copy of the GNU General Public License
15384
+;; along with GCC; see the file COPYING3. If not see
15385
+;; <http://www.gnu.org/licenses/>.
15387
+(define_automaton "cortex_a53")
15389
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15390
+;; Functional units.
15391
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15393
+;; There are two main integer execution pipelines, described as
15394
+;; slot 0 and issue slot 1.
15396
+(define_cpu_unit "cortex_a53_slot0" "cortex_a53")
15397
+(define_cpu_unit "cortex_a53_slot1" "cortex_a53")
15399
+(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1")
15400
+(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1")
15402
+;; The load/store pipeline. Load/store instructions can dual-issue from
15403
+;; either pipeline, but two load/stores cannot simultaneously issue.
15405
+(define_cpu_unit "cortex_a53_ls" "cortex_a53")
15407
+;; The store pipeline. Shared between both execution pipelines.
15409
+(define_cpu_unit "cortex_a53_store" "cortex_a53")
15411
+;; The branch pipeline. Branches can dual-issue with other instructions
15412
+;; (except when those instructions take multiple cycles to issue).
15414
+(define_cpu_unit "cortex_a53_branch" "cortex_a53")
15416
+;; The integer divider.
15418
+(define_cpu_unit "cortex_a53_idiv" "cortex_a53")
15420
+;; The floating-point add pipeline used to model the usage
15421
+;; of the add pipeline by fmac instructions.
15423
+(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53")
15425
+;; Floating-point div/sqrt (long latency, out-of-order completion).
15427
+(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
15429
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15430
+;; ALU instructions.
15431
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15433
+(define_insn_reservation "cortex_a53_alu" 2
15434
+ (and (eq_attr "tune" "cortexa53")
15435
+ (eq_attr "type" "alu_reg,simple_alu_imm"))
15436
+ "cortex_a53_slot_any")
15438
+(define_insn_reservation "cortex_a53_alu_shift" 2
15439
+ (and (eq_attr "tune" "cortexa53")
15440
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
15441
+ "cortex_a53_slot_any")
15443
+;; Forwarding path for unshifted operands.
15445
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
15446
+ "cortex_a53_alu")
15448
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
15449
+ "cortex_a53_alu_shift"
15450
+ "arm_no_early_alu_shift_dep")
15452
+;; The multiplier pipeline can forward results so there's no need to specify
15453
+;; bypasses. Multiplies can only single-issue currently.
15455
+(define_insn_reservation "cortex_a53_mul" 3
15456
+ (and (eq_attr "tune" "cortexa53")
15457
+ (eq_attr "type" "mult"))
15458
+ "cortex_a53_single_issue")
15460
+;; A multiply with a single-register result or an MLA, followed by an
15461
+;; MLA with an accumulator dependency, has its result forwarded so two
15462
+;; such instructions can issue back-to-back.
15464
+(define_bypass 1 "cortex_a53_mul"
15466
+ "arm_mac_accumulator_is_mul_result")
15468
+;; Punt with a high enough latency for divides.
15469
+(define_insn_reservation "cortex_a53_udiv" 8
15470
+ (and (eq_attr "tune" "cortexa53")
15471
+ (eq_attr "insn" "udiv"))
15472
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")
15474
+(define_insn_reservation "cortex_a53_sdiv" 9
15475
+ (and (eq_attr "tune" "cortexa53")
15476
+ (eq_attr "insn" "sdiv"))
15477
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
15480
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
15481
+ "cortex_a53_alu")
15482
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
15483
+ "cortex_a53_alu_shift"
15484
+ "arm_no_early_alu_shift_dep")
15486
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15487
+;; Load/store instructions.
15488
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15490
+;; Address-generation happens in the issue stage.
15492
+(define_insn_reservation "cortex_a53_load1" 3
15493
+ (and (eq_attr "tune" "cortexa53")
15494
+ (eq_attr "type" "load_byte,load1"))
15495
+ "cortex_a53_slot_any+cortex_a53_ls")
15497
+(define_insn_reservation "cortex_a53_store1" 2
15498
+ (and (eq_attr "tune" "cortexa53")
15499
+ (eq_attr "type" "store1"))
15500
+ "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store")
15502
+(define_insn_reservation "cortex_a53_load2" 3
15503
+ (and (eq_attr "tune" "cortexa53")
15504
+ (eq_attr "type" "load2"))
15505
+ "cortex_a53_single_issue+cortex_a53_ls")
15507
+(define_insn_reservation "cortex_a53_store2" 2
15508
+ (and (eq_attr "tune" "cortexa53")
15509
+ (eq_attr "type" "store2"))
15510
+ "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store")
15512
+(define_insn_reservation "cortex_a53_load3plus" 4
15513
+ (and (eq_attr "tune" "cortexa53")
15514
+ (eq_attr "type" "load3,load4"))
15515
+ "(cortex_a53_single_issue+cortex_a53_ls)*2")
15517
+(define_insn_reservation "cortex_a53_store3plus" 3
15518
+ (and (eq_attr "tune" "cortexa53")
15519
+ (eq_attr "type" "store3,store4"))
15520
+ "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2")
15522
+;; Load/store addresses are required early in Issue.
15523
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
15524
+ "cortex_a53_load*"
15525
+ "arm_early_load_addr_dep")
15526
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
15527
+ "cortex_a53_store*"
15528
+ "arm_early_store_addr_dep")
15530
+;; Load data can forward in the ALU pipeline
15531
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
15532
+ "cortex_a53_alu")
15533
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
15534
+ "cortex_a53_alu_shift"
15535
+ "arm_no_early_alu_shift_dep")
15537
+;; ALU ops can forward to stores.
15538
+(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift"
15539
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
15540
+ "arm_no_early_store_addr_dep")
15542
+(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus"
15543
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
15544
+ "arm_no_early_store_addr_dep")
15546
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15548
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15550
+;; Currently models all branches as dual-issuable from either execution
15551
+;; slot, which isn't true for all cases. We still need to model indirect
15554
+(define_insn_reservation "cortex_a53_branch" 0
15555
+ (and (eq_attr "tune" "cortexa53")
15556
+ (eq_attr "type" "branch,call"))
15557
+ "cortex_a53_slot_any+cortex_a53_branch")
15559
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15560
+;; Floating-point arithmetic.
15561
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15563
+(define_insn_reservation "cortex_a53_fpalu" 4
15564
+ (and (eq_attr "tune" "cortexa53")
15565
+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
15567
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
15569
+(define_insn_reservation "cortex_a53_fconst" 2
15570
+ (and (eq_attr "tune" "cortexa53")
15571
+ (eq_attr "type" "fconsts,fconstd"))
15572
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
15574
+(define_insn_reservation "cortex_a53_fpmul" 4
15575
+ (and (eq_attr "tune" "cortexa53")
15576
+ (eq_attr "type" "fmuls,fmuld"))
15577
+ "cortex_a53_slot0")
15579
+;; For single-precision multiply-accumulate, the add (accumulate) is issued after
15580
+;; the multiply completes. Model that accordingly.
15582
+(define_insn_reservation "cortex_a53_fpmac" 8
15583
+ (and (eq_attr "tune" "cortexa53")
15584
+ (eq_attr "type" "fmacs,fmacd,ffmas,ffmad"))
15585
+ "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe")
15587
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15588
+;; Floating-point divide/square root instructions.
15589
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15590
+;; fsqrt really takes one cycle less, but that is not modelled.
15592
+(define_insn_reservation "cortex_a53_fdivs" 14
15593
+ (and (eq_attr "tune" "cortexa53")
15594
+ (eq_attr "type" "fdivs"))
15595
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13")
15597
+(define_insn_reservation "cortex_a53_fdivd" 29
15598
+ (and (eq_attr "tune" "cortexa53")
15599
+ (eq_attr "type" "fdivd"))
15600
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
15602
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15603
+;; VFP to/from core transfers.
15604
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15606
+(define_insn_reservation "cortex_a53_r2f" 4
15607
+ (and (eq_attr "tune" "cortexa53")
15608
+ (eq_attr "type" "r_2_f"))
15609
+ "cortex_a53_slot0")
15611
+(define_insn_reservation "cortex_a53_f2r" 2
15612
+ (and (eq_attr "tune" "cortexa53")
15613
+ (eq_attr "type" "f_2_r"))
15614
+ "cortex_a53_slot0")
15616
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15617
+;; VFP flag transfer.
15618
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15620
+(define_insn_reservation "cortex_a53_f_flags" 4
15621
+ (and (eq_attr "tune" "cortexa53")
15622
+ (eq_attr "type" "f_flag"))
15623
+ "cortex_a53_slot0")
15625
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15626
+;; VFP load/store.
15627
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15629
+(define_insn_reservation "cortex_a53_f_loads" 4
15630
+ (and (eq_attr "tune" "cortexa53")
15631
+ (eq_attr "type" "f_loads"))
15632
+ "cortex_a53_slot0")
15634
+(define_insn_reservation "cortex_a53_f_loadd" 5
15635
+ (and (eq_attr "tune" "cortexa53")
15636
+ (eq_attr "type" "f_loadd"))
15637
+ "cortex_a53_slot0")
15639
+(define_insn_reservation "cortex_a53_f_stores" 0
15640
+ (and (eq_attr "tune" "cortexa53")
15641
+ (eq_attr "type" "f_stores"))
15642
+ "cortex_a53_slot0")
15644
+(define_insn_reservation "cortex_a53_f_stored" 0
15645
+ (and (eq_attr "tune" "cortexa53")
15646
+ (eq_attr "type" "f_stored"))
15647
+ "cortex_a53_slot0")
15649
+;; Load-to-use for floating-point values has a penalty of one cycle,
15650
+;; i.e. a latency of two.
15652
+(define_bypass 2 "cortex_a53_f_loads"
15653
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
15654
+ cortex_a53_fdivs, cortex_a53_fdivd,\
15657
+(define_bypass 2 "cortex_a53_f_loadd"
15658
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
15659
+ cortex_a53_fdivs, cortex_a53_fdivd,\
15662
--- a/src/gcc/config/arm/bpabi.h
15663
+++ b/src/gcc/config/arm/bpabi.h
15666
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
15667
|mcpu=marvell-pj4 \
15668
+ |mcpu=cortex-a53 \
15669
|mcpu=generic-armv7-a \
15670
|march=armv7-m|mcpu=cortex-m3 \
15671
|march=armv7e-m|mcpu=cortex-m4 \
15673
" %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \
15675
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
15676
+ |mcpu=cortex-a53 \
15677
|mcpu=marvell-pj4 \
15678
|mcpu=generic-armv7-a \
15679
|march=armv7-m|mcpu=cortex-m3 \
15680
--- a/src/gcc/config/arm/sync.md
15681
+++ b/src/gcc/config/arm/sync.md
15683
(set_attr "conds" "unconditional")
15684
(set_attr "predicable" "no")])
15686
+(define_insn "atomic_load<mode>"
15687
+ [(set (match_operand:QHSI 0 "register_operand" "=r")
15688
+ (unspec_volatile:QHSI
15689
+ [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q")
15690
+ (match_operand:SI 2 "const_int_operand")] ;; model
15692
+ "TARGET_HAVE_LDACQ"
15694
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
15695
+ if (model == MEMMODEL_RELAXED
15696
+ || model == MEMMODEL_CONSUME
15697
+ || model == MEMMODEL_RELEASE)
15698
+ return \"ldr<sync_sfx>\\t%0, %1\";
15700
+ return \"lda<sync_sfx>\\t%0, %1\";
15704
+(define_insn "atomic_store<mode>"
15705
+ [(set (match_operand:QHSI 0 "memory_operand" "=Q")
15706
+ (unspec_volatile:QHSI
15707
+ [(match_operand:QHSI 1 "general_operand" "r")
15708
+ (match_operand:SI 2 "const_int_operand")] ;; model
15710
+ "TARGET_HAVE_LDACQ"
15712
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
15713
+ if (model == MEMMODEL_RELAXED
15714
+ || model == MEMMODEL_CONSUME
15715
+ || model == MEMMODEL_ACQUIRE)
15716
+ return \"str<sync_sfx>\t%1, %0\";
15718
+ return \"stl<sync_sfx>\t%1, %0\";
15722
;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic,
15723
;; even for a 64-bit aligned address. Instead we use a ldrexd unparied
15725
@@ -327,6 +363,16 @@
15726
"ldrex<sync_sfx>%?\t%0, %C1"
15727
[(set_attr "predicable" "yes")])
15729
+(define_insn "arm_load_acquire_exclusive<mode>"
15730
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
15732
+ (unspec_volatile:NARROW
15733
+ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
15735
+ "TARGET_HAVE_LDACQ"
15736
+ "ldaex<sync_sfx>%?\\t%0, %C1"
15737
+ [(set_attr "predicable" "yes")])
15739
(define_insn "arm_load_exclusivesi"
15740
[(set (match_operand:SI 0 "s_register_operand" "=r")
15741
(unspec_volatile:SI
15742
@@ -336,6 +382,15 @@
15744
[(set_attr "predicable" "yes")])
15746
+(define_insn "arm_load_acquire_exclusivesi"
15747
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
15748
+ (unspec_volatile:SI
15749
+ [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
15751
+ "TARGET_HAVE_LDACQ"
15752
+ "ldaex%?\t%0, %C1"
15753
+ [(set_attr "predicable" "yes")])
15755
(define_insn "arm_load_exclusivedi"
15756
[(set (match_operand:DI 0 "s_register_operand" "=r")
15757
(unspec_volatile:DI
15758
@@ -345,6 +400,15 @@
15759
"ldrexd%?\t%0, %H0, %C1"
15760
[(set_attr "predicable" "yes")])
15762
+(define_insn "arm_load_acquire_exclusivedi"
15763
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
15764
+ (unspec_volatile:DI
15765
+ [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
15767
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
15768
+ "ldaexd%?\t%0, %H0, %C1"
15769
+ [(set_attr "predicable" "yes")])
15771
(define_insn "arm_store_exclusive<mode>"
15772
[(set (match_operand:SI 0 "s_register_operand" "=&r")
15773
(unspec_volatile:SI [(const_int 0)] VUNSPEC_SC))
15774
@@ -368,3 +432,31 @@
15775
return "strex<sync_sfx>%?\t%0, %2, %C1";
15777
[(set_attr "predicable" "yes")])
15779
+(define_insn "arm_store_release_exclusivedi"
15780
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
15781
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
15782
+ (set (match_operand:DI 1 "mem_noofs_operand" "=Ua")
15783
+ (unspec_volatile:DI
15784
+ [(match_operand:DI 2 "s_register_operand" "r")]
15786
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
15788
+ rtx value = operands[2];
15789
+ /* See comment in arm_store_exclusive<mode> above. */
15790
+ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
15791
+ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
15792
+ return "stlexd%?\t%0, %2, %3, %C1";
15794
+ [(set_attr "predicable" "yes")])
15796
+(define_insn "arm_store_release_exclusive<mode>"
15797
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
15798
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
15799
+ (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua")
15800
+ (unspec_volatile:QHSI
15801
+ [(match_operand:QHSI 2 "s_register_operand" "r")]
15803
+ "TARGET_HAVE_LDACQ"
15804
+ "stlex<sync_sfx>%?\t%0, %2, %C1"
15805
+ [(set_attr "predicable" "yes")])
15806
--- a/src/gcc/config/arm/neon-testgen.ml
15807
+++ b/src/gcc/config/arm/neon-testgen.ml
15808
@@ -163,10 +163,12 @@
15809
match List.find (fun feature ->
15810
match feature with Requires_feature _ -> true
15811
| Requires_arch _ -> true
15812
+ | Requires_FP_bit 1 -> true
15815
Requires_feature "FMA" -> "arm_neonv2"
15816
| Requires_arch 8 -> "arm_v8_neon"
15817
+ | Requires_FP_bit 1 -> "arm_neon_fp16"
15818
| _ -> assert false
15819
with Not_found -> "arm_neon"
15821
--- a/src/gcc/config/arm/arm.md
15822
+++ b/src/gcc/config/arm/arm.md
15824
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
15825
; arm_arch6. This attribute is used to compute attribute "enabled",
15826
; use type "any" to enable an alternative in all cases.
15827
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
15828
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
15829
(const_string "any"))
15831
(define_attr "arch_enabled" "no,yes"
15832
@@ -129,24 +129,16 @@
15833
(match_test "TARGET_32BIT && !arm_arch6"))
15834
(const_string "yes")
15836
- (and (eq_attr "arch" "onlya8")
15837
- (eq_attr "tune" "cortexa8"))
15838
+ (and (eq_attr "arch" "avoid_neon_for_64bits")
15839
+ (match_test "TARGET_NEON")
15840
+ (not (match_test "TARGET_PREFER_NEON_64BITS")))
15841
(const_string "yes")
15843
- (and (eq_attr "arch" "neon_onlya8")
15844
- (eq_attr "tune" "cortexa8")
15845
- (match_test "TARGET_NEON"))
15846
+ (and (eq_attr "arch" "neon_for_64bits")
15847
+ (match_test "TARGET_NEON")
15848
+ (match_test "TARGET_PREFER_NEON_64BITS"))
15849
(const_string "yes")
15851
- (and (eq_attr "arch" "nota8")
15852
- (not (eq_attr "tune" "cortexa8")))
15853
- (const_string "yes")
15855
- (and (eq_attr "arch" "neon_nota8")
15856
- (not (eq_attr "tune" "cortexa8"))
15857
- (match_test "TARGET_NEON"))
15858
- (const_string "yes")
15860
(and (eq_attr "arch" "iwmmxt2")
15861
(match_test "TARGET_REALLY_IWMMXT2"))
15862
(const_string "yes")]
15863
@@ -296,6 +288,8 @@
15872
@@ -502,7 +496,7 @@
15874
(define_attr "generic_sched" "yes,no"
15875
(const (if_then_else
15876
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4,marvell_pj4")
15877
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexm4,marvell_pj4")
15878
(eq_attr "tune_cortexr4" "yes"))
15879
(const_string "no")
15880
(const_string "yes"))))
15881
@@ -510,7 +504,7 @@
15882
(define_attr "generic_vfp" "yes,no"
15883
(const (if_then_else
15884
(and (eq_attr "fpu" "vfp")
15885
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4,marvell_pj4")
15886
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4")
15887
(eq_attr "tune_cortexr4" "no"))
15888
(const_string "yes")
15889
(const_string "no"))))
15890
@@ -531,6 +525,7 @@
15891
(include "cortex-a8.md")
15892
(include "cortex-a9.md")
15893
(include "cortex-a15.md")
15894
+(include "cortex-a53.md")
15895
(include "cortex-r4.md")
15896
(include "cortex-r4f.md")
15897
(include "cortex-m4.md")
15898
@@ -844,7 +839,7 @@
15900
;; This is the canonicalization of addsi3_compare0_for_combiner when the
15901
;; addend is a constant.
15902
-(define_insn "*cmpsi2_addneg"
15903
+(define_insn "cmpsi2_addneg"
15904
[(set (reg:CC CC_REGNUM)
15906
(match_operand:SI 1 "s_register_operand" "r,r")
15907
@@ -975,7 +970,8 @@
15910
sbc%?\\t%0, %1, #%B2"
15911
- [(set_attr "conds" "use")]
15912
+ [(set_attr "conds" "use")
15913
+ (set_attr "predicable" "yes")]
15916
(define_insn "*addsi3_carryin_alt2_<optab>"
15917
@@ -987,7 +983,8 @@
15920
sbc%?\\t%0, %1, #%B2"
15921
- [(set_attr "conds" "use")]
15922
+ [(set_attr "conds" "use")
15923
+ (set_attr "predicable" "yes")]
15926
(define_insn "*addsi3_carryin_shift_<optab>"
15927
@@ -1001,6 +998,7 @@
15929
"adc%?\\t%0, %1, %3%S2"
15930
[(set_attr "conds" "use")
15931
+ (set_attr "predicable" "yes")
15932
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
15933
(const_string "alu_shift")
15934
(const_string "alu_shift_reg")))]
15935
@@ -1017,26 +1015,88 @@
15936
[(set_attr "conds" "set")]
15939
-(define_expand "incscc"
15940
+(define_insn "*subsi3_carryin"
15941
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
15942
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
15943
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
15944
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
15945
+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I")
15946
+ (match_operand:SI 2 "s_register_operand" "r,r"))
15947
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
15951
+ sbc%?\\t%0, %1, %2
15952
+ rsc%?\\t%0, %2, %1"
15953
+ [(set_attr "conds" "use")
15954
+ (set_attr "arch" "*,a")
15955
+ (set_attr "predicable" "yes")]
15958
-(define_insn "*arm_incscc"
15959
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
15960
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
15961
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
15962
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
15963
+(define_insn "*subsi3_carryin_const"
15964
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
15965
+ (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r")
15966
+ (match_operand:SI 2 "arm_not_operand" "K"))
15967
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
15969
+ "sbc\\t%0, %1, #%B2"
15970
+ [(set_attr "conds" "use")]
15973
+(define_insn "*subsi3_carryin_compare"
15974
+ [(set (reg:CC CC_REGNUM)
15975
+ (compare:CC (match_operand:SI 1 "s_register_operand" "r")
15976
+ (match_operand:SI 2 "s_register_operand" "r")))
15977
+ (set (match_operand:SI 0 "s_register_operand" "=r")
15978
+ (minus:SI (minus:SI (match_dup 1)
15980
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
15982
+ "sbcs\\t%0, %1, %2"
15983
+ [(set_attr "conds" "set")]
15986
+(define_insn "*subsi3_carryin_compare_const"
15987
+ [(set (reg:CC CC_REGNUM)
15988
+ (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r")
15989
+ (match_operand:SI 2 "arm_not_operand" "K")))
15990
+ (set (match_operand:SI 0 "s_register_operand" "=r")
15991
+ (minus:SI (plus:SI (match_dup 1)
15993
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
15995
+ "sbcs\\t%0, %1, #%B2"
15996
+ [(set_attr "conds" "set")]
15999
+(define_insn "*subsi3_carryin_shift"
16000
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
16001
+ (minus:SI (minus:SI
16002
+ (match_operand:SI 1 "s_register_operand" "r")
16003
+ (match_operator:SI 2 "shift_operator"
16004
+ [(match_operand:SI 3 "s_register_operand" "r")
16005
+ (match_operand:SI 4 "reg_or_int_operand" "rM")]))
16006
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16008
+ "sbc%?\\t%0, %1, %3%S2"
16009
+ [(set_attr "conds" "use")
16010
+ (set_attr "predicable" "yes")
16011
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
16012
+ (const_string "alu_shift")
16013
+ (const_string "alu_shift_reg")))]
16016
+(define_insn "*rsbsi3_carryin_shift"
16017
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
16018
+ (minus:SI (minus:SI
16019
+ (match_operator:SI 2 "shift_operator"
16020
+ [(match_operand:SI 3 "s_register_operand" "r")
16021
+ (match_operand:SI 4 "reg_or_int_operand" "rM")])
16022
+ (match_operand:SI 1 "s_register_operand" "r"))
16023
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16026
- add%d2\\t%0, %1, #1
16027
- mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1"
16028
+ "rsc%?\\t%0, %1, %3%S2"
16029
[(set_attr "conds" "use")
16030
- (set_attr "length" "4,8")]
16031
+ (set_attr "predicable" "yes")
16032
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
16033
+ (const_string "alu_shift")
16034
+ (const_string "alu_shift_reg")))]
16037
; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant.
16038
@@ -1087,13 +1147,27 @@
16042
-(define_insn "*arm_subdi3"
16043
+(define_insn_and_split "*arm_subdi3"
16044
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r")
16045
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
16046
(match_operand:DI 2 "s_register_operand" "r,0,0")))
16047
(clobber (reg:CC CC_REGNUM))]
16048
"TARGET_32BIT && !TARGET_NEON"
16049
- "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
16050
+ "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
16051
+ "&& reload_completed"
16052
+ [(parallel [(set (reg:CC CC_REGNUM)
16053
+ (compare:CC (match_dup 1) (match_dup 2)))
16054
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
16055
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
16056
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16058
+ operands[3] = gen_highpart (SImode, operands[0]);
16059
+ operands[0] = gen_lowpart (SImode, operands[0]);
16060
+ operands[4] = gen_highpart (SImode, operands[1]);
16061
+ operands[1] = gen_lowpart (SImode, operands[1]);
16062
+ operands[5] = gen_highpart (SImode, operands[2]);
16063
+ operands[2] = gen_lowpart (SImode, operands[2]);
16065
[(set_attr "conds" "clob")
16066
(set_attr "length" "8")]
16068
@@ -1108,55 +1182,113 @@
16069
[(set_attr "length" "4")]
16072
-(define_insn "*subdi_di_zesidi"
16073
+(define_insn_and_split "*subdi_di_zesidi"
16074
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
16075
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
16077
(match_operand:SI 2 "s_register_operand" "r,r"))))
16078
(clobber (reg:CC CC_REGNUM))]
16080
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
16081
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
16082
+ "&& reload_completed"
16083
+ [(parallel [(set (reg:CC CC_REGNUM)
16084
+ (compare:CC (match_dup 1) (match_dup 2)))
16085
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
16086
+ (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5))
16087
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16089
+ operands[3] = gen_highpart (SImode, operands[0]);
16090
+ operands[0] = gen_lowpart (SImode, operands[0]);
16091
+ operands[4] = gen_highpart (SImode, operands[1]);
16092
+ operands[1] = gen_lowpart (SImode, operands[1]);
16093
+ operands[5] = GEN_INT (~0);
16095
[(set_attr "conds" "clob")
16096
(set_attr "length" "8")]
16099
-(define_insn "*subdi_di_sesidi"
16100
+(define_insn_and_split "*subdi_di_sesidi"
16101
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
16102
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
16104
(match_operand:SI 2 "s_register_operand" "r,r"))))
16105
(clobber (reg:CC CC_REGNUM))]
16107
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
16108
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
16109
+ "&& reload_completed"
16110
+ [(parallel [(set (reg:CC CC_REGNUM)
16111
+ (compare:CC (match_dup 1) (match_dup 2)))
16112
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
16113
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4)
16114
+ (ashiftrt:SI (match_dup 2)
16116
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16118
+ operands[3] = gen_highpart (SImode, operands[0]);
16119
+ operands[0] = gen_lowpart (SImode, operands[0]);
16120
+ operands[4] = gen_highpart (SImode, operands[1]);
16121
+ operands[1] = gen_lowpart (SImode, operands[1]);
16123
[(set_attr "conds" "clob")
16124
(set_attr "length" "8")]
16127
-(define_insn "*subdi_zesidi_di"
16128
+(define_insn_and_split "*subdi_zesidi_di"
16129
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
16130
(minus:DI (zero_extend:DI
16131
(match_operand:SI 2 "s_register_operand" "r,r"))
16132
(match_operand:DI 1 "s_register_operand" "0,r")))
16133
(clobber (reg:CC CC_REGNUM))]
16135
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
16136
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
16137
+ ; is equivalent to:
16138
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0"
16139
+ "&& reload_completed"
16140
+ [(parallel [(set (reg:CC CC_REGNUM)
16141
+ (compare:CC (match_dup 2) (match_dup 1)))
16142
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
16143
+ (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4))
16144
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16146
+ operands[3] = gen_highpart (SImode, operands[0]);
16147
+ operands[0] = gen_lowpart (SImode, operands[0]);
16148
+ operands[4] = gen_highpart (SImode, operands[1]);
16149
+ operands[1] = gen_lowpart (SImode, operands[1]);
16151
[(set_attr "conds" "clob")
16152
(set_attr "length" "8")]
16155
-(define_insn "*subdi_sesidi_di"
16156
+(define_insn_and_split "*subdi_sesidi_di"
16157
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
16158
(minus:DI (sign_extend:DI
16159
(match_operand:SI 2 "s_register_operand" "r,r"))
16160
(match_operand:DI 1 "s_register_operand" "0,r")))
16161
(clobber (reg:CC CC_REGNUM))]
16163
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
16164
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
16165
+ ; is equivalent to:
16166
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31"
16167
+ "&& reload_completed"
16168
+ [(parallel [(set (reg:CC CC_REGNUM)
16169
+ (compare:CC (match_dup 2) (match_dup 1)))
16170
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
16171
+ (set (match_dup 3) (minus:SI (minus:SI
16172
+ (ashiftrt:SI (match_dup 2)
16175
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16177
+ operands[3] = gen_highpart (SImode, operands[0]);
16178
+ operands[0] = gen_lowpart (SImode, operands[0]);
16179
+ operands[4] = gen_highpart (SImode, operands[1]);
16180
+ operands[1] = gen_lowpart (SImode, operands[1]);
16182
[(set_attr "conds" "clob")
16183
(set_attr "length" "8")]
16186
-(define_insn "*subdi_zesidi_zesidi"
16187
+(define_insn_and_split "*subdi_zesidi_zesidi"
16188
[(set (match_operand:DI 0 "s_register_operand" "=r")
16189
(minus:DI (zero_extend:DI
16190
(match_operand:SI 1 "s_register_operand" "r"))
16191
@@ -1164,7 +1296,17 @@
16192
(match_operand:SI 2 "s_register_operand" "r"))))
16193
(clobber (reg:CC CC_REGNUM))]
16195
- "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
16196
+ "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
16197
+ "&& reload_completed"
16198
+ [(parallel [(set (reg:CC CC_REGNUM)
16199
+ (compare:CC (match_dup 1) (match_dup 2)))
16200
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
16201
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1))
16202
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16204
+ operands[3] = gen_highpart (SImode, operands[0]);
16205
+ operands[0] = gen_lowpart (SImode, operands[0]);
16207
[(set_attr "conds" "clob")
16208
(set_attr "length" "8")]
16210
@@ -1254,7 +1396,7 @@
16211
(set_attr "type" "simple_alu_imm,*,*")]
16214
-(define_insn "*subsi3_compare"
16215
+(define_insn "subsi3_compare"
16216
[(set (reg:CC CC_REGNUM)
16217
(compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
16218
(match_operand:SI 2 "arm_rhs_operand" "I,r,r")))
16219
@@ -1269,29 +1411,6 @@
16220
(set_attr "type" "simple_alu_imm,*,*")]
16223
-(define_expand "decscc"
16224
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
16225
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
16226
- (match_operator:SI 2 "arm_comparison_operator"
16227
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
16232
-(define_insn "*arm_decscc"
16233
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
16234
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
16235
- (match_operator:SI 2 "arm_comparison_operator"
16236
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
16239
- sub%d2\\t%0, %1, #1
16240
- mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1"
16241
- [(set_attr "conds" "use")
16242
- (set_attr "length" "*,8")
16243
- (set_attr "type" "simple_alu_imm,*")]
16246
(define_expand "subsf3"
16247
[(set (match_operand:SF 0 "s_register_operand" "")
16248
(minus:SF (match_operand:SF 1 "s_register_operand" "")
16249
@@ -2024,13 +2143,58 @@
16253
-(define_insn "*anddi3_insn"
16254
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
16255
- (and:DI (match_operand:DI 1 "s_register_operand" "%0,r")
16256
- (match_operand:DI 2 "s_register_operand" "r,r")))]
16257
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
16259
- [(set_attr "length" "8")]
16260
+(define_insn_and_split "*anddi3_insn"
16261
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w")
16262
+ (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0")
16263
+ (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))]
16264
+ "TARGET_32BIT && !TARGET_IWMMXT"
16266
+ switch (which_alternative)
16271
+ case 3: /* fall through */
16273
+ case 4: /* fall through */
16274
+ case 8: return "vand\t%P0, %P1, %P2";
16275
+ case 5: /* fall through */
16276
+ case 9: return neon_output_logic_immediate ("vand", &operands[2],
16277
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
16278
+ case 6: return "#";
16279
+ case 7: return "#";
16280
+ default: gcc_unreachable ();
16283
+ "TARGET_32BIT && !TARGET_IWMMXT"
16284
+ [(set (match_dup 3) (match_dup 4))
16285
+ (set (match_dup 5) (match_dup 6))]
16288
+ operands[3] = gen_lowpart (SImode, operands[0]);
16289
+ operands[5] = gen_highpart (SImode, operands[0]);
16291
+ operands[4] = simplify_gen_binary (AND, SImode,
16292
+ gen_lowpart (SImode, operands[1]),
16293
+ gen_lowpart (SImode, operands[2]));
16294
+ operands[6] = simplify_gen_binary (AND, SImode,
16295
+ gen_highpart (SImode, operands[1]),
16296
+ gen_highpart_mode (SImode, DImode, operands[2]));
16299
+ [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
16300
+ (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*,
16301
+ avoid_neon_for_64bits,avoid_neon_for_64bits")
16302
+ (set_attr "length" "8,8,8,8,*,*,8,8,*,*")
16303
+ (set (attr "insn_enabled") (if_then_else
16304
+ (lt (symbol_ref "which_alternative")
16306
+ (if_then_else (match_test "!TARGET_NEON")
16307
+ (const_string "yes")
16308
+ (const_string "no"))
16309
+ (if_then_else (match_test "TARGET_NEON")
16310
+ (const_string "yes")
16311
+ (const_string "no"))))]
16314
(define_insn_and_split "*anddi_zesidi_di"
16315
@@ -3096,13 +3260,17 @@
16319
-(define_insn "*andsi_iorsi3_notsi"
16320
+(define_insn_and_split "*andsi_iorsi3_notsi"
16321
[(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
16322
(and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
16323
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))
16324
(not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))]
16326
- "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
16327
+ "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
16328
+ "&& reload_completed"
16329
+ [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2)))
16330
+ (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))]
16332
[(set_attr "length" "8")
16333
(set_attr "ce_count" "2")
16334
(set_attr "predicable" "yes")]
16335
@@ -3253,15 +3421,23 @@
16336
[(set_attr "predicable" "yes")]
16339
-(define_insn "*arm_smax_insn"
16340
+(define_insn_and_split "*arm_smax_insn"
16341
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
16342
(smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
16343
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
16344
(clobber (reg:CC CC_REGNUM))]
16347
- cmp\\t%1, %2\;movlt\\t%0, %2
16348
- cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
16350
+ ; cmp\\t%1, %2\;movlt\\t%0, %2
16351
+ ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
16353
+ [(set (reg:CC CC_REGNUM)
16354
+ (compare:CC (match_dup 1) (match_dup 2)))
16355
+ (set (match_dup 0)
16356
+ (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0))
16360
[(set_attr "conds" "clob")
16361
(set_attr "length" "8,12")]
16363
@@ -3293,15 +3469,23 @@
16364
[(set_attr "predicable" "yes")]
16367
-(define_insn "*arm_smin_insn"
16368
+(define_insn_and_split "*arm_smin_insn"
16369
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
16370
(smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
16371
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
16372
(clobber (reg:CC CC_REGNUM))]
16375
- cmp\\t%1, %2\;movge\\t%0, %2
16376
- cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
16378
+ ; cmp\\t%1, %2\;movge\\t%0, %2
16379
+ ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
16381
+ [(set (reg:CC CC_REGNUM)
16382
+ (compare:CC (match_dup 1) (match_dup 2)))
16383
+ (set (match_dup 0)
16384
+ (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0))
16388
[(set_attr "conds" "clob")
16389
(set_attr "length" "8,12")]
16391
@@ -3316,16 +3500,24 @@
16395
-(define_insn "*arm_umaxsi3"
16396
+(define_insn_and_split "*arm_umaxsi3"
16397
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
16398
(umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
16399
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
16400
(clobber (reg:CC CC_REGNUM))]
16403
- cmp\\t%1, %2\;movcc\\t%0, %2
16404
- cmp\\t%1, %2\;movcs\\t%0, %1
16405
- cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
16407
+ ; cmp\\t%1, %2\;movcc\\t%0, %2
16408
+ ; cmp\\t%1, %2\;movcs\\t%0, %1
16409
+ ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
16411
+ [(set (reg:CC CC_REGNUM)
16412
+ (compare:CC (match_dup 1) (match_dup 2)))
16413
+ (set (match_dup 0)
16414
+ (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
16418
[(set_attr "conds" "clob")
16419
(set_attr "length" "8,8,12")]
16421
@@ -3340,16 +3532,24 @@
16425
-(define_insn "*arm_uminsi3"
16426
+(define_insn_and_split "*arm_uminsi3"
16427
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
16428
(umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
16429
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
16430
(clobber (reg:CC CC_REGNUM))]
16433
- cmp\\t%1, %2\;movcs\\t%0, %2
16434
- cmp\\t%1, %2\;movcc\\t%0, %1
16435
- cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
16437
+ ; cmp\\t%1, %2\;movcs\\t%0, %2
16438
+ ; cmp\\t%1, %2\;movcc\\t%0, %1
16439
+ ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
16441
+ [(set (reg:CC CC_REGNUM)
16442
+ (compare:CC (match_dup 1) (match_dup 2)))
16443
+ (set (match_dup 0)
16444
+ (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))
16448
[(set_attr "conds" "clob")
16449
(set_attr "length" "8,8,12")]
16451
@@ -3360,7 +3560,7 @@
16452
[(match_operand:SI 1 "s_register_operand" "r")
16453
(match_operand:SI 2 "s_register_operand" "r")]))
16454
(clobber (reg:CC CC_REGNUM))]
16456
+ "TARGET_32BIT && optimize_insn_for_size_p()"
16458
operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
16459
operands[1], operands[2]);
16460
@@ -3423,6 +3623,50 @@
16464
+; Reject the frame pointer in operand[1], since reloading this after
16465
+; it has been eliminated can cause carnage.
16466
+(define_insn_and_split "*minmax_arithsi_non_canon"
16467
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
16469
+ (match_operand:SI 1 "s_register_operand" "0,?r")
16470
+ (match_operator:SI 4 "minmax_operator"
16471
+ [(match_operand:SI 2 "s_register_operand" "r,r")
16472
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
16473
+ (clobber (reg:CC CC_REGNUM))]
16474
+ "TARGET_32BIT && !arm_eliminable_register (operands[1])"
16476
+ "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed"
16477
+ [(set (reg:CC CC_REGNUM)
16478
+ (compare:CC (match_dup 2) (match_dup 3)))
16480
+ (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)])
16481
+ (set (match_dup 0)
16482
+ (minus:SI (match_dup 1)
16484
+ (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)])
16485
+ (set (match_dup 0)
16486
+ (minus:SI (match_dup 1)
16487
+ (match_dup 3))))]
16489
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
16490
+ operands[2], operands[3]);
16491
+ enum rtx_code rc = minmax_code (operands[4]);
16492
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode,
16493
+ operands[2], operands[3]);
16495
+ if (mode == CCFPmode || mode == CCFPEmode)
16496
+ rc = reverse_condition_maybe_unordered (rc);
16498
+ rc = reverse_condition (rc);
16499
+ operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]);
16501
+ [(set_attr "conds" "clob")
16502
+ (set (attr "length")
16503
+ (if_then_else (eq_attr "is_thumb" "yes")
16505
+ (const_int 12)))]
16508
(define_code_iterator SAT [smin smax])
16509
(define_code_iterator SATrev [smin smax])
16510
(define_code_attr SATlo [(smin "1") (smax "2")])
16511
@@ -3533,13 +3777,26 @@
16515
-(define_insn "arm_ashldi3_1bit"
16516
+(define_insn_and_split "arm_ashldi3_1bit"
16517
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
16518
(ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
16520
(clobber (reg:CC CC_REGNUM))]
16522
- "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
16523
+ "#" ; "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
16524
+ "&& reload_completed"
16525
+ [(parallel [(set (reg:CC CC_REGNUM)
16526
+ (compare:CC (ashift:SI (match_dup 1) (const_int 1))
16528
+ (set (match_dup 0) (ashift:SI (match_dup 1) (const_int 1)))])
16529
+ (set (match_dup 2) (plus:SI (plus:SI (match_dup 3) (match_dup 3))
16530
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16532
+ operands[2] = gen_highpart (SImode, operands[0]);
16533
+ operands[0] = gen_lowpart (SImode, operands[0]);
16534
+ operands[3] = gen_highpart (SImode, operands[1]);
16535
+ operands[1] = gen_lowpart (SImode, operands[1]);
16537
[(set_attr "conds" "clob")
16538
(set_attr "length" "8")]
16540
@@ -3615,18 +3872,43 @@
16544
-(define_insn "arm_ashrdi3_1bit"
16545
+(define_insn_and_split "arm_ashrdi3_1bit"
16546
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
16547
(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
16549
(clobber (reg:CC CC_REGNUM))]
16551
- "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
16552
+ "#" ; "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
16553
+ "&& reload_completed"
16554
+ [(parallel [(set (reg:CC CC_REGNUM)
16555
+ (compare:CC (ashiftrt:SI (match_dup 3) (const_int 1))
16557
+ (set (match_dup 2) (ashiftrt:SI (match_dup 3) (const_int 1)))])
16558
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
16559
+ (reg:CC_C CC_REGNUM)]
16562
+ operands[2] = gen_highpart (SImode, operands[0]);
16563
+ operands[0] = gen_lowpart (SImode, operands[0]);
16564
+ operands[3] = gen_highpart (SImode, operands[1]);
16565
+ operands[1] = gen_lowpart (SImode, operands[1]);
16567
[(set_attr "conds" "clob")
16568
- (set_attr "insn" "mov")
16569
(set_attr "length" "8")]
16572
+(define_insn "*rrx"
16573
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
16574
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")
16575
+ (reg:CC_C CC_REGNUM)]
16578
+ "mov\\t%0, %1, rrx"
16579
+ [(set_attr "conds" "use")
16580
+ (set_attr "insn" "mov")
16581
+ (set_attr "type" "alu_shift")]
16584
(define_expand "ashrsi3"
16585
[(set (match_operand:SI 0 "s_register_operand" "")
16586
(ashiftrt:SI (match_operand:SI 1 "s_register_operand" "")
16587
@@ -3695,15 +3977,28 @@
16591
-(define_insn "arm_lshrdi3_1bit"
16592
+(define_insn_and_split "arm_lshrdi3_1bit"
16593
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
16594
(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
16596
(clobber (reg:CC CC_REGNUM))]
16598
- "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
16599
+ "#" ; "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
16600
+ "&& reload_completed"
16601
+ [(parallel [(set (reg:CC CC_REGNUM)
16602
+ (compare:CC (lshiftrt:SI (match_dup 3) (const_int 1))
16604
+ (set (match_dup 2) (lshiftrt:SI (match_dup 3) (const_int 1)))])
16605
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
16606
+ (reg:CC_C CC_REGNUM)]
16609
+ operands[2] = gen_highpart (SImode, operands[0]);
16610
+ operands[0] = gen_lowpart (SImode, operands[0]);
16611
+ operands[3] = gen_highpart (SImode, operands[1]);
16612
+ operands[1] = gen_lowpart (SImode, operands[1]);
16614
[(set_attr "conds" "clob")
16615
- (set_attr "insn" "mov")
16616
(set_attr "length" "8")]
16619
@@ -3791,6 +4086,23 @@
16620
(const_string "alu_shift_reg")))]
16623
+(define_insn "*shiftsi3_compare"
16624
+ [(set (reg:CC CC_REGNUM)
16625
+ (compare:CC (match_operator:SI 3 "shift_operator"
16626
+ [(match_operand:SI 1 "s_register_operand" "r")
16627
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
16629
+ (set (match_operand:SI 0 "s_register_operand" "=r")
16630
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
16632
+ "* return arm_output_shift(operands, 1);"
16633
+ [(set_attr "conds" "set")
16634
+ (set_attr "shift" "1")
16635
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
16636
+ (const_string "alu_shift")
16637
+ (const_string "alu_shift_reg")))]
16640
(define_insn "*shiftsi3_compare0"
16641
[(set (reg:CC_NOOV CC_REGNUM)
16642
(compare:CC_NOOV (match_operator:SI 3 "shift_operator"
16643
@@ -4154,12 +4466,24 @@
16645
;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1).
16646
;; The first alternative allows the common case of a *full* overlap.
16647
-(define_insn "*arm_negdi2"
16648
+(define_insn_and_split "*arm_negdi2"
16649
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
16650
(neg:DI (match_operand:DI 1 "s_register_operand" "0,r")))
16651
(clobber (reg:CC CC_REGNUM))]
16653
- "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
16654
+ "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
16655
+ "&& reload_completed"
16656
+ [(parallel [(set (reg:CC CC_REGNUM)
16657
+ (compare:CC (const_int 0) (match_dup 1)))
16658
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
16659
+ (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3))
16660
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16662
+ operands[2] = gen_highpart (SImode, operands[0]);
16663
+ operands[0] = gen_lowpart (SImode, operands[0]);
16664
+ operands[3] = gen_highpart (SImode, operands[1]);
16665
+ operands[1] = gen_lowpart (SImode, operands[1]);
16667
[(set_attr "conds" "clob")
16668
(set_attr "length" "8")]
16670
@@ -4209,6 +4533,73 @@
16671
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
16674
+;; Negate an extended 32-bit value.
16675
+(define_insn_and_split "*negdi_extendsidi"
16676
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r,l,&l")
16677
+ (neg:DI (sign_extend:DI (match_operand:SI 1 "s_register_operand" "0,r,0,l"))))
16678
+ (clobber (reg:CC CC_REGNUM))]
16680
+ "#" ; rsb\\t%Q0, %1, #0\;asr\\t%R0, %Q0, #31
16681
+ "&& reload_completed"
16684
+ operands[2] = gen_highpart (SImode, operands[0]);
16685
+ operands[0] = gen_lowpart (SImode, operands[0]);
16686
+ rtx tmp = gen_rtx_SET (VOIDmode,
16688
+ gen_rtx_MINUS (SImode,
16697
+ /* Set the flags, to emit the short encoding in Thumb2. */
16698
+ rtx flags = gen_rtx_SET (VOIDmode,
16699
+ gen_rtx_REG (CCmode, CC_REGNUM),
16700
+ gen_rtx_COMPARE (CCmode,
16703
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
16708
+ emit_insn (gen_rtx_SET (VOIDmode,
16710
+ gen_rtx_ASHIFTRT (SImode,
16715
+ [(set_attr "length" "8,8,4,4")
16716
+ (set_attr "arch" "a,a,t2,t2")]
16719
+(define_insn_and_split "*negdi_zero_extendsidi"
16720
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
16721
+ (neg:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r"))))
16722
+ (clobber (reg:CC CC_REGNUM))]
16724
+ "#" ; "rsbs\\t%Q0, %1, #0\;sbc\\t%R0,%R0,%R0"
16725
+ ;; Don't care what register is input to sbc,
16726
+ ;; since we just just need to propagate the carry.
16727
+ "&& reload_completed"
16728
+ [(parallel [(set (reg:CC CC_REGNUM)
16729
+ (compare:CC (const_int 0) (match_dup 1)))
16730
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
16731
+ (set (match_dup 2) (minus:SI (minus:SI (match_dup 2) (match_dup 2))
16732
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
16734
+ operands[2] = gen_highpart (SImode, operands[0]);
16735
+ operands[0] = gen_lowpart (SImode, operands[0]);
16737
+ [(set_attr "conds" "clob")
16738
+ (set_attr "length" "8")] ;; length in thumb is 4
16741
;; abssi2 doesn't really clobber the condition codes if a different register
16742
;; is being set. To keep things simple, assume during rtl manipulations that
16743
;; it does, but tell the final scan operator the truth. Similarly for
16744
@@ -4227,14 +4618,67 @@
16745
operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
16748
-(define_insn "*arm_abssi2"
16749
+(define_insn_and_split "*arm_abssi2"
16750
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
16751
(abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
16752
(clobber (reg:CC CC_REGNUM))]
16755
- cmp\\t%0, #0\;rsblt\\t%0, %0, #0
16756
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
16758
+ "&& reload_completed"
16761
+ /* if (which_alternative == 0) */
16762
+ if (REGNO(operands[0]) == REGNO(operands[1]))
16764
+ /* Emit the pattern:
16765
+ cmp\\t%0, #0\;rsblt\\t%0, %0, #0
16766
+ [(set (reg:CC CC_REGNUM)
16767
+ (compare:CC (match_dup 0) (const_int 0)))
16768
+ (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0))
16769
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))]
16771
+ emit_insn (gen_rtx_SET (VOIDmode,
16772
+ gen_rtx_REG (CCmode, CC_REGNUM),
16773
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
16774
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
16775
+ (gen_rtx_LT (SImode,
16776
+ gen_rtx_REG (CCmode, CC_REGNUM),
16778
+ (gen_rtx_SET (VOIDmode,
16780
+ (gen_rtx_MINUS (SImode,
16782
+ operands[1]))))));
16787
+ /* Emit the pattern:
16788
+ alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31
16789
+ [(set (match_dup 0)
16790
+ (xor:SI (match_dup 1)
16791
+ (ashiftrt:SI (match_dup 1) (const_int 31))))
16792
+ (set (match_dup 0)
16793
+ (minus:SI (match_dup 0)
16794
+ (ashiftrt:SI (match_dup 1) (const_int 31))))]
16796
+ emit_insn (gen_rtx_SET (VOIDmode,
16798
+ gen_rtx_XOR (SImode,
16799
+ gen_rtx_ASHIFTRT (SImode,
16803
+ emit_insn (gen_rtx_SET (VOIDmode,
16805
+ gen_rtx_MINUS (SImode,
16807
+ gen_rtx_ASHIFTRT (SImode,
16809
+ GEN_INT (31)))));
16813
[(set_attr "conds" "clob,*")
16814
(set_attr "shift" "1")
16815
(set_attr "predicable" "no, yes")
16816
@@ -4255,14 +4699,56 @@
16817
[(set_attr "length" "6")]
16820
-(define_insn "*arm_neg_abssi2"
16821
+(define_insn_and_split "*arm_neg_abssi2"
16822
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
16823
(neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
16824
(clobber (reg:CC CC_REGNUM))]
16827
- cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
16828
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
16830
+ "&& reload_completed"
16833
+ /* if (which_alternative == 0) */
16834
+ if (REGNO (operands[0]) == REGNO (operands[1]))
16836
+ /* Emit the pattern:
16837
+ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
16839
+ emit_insn (gen_rtx_SET (VOIDmode,
16840
+ gen_rtx_REG (CCmode, CC_REGNUM),
16841
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
16842
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
16843
+ gen_rtx_GT (SImode,
16844
+ gen_rtx_REG (CCmode, CC_REGNUM),
16846
+ gen_rtx_SET (VOIDmode,
16848
+ (gen_rtx_MINUS (SImode,
16850
+ operands[1])))));
16854
+ /* Emit the pattern:
16855
+ eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31
16857
+ emit_insn (gen_rtx_SET (VOIDmode,
16859
+ gen_rtx_XOR (SImode,
16860
+ gen_rtx_ASHIFTRT (SImode,
16864
+ emit_insn (gen_rtx_SET (VOIDmode,
16866
+ gen_rtx_MINUS (SImode,
16867
+ gen_rtx_ASHIFTRT (SImode,
16874
[(set_attr "conds" "clob,*")
16875
(set_attr "shift" "1")
16876
(set_attr "predicable" "no, yes")
16877
@@ -4330,7 +4816,7 @@
16878
[(set_attr "length" "*,8,8,*")
16879
(set_attr "predicable" "no,yes,yes,no")
16880
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
16881
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
16882
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
16885
(define_expand "one_cmplsi2"
16886
@@ -4498,7 +4984,7 @@
16887
"TARGET_32BIT <qhs_zextenddi_cond>"
16889
[(set_attr "length" "8,4,8,8")
16890
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")
16891
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
16892
(set_attr "ce_count" "2")
16893
(set_attr "predicable" "yes")]
16895
@@ -4513,7 +4999,7 @@
16896
(set_attr "ce_count" "2")
16897
(set_attr "shift" "1")
16898
(set_attr "predicable" "yes")
16899
- (set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")]
16900
+ (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")]
16903
;; Splits for all extensions to DImode
16904
@@ -5313,8 +5799,8 @@
16907
(define_insn "*arm_movdi"
16908
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
16909
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))]
16910
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
16911
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))]
16913
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
16915
@@ -6738,8 +7224,8 @@
16918
(define_insn "*movdf_soft_insn"
16919
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
16920
- (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
16921
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
16922
+ (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
16923
"TARGET_32BIT && TARGET_SOFT_FLOAT
16924
&& ( register_operand (operands[0], DFmode)
16925
|| register_operand (operands[1], DFmode))"
16926
@@ -7617,23 +8103,64 @@
16927
;; if-conversion can not reduce to a conditional compare, so we do
16930
-(define_insn "*arm_cmpdi_insn"
16931
+(define_insn_and_split "*arm_cmpdi_insn"
16932
[(set (reg:CC_NCV CC_REGNUM)
16933
(compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r")
16934
(match_operand:DI 1 "arm_di_operand" "rDi")))
16935
(clobber (match_scratch:SI 2 "=r"))]
16937
- "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
16938
+ "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
16939
+ "&& reload_completed"
16940
+ [(set (reg:CC CC_REGNUM)
16941
+ (compare:CC (match_dup 0) (match_dup 1)))
16942
+ (parallel [(set (reg:CC CC_REGNUM)
16943
+ (compare:CC (match_dup 3) (match_dup 4)))
16944
+ (set (match_dup 2)
16945
+ (minus:SI (match_dup 5)
16946
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])]
16948
+ operands[3] = gen_highpart (SImode, operands[0]);
16949
+ operands[0] = gen_lowpart (SImode, operands[0]);
16950
+ if (CONST_INT_P (operands[1]))
16952
+ operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode,
16955
+ operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]);
16959
+ operands[4] = gen_highpart (SImode, operands[1]);
16960
+ operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]);
16962
+ operands[1] = gen_lowpart (SImode, operands[1]);
16963
+ operands[2] = gen_lowpart (SImode, operands[2]);
16965
[(set_attr "conds" "set")
16966
(set_attr "length" "8")]
16969
-(define_insn "*arm_cmpdi_unsigned"
16970
+(define_insn_and_split "*arm_cmpdi_unsigned"
16971
[(set (reg:CC_CZ CC_REGNUM)
16972
(compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
16973
(match_operand:DI 1 "arm_di_operand" "rDi")))]
16975
- "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
16976
+ "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
16977
+ "&& reload_completed"
16978
+ [(set (reg:CC CC_REGNUM)
16979
+ (compare:CC (match_dup 2) (match_dup 3)))
16980
+ (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0))
16981
+ (set (reg:CC CC_REGNUM)
16982
+ (compare:CC (match_dup 0) (match_dup 1))))]
16984
+ operands[2] = gen_highpart (SImode, operands[0]);
16985
+ operands[0] = gen_lowpart (SImode, operands[0]);
16986
+ if (CONST_INT_P (operands[1]))
16987
+ operands[3] = gen_highpart_mode (SImode, DImode, operands[1]);
16989
+ operands[3] = gen_highpart (SImode, operands[1]);
16990
+ operands[1] = gen_lowpart (SImode, operands[1]);
16992
[(set_attr "conds" "set")
16993
(set_attr "length" "8")]
16995
@@ -7758,36 +8285,56 @@
16996
operands[3] = const0_rtx;"
16999
-(define_insn "*mov_scc"
17000
+(define_insn_and_split "*mov_scc"
17001
[(set (match_operand:SI 0 "s_register_operand" "=r")
17002
(match_operator:SI 1 "arm_comparison_operator"
17003
[(match_operand 2 "cc_register" "") (const_int 0)]))]
17005
- "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
17006
+ "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
17008
+ [(set (match_dup 0)
17009
+ (if_then_else:SI (match_dup 1)
17013
[(set_attr "conds" "use")
17014
- (set_attr "insn" "mov")
17015
(set_attr "length" "8")]
17018
-(define_insn "*mov_negscc"
17019
+(define_insn_and_split "*mov_negscc"
17020
[(set (match_operand:SI 0 "s_register_operand" "=r")
17021
(neg:SI (match_operator:SI 1 "arm_comparison_operator"
17022
[(match_operand 2 "cc_register" "") (const_int 0)])))]
17024
- "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
17025
+ "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
17027
+ [(set (match_dup 0)
17028
+ (if_then_else:SI (match_dup 1)
17032
+ operands[3] = GEN_INT (~0);
17034
[(set_attr "conds" "use")
17035
- (set_attr "insn" "mov")
17036
(set_attr "length" "8")]
17039
-(define_insn "*mov_notscc"
17040
+(define_insn_and_split "*mov_notscc"
17041
[(set (match_operand:SI 0 "s_register_operand" "=r")
17042
(not:SI (match_operator:SI 1 "arm_comparison_operator"
17043
[(match_operand 2 "cc_register" "") (const_int 0)])))]
17045
- "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
17046
+ "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
17048
+ [(set (match_dup 0)
17049
+ (if_then_else:SI (match_dup 1)
17053
+ operands[3] = GEN_INT (~1);
17054
+ operands[4] = GEN_INT (~0);
17056
[(set_attr "conds" "use")
17057
- (set_attr "insn" "mov")
17058
(set_attr "length" "8")]
17061
@@ -8110,7 +8657,40 @@
17065
-(define_insn "*movsicc_insn"
17066
+(define_insn "*cmov<mode>"
17067
+ [(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>")
17068
+ (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator"
17069
+ [(match_operand 2 "cc_register" "") (const_int 0)])
17070
+ (match_operand:SDF 3 "s_register_operand"
17071
+ "<F_constraint>")
17072
+ (match_operand:SDF 4 "s_register_operand"
17073
+ "<F_constraint>")))]
17074
+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
17077
+ enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]);
17084
+ return \"vsel%d1.<V_if_elem>\\t%<V_reg>0, %<V_reg>3, %<V_reg>4\";
17089
+ return \"vsel%D1.<V_if_elem>\\t%<V_reg>0, %<V_reg>4, %<V_reg>3\";
17091
+ gcc_unreachable ();
17095
+ [(set_attr "conds" "use")
17096
+ (set_attr "type" "f_sel<vfp_type>")]
17099
+(define_insn_and_split "*movsicc_insn"
17100
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
17102
(match_operator 3 "arm_comparison_operator"
17103
@@ -8123,10 +8703,45 @@
17107
- mov%d3\\t%0, %1\;mov%D3\\t%0, %2
17108
- mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
17109
- mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
17110
- mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
17115
+ ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2
17116
+ ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
17117
+ ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
17118
+ ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
17119
+ "&& reload_completed"
17122
+ enum rtx_code rev_code;
17123
+ enum machine_mode mode;
17126
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
17128
+ gen_rtx_SET (VOIDmode,
17132
+ rev_code = GET_CODE (operands[3]);
17133
+ mode = GET_MODE (operands[4]);
17134
+ if (mode == CCFPmode || mode == CCFPEmode)
17135
+ rev_code = reverse_condition_maybe_unordered (rev_code);
17137
+ rev_code = reverse_condition (rev_code);
17139
+ rev_cond = gen_rtx_fmt_ee (rev_code,
17143
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
17145
+ gen_rtx_SET (VOIDmode,
17150
[(set_attr "length" "4,4,4,4,8,8,8,8")
17151
(set_attr "conds" "use")
17152
(set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")
17153
@@ -9095,27 +9710,64 @@
17154
(set_attr "type" "alu_shift,alu_shift_reg")])
17157
-(define_insn "*and_scc"
17158
+(define_insn_and_split "*and_scc"
17159
[(set (match_operand:SI 0 "s_register_operand" "=r")
17160
(and:SI (match_operator:SI 1 "arm_comparison_operator"
17161
- [(match_operand 3 "cc_register" "") (const_int 0)])
17162
- (match_operand:SI 2 "s_register_operand" "r")))]
17163
+ [(match_operand 2 "cc_register" "") (const_int 0)])
17164
+ (match_operand:SI 3 "s_register_operand" "r")))]
17166
- "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
17167
+ "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1"
17168
+ "&& reload_completed"
17169
+ [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0)))
17170
+ (cond_exec (match_dup 4) (set (match_dup 0)
17171
+ (and:SI (match_dup 3) (const_int 1))))]
17173
+ enum machine_mode mode = GET_MODE (operands[2]);
17174
+ enum rtx_code rc = GET_CODE (operands[1]);
17176
+ /* Note that operands[4] is the same as operands[1],
17177
+ but with VOIDmode as the result. */
17178
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
17179
+ if (mode == CCFPmode || mode == CCFPEmode)
17180
+ rc = reverse_condition_maybe_unordered (rc);
17182
+ rc = reverse_condition (rc);
17183
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
17185
[(set_attr "conds" "use")
17186
(set_attr "insn" "mov")
17187
(set_attr "length" "8")]
17190
-(define_insn "*ior_scc"
17191
+(define_insn_and_split "*ior_scc"
17192
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
17193
- (ior:SI (match_operator:SI 2 "arm_comparison_operator"
17194
- [(match_operand 3 "cc_register" "") (const_int 0)])
17195
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
17196
+ (ior:SI (match_operator:SI 1 "arm_comparison_operator"
17197
+ [(match_operand 2 "cc_register" "") (const_int 0)])
17198
+ (match_operand:SI 3 "s_register_operand" "0,?r")))]
17201
- orr%d2\\t%0, %1, #1
17202
- mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
17203
+ orr%d1\\t%0, %3, #1
17205
+ "&& reload_completed
17206
+ && REGNO (operands [0]) != REGNO (operands[3])"
17207
+ ;; && which_alternative == 1
17208
+ ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1
17209
+ [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3)))
17210
+ (cond_exec (match_dup 4) (set (match_dup 0)
17211
+ (ior:SI (match_dup 3) (const_int 1))))]
17213
+ enum machine_mode mode = GET_MODE (operands[2]);
17214
+ enum rtx_code rc = GET_CODE (operands[1]);
17216
+ /* Note that operands[4] is the same as operands[1],
17217
+ but with VOIDmode as the result. */
17218
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
17219
+ if (mode == CCFPmode || mode == CCFPEmode)
17220
+ rc = reverse_condition_maybe_unordered (rc);
17222
+ rc = reverse_condition (rc);
17223
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
17225
[(set_attr "conds" "use")
17226
(set_attr "length" "4,8")]
17228
@@ -9822,24 +10474,75 @@
17230
;; ??? The conditional patterns above need checking for Thumb-2 usefulness
17232
-(define_insn "*negscc"
17233
+(define_insn_and_split "*negscc"
17234
[(set (match_operand:SI 0 "s_register_operand" "=r")
17235
(neg:SI (match_operator 3 "arm_comparison_operator"
17236
[(match_operand:SI 1 "s_register_operand" "r")
17237
(match_operand:SI 2 "arm_rhs_operand" "rI")])))
17238
(clobber (reg:CC CC_REGNUM))]
17241
- if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
17242
- return \"mov\\t%0, %1, asr #31\";
17244
+ "&& reload_completed"
17247
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
17249
- if (GET_CODE (operands[3]) == NE)
17250
- return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\";
17251
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
17253
+ /* Emit mov\\t%0, %1, asr #31 */
17254
+ emit_insn (gen_rtx_SET (VOIDmode,
17256
+ gen_rtx_ASHIFTRT (SImode,
17261
+ else if (GET_CODE (operands[3]) == NE)
17263
+ /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */
17264
+ if (CONST_INT_P (operands[2]))
17265
+ emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2],
17266
+ GEN_INT (- INTVAL (operands[2]))));
17268
+ emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2]));
17270
- output_asm_insn (\"cmp\\t%1, %2\", operands);
17271
- output_asm_insn (\"mov%D3\\t%0, #0\", operands);
17272
- return \"mvn%d3\\t%0, #0\";
17274
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
17275
+ gen_rtx_NE (SImode,
17278
+ gen_rtx_SET (SImode,
17285
+ /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */
17286
+ emit_insn (gen_rtx_SET (VOIDmode,
17288
+ gen_rtx_COMPARE (CCmode, operands[1], operands[2])));
17289
+ enum rtx_code rc = GET_CODE (operands[3]);
17291
+ rc = reverse_condition (rc);
17292
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
17293
+ gen_rtx_fmt_ee (rc,
17297
+ gen_rtx_SET (VOIDmode, operands[0], const0_rtx)));
17298
+ rc = GET_CODE (operands[3]);
17299
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
17300
+ gen_rtx_fmt_ee (rc,
17304
+ gen_rtx_SET (VOIDmode,
17311
[(set_attr "conds" "clob")
17312
(set_attr "length" "12")]
17314
@@ -11626,6 +12329,9 @@
17315
(set_attr "predicable" "yes")])
17318
+;; Load the load/store double peephole optimizations.
17319
+(include "ldrdstrd.md")
17321
;; Load the load/store multiple patterns
17322
(include "ldmstm.md")
17324
--- a/src/gcc/config/arm/neon-gen.ml
17325
+++ b/src/gcc/config/arm/neon-gen.ml
17326
@@ -121,6 +121,7 @@
17327
| T_uint16 | T_int16 -> T_intHI
17328
| T_uint32 | T_int32 -> T_intSI
17329
| T_uint64 | T_int64 -> T_intDI
17330
+ | T_float16 -> T_floatHF
17331
| T_float32 -> T_floatSF
17332
| T_poly8 -> T_intQI
17333
| T_poly16 -> T_intHI
17334
@@ -275,8 +276,8 @@
17335
let mode = mode_of_elt elttype shape in
17336
string_of_mode mode
17337
with MixedMode (dst, src) ->
17338
- let dstmode = mode_of_elt dst shape
17339
- and srcmode = mode_of_elt src shape in
17340
+ let dstmode = mode_of_elt ~argpos:0 dst shape
17341
+ and srcmode = mode_of_elt ~argpos:1 src shape in
17342
string_of_mode dstmode ^ string_of_mode srcmode
17344
let get_shuffle features =
17345
@@ -291,19 +292,24 @@
17346
match List.find (fun feature ->
17347
match feature with Requires_feature _ -> true
17348
| Requires_arch _ -> true
17349
+ | Requires_FP_bit _ -> true
17352
- Requires_feature feature ->
17353
+ Requires_feature feature ->
17354
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
17355
| Requires_arch arch ->
17356
Format.printf "#if __ARM_ARCH >= %d@\n" arch
17357
+ | Requires_FP_bit bit ->
17358
+ Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
17360
| _ -> assert false
17361
with Not_found -> assert true
17363
let print_feature_test_end features =
17365
- List.exists (function Requires_feature x -> true
17366
- | Requires_arch x -> true
17367
+ List.exists (function Requires_feature _ -> true
17368
+ | Requires_arch _ -> true
17369
+ | Requires_FP_bit _ -> true
17370
| _ -> false) features in
17371
if feature then Format.printf "#endif@\n"
17373
@@ -365,6 +371,7 @@
17374
"__builtin_neon_hi", "int", 16, 4;
17375
"__builtin_neon_si", "int", 32, 2;
17376
"__builtin_neon_di", "int", 64, 1;
17377
+ "__builtin_neon_hf", "float", 16, 4;
17378
"__builtin_neon_sf", "float", 32, 2;
17379
"__builtin_neon_poly8", "poly", 8, 8;
17380
"__builtin_neon_poly16", "poly", 16, 4;
17381
--- a/src/libobjc/ChangeLog.linaro
17382
+++ b/src/libobjc/ChangeLog.linaro
17384
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17386
+ * GCC Linaro 4.8-2013.04 released.
17387
--- a/src/libgfortran/ChangeLog.linaro
17388
+++ b/src/libgfortran/ChangeLog.linaro
17390
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17392
+ * GCC Linaro 4.8-2013.04 released.
17393
--- a/src/libada/ChangeLog.linaro
17394
+++ b/src/libada/ChangeLog.linaro
17396
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17398
+ * GCC Linaro 4.8-2013.04 released.
17399
--- a/src/libffi/ChangeLog.linaro
17400
+++ b/src/libffi/ChangeLog.linaro
17402
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17404
+ * GCC Linaro 4.8-2013.04 released.
17405
--- a/src/libssp/ChangeLog.linaro
17406
+++ b/src/libssp/ChangeLog.linaro
17408
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17410
+ * GCC Linaro 4.8-2013.04 released.
17411
--- a/src/libcpp/ChangeLog.linaro
17412
+++ b/src/libcpp/ChangeLog.linaro
17414
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17416
+ * GCC Linaro 4.8-2013.04 released.
17417
--- a/src/libcpp/po/ChangeLog.linaro
17418
+++ b/src/libcpp/po/ChangeLog.linaro
17420
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17422
+ * GCC Linaro 4.8-2013.04 released.
17423
--- a/src/fixincludes/ChangeLog.linaro
17424
+++ b/src/fixincludes/ChangeLog.linaro
17426
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
17428
+ * GCC Linaro 4.8-2013.04 released.