1
# DP: Changes for the Linaro 4.8-2013.04 release.
3
LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@197294 \
4
svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@198546 \
5
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/libitm/ChangeLog.linaro
8
+++ b/src/libitm/ChangeLog.linaro
10
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
12
+ * GCC Linaro 4.8-2013.04 released.
13
--- a/src/libgomp/ChangeLog.linaro
14
+++ b/src/libgomp/ChangeLog.linaro
16
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
18
+ * GCC Linaro 4.8-2013.04 released.
19
--- a/src/libquadmath/ChangeLog.linaro
20
+++ b/src/libquadmath/ChangeLog.linaro
22
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
24
+ * GCC Linaro 4.8-2013.04 released.
25
--- a/src/libsanitizer/ChangeLog.linaro
26
+++ b/src/libsanitizer/ChangeLog.linaro
28
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
30
+ * GCC Linaro 4.8-2013.04 released.
31
--- a/src/zlib/ChangeLog.linaro
32
+++ b/src/zlib/ChangeLog.linaro
34
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
36
+ * GCC Linaro 4.8-2013.04 released.
37
--- a/src/libstdc++-v3/ChangeLog.linaro
38
+++ b/src/libstdc++-v3/ChangeLog.linaro
40
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
42
+ * GCC Linaro 4.8-2013.04 released.
43
--- a/src/intl/ChangeLog.linaro
44
+++ b/src/intl/ChangeLog.linaro
46
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
48
+ * GCC Linaro 4.8-2013.04 released.
49
--- a/src/ChangeLog.linaro
50
+++ b/src/ChangeLog.linaro
52
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
54
+ * GCC Linaro 4.8-2013.04 released.
55
--- a/src/libmudflap/ChangeLog.linaro
56
+++ b/src/libmudflap/ChangeLog.linaro
58
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
60
+ * GCC Linaro 4.8-2013.04 released.
61
--- a/src/boehm-gc/ChangeLog.linaro
62
+++ b/src/boehm-gc/ChangeLog.linaro
64
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
66
+ Backport from trunk r197770.
68
+ 2013-03-16 Yvan Roux <yvan.roux@linaro.org>
70
+ * include/private/gcconfig.h (AARCH64): New macro (defined only if
72
+ (mach_type_known): Update comment adding ARM AArch64 target.
73
+ (NOSYS, mach_type_known,CPP_WORDSZ, MACH_TYPE, ALIGNMENT, HBLKSIZE,
74
+ OS_TYPE, LINUX_STACKBOTTOM, USE_GENERIC_PUSH_REGS, DYNAMIC_LOADING,
75
+ DATASTART, DATAEND, STACKBOTTOM): Define for AArch64.
77
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
79
+ * GCC Linaro 4.8-2013.04 released.
80
--- a/src/boehm-gc/include/private/gcconfig.h
81
+++ b/src/boehm-gc/include/private/gcconfig.h
85
/* Determine the machine type: */
86
+#if defined(__aarch64__)
90
+# define mach_type_known
93
# if defined(__arm__) || defined(__thumb__)
95
# if !defined(LINUX) && !defined(NETBSD)
98
# define mach_type_known
100
+# if defined(LINUX) && defined(__aarch64__)
102
+# define mach_type_known
104
# if defined(LINUX) && defined(__arm__)
106
# define mach_type_known
108
/* running Amdahl UTS4 */
109
/* S390 ==> 390-like machine */
111
+ /* AARCH64 ==> ARM AArch64 */
112
/* ARM32 ==> Intel StrongARM */
113
/* IA64 ==> Intel IPF */
115
@@ -1833,6 +1845,32 @@
120
+# define CPP_WORDSZ 64
121
+# define MACH_TYPE "AARCH64"
122
+# define ALIGNMENT 8
124
+# define HBLKSIZE 4096
127
+# define OS_TYPE "LINUX"
128
+# define LINUX_STACKBOTTOM
129
+# define USE_GENERIC_PUSH_REGS
130
+# define DYNAMIC_LOADING
131
+ extern int __data_start[];
132
+# define DATASTART ((ptr_t)__data_start)
133
+ extern char _end[];
134
+# define DATAEND ((ptr_t)(&_end))
137
+ /* __data_start is usually defined in the target linker script. */
138
+ extern int __data_start[];
139
+# define DATASTART ((ptr_t)__data_start)
140
+ extern void *__stack_base__;
141
+# define STACKBOTTOM ((ptr_t)__stack_base__)
146
# define CPP_WORDSZ 32
147
# define MACH_TYPE "ARM32"
148
--- a/src/include/ChangeLog.linaro
149
+++ b/src/include/ChangeLog.linaro
151
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
153
+ * GCC Linaro 4.8-2013.04 released.
154
--- a/src/libiberty/ChangeLog.linaro
155
+++ b/src/libiberty/ChangeLog.linaro
157
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
159
+ * GCC Linaro 4.8-2013.04 released.
160
--- a/src/lto-plugin/ChangeLog.linaro
161
+++ b/src/lto-plugin/ChangeLog.linaro
163
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
165
+ * GCC Linaro 4.8-2013.04 released.
166
--- a/src/contrib/regression/ChangeLog.linaro
167
+++ b/src/contrib/regression/ChangeLog.linaro
169
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
171
+ * GCC Linaro 4.8-2013.04 released.
172
--- a/src/contrib/config-list.mk
173
+++ b/src/contrib/config-list.mk
175
# nohup nice make -j25 -l36 -f ../gcc/contrib/config-list.mk > make.out 2>&1 &
177
# v850e1-elf is rejected by config.sub
178
-LIST = alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
179
+LIST = aarch64-elf aarch64-linux-gnu \
180
+ alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
181
alpha64-dec-vms alpha-dec-vms am33_2.0-linux \
182
arm-wrs-vxworks arm-netbsdelf \
183
arm-linux-androideabi arm-uclinux_eabi arm-eabi \
184
--- a/src/contrib/ChangeLog.linaro
185
+++ b/src/contrib/ChangeLog.linaro
187
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
189
+ Backport from trunk r198443.
190
+ 2013-04-22 Sofiane Naci <sofiane.naci@arm.com>
192
+ * config-list.mk (LIST): Add aarch64-elf and aarch64-linux-gnu.
194
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
196
+ * GCC Linaro 4.8-2013.04 released.
197
--- a/src/contrib/reghunt/ChangeLog.linaro
198
+++ b/src/contrib/reghunt/ChangeLog.linaro
200
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
202
+ * GCC Linaro 4.8-2013.04 released.
203
--- a/src/libatomic/ChangeLog.linaro
204
+++ b/src/libatomic/ChangeLog.linaro
206
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
208
+ * GCC Linaro 4.8-2013.04 released.
209
--- a/src/config/ChangeLog.linaro
210
+++ b/src/config/ChangeLog.linaro
212
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
214
+ * GCC Linaro 4.8-2013.04 released.
215
--- a/src/libbacktrace/ChangeLog.linaro
216
+++ b/src/libbacktrace/ChangeLog.linaro
218
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
220
+ * GCC Linaro 4.8-2013.04 released.
221
--- a/src/libjava/libltdl/ChangeLog.linaro
222
+++ b/src/libjava/libltdl/ChangeLog.linaro
224
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
226
+ * GCC Linaro 4.8-2013.04 released.
227
--- a/src/libjava/ChangeLog.linaro
228
+++ b/src/libjava/ChangeLog.linaro
230
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
232
+ * GCC Linaro 4.8-2013.04 released.
233
--- a/src/libjava/classpath/ChangeLog.linaro
234
+++ b/src/libjava/classpath/ChangeLog.linaro
236
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
238
+ * GCC Linaro 4.8-2013.04 released.
239
--- a/src/gnattools/ChangeLog.linaro
240
+++ b/src/gnattools/ChangeLog.linaro
242
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
244
+ * GCC Linaro 4.8-2013.04 released.
245
--- a/src/maintainer-scripts/ChangeLog.linaro
246
+++ b/src/maintainer-scripts/ChangeLog.linaro
248
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
250
+ * GCC Linaro 4.8-2013.04 released.
251
--- a/src/libgcc/ChangeLog.linaro
252
+++ b/src/libgcc/ChangeLog.linaro
254
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
256
+ Backport from trunk r198090.
257
+ 2013-04-19 Yufeng Zhang <yufeng.zhang@arm.com>
259
+ * config/aarch64/sfp-machine.h (_FP_W_TYPE): Change to define
260
+ as 'unsigned long long' instead of 'unsigned long'.
261
+ (_FP_WS_TYPE): Change to define as 'signed long long' instead of
264
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
266
+ * GCC Linaro 4.8-2013.04 released.
267
--- a/src/libgcc/config/aarch64/sfp-machine.h
268
+++ b/src/libgcc/config/aarch64/sfp-machine.h
270
<http://www.gnu.org/licenses/>. */
272
#define _FP_W_TYPE_SIZE 64
273
-#define _FP_W_TYPE unsigned long
274
-#define _FP_WS_TYPE signed long
275
+#define _FP_W_TYPE unsigned long long
276
+#define _FP_WS_TYPE signed long long
277
#define _FP_I_TYPE int
279
typedef int TItype __attribute__ ((mode (TI)));
280
--- a/src/libgcc/config/libbid/ChangeLog.linaro
281
+++ b/src/libgcc/config/libbid/ChangeLog.linaro
283
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
285
+ * GCC Linaro 4.8-2013.04 released.
286
--- a/src/libdecnumber/ChangeLog.linaro
287
+++ b/src/libdecnumber/ChangeLog.linaro
289
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
291
+ * GCC Linaro 4.8-2013.04 released.
292
--- a/src/gcc/LINARO-VERSION
293
+++ b/src/gcc/LINARO-VERSION
296
--- a/src/gcc/hooks.c
297
+++ b/src/gcc/hooks.c
302
+/* Generic hook that takes (gimple_stmt_iterator *) and returns
305
+hook_bool_gsiptr_false (gimple_stmt_iterator *a ATTRIBUTE_UNUSED)
310
/* Used for the TARGET_ASM_CAN_OUTPUT_MI_THUNK hook. */
312
hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree a ATTRIBUTE_UNUSED,
313
--- a/src/gcc/hooks.h
314
+++ b/src/gcc/hooks.h
316
extern bool hook_bool_const_tree_false (const_tree);
317
extern bool hook_bool_tree_true (tree);
318
extern bool hook_bool_const_tree_true (const_tree);
319
+extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *);
320
extern bool hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree,
323
--- a/src/gcc/c-family/ChangeLog.linaro
324
+++ b/src/gcc/c-family/ChangeLog.linaro
326
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
328
+ * GCC Linaro 4.8-2013.04 released.
329
--- a/src/gcc/java/ChangeLog.linaro
330
+++ b/src/gcc/java/ChangeLog.linaro
332
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
334
+ * GCC Linaro 4.8-2013.04 released.
335
--- a/src/gcc/c/ChangeLog.linaro
336
+++ b/src/gcc/c/ChangeLog.linaro
338
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
340
+ * GCC Linaro 4.8-2013.04 released.
341
--- a/src/gcc/target.def
342
+++ b/src/gcc/target.def
343
@@ -1289,13 +1289,24 @@
345
tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL)
347
-/* Fold a target-specific builtin. */
348
+/* Fold a target-specific builtin to a tree valid for both GIMPLE
353
tree, (tree fndecl, int n_args, tree *argp, bool ignore),
354
hook_tree_tree_int_treep_bool_null)
356
+/* Fold a target-specific builtin to a valid GIMPLE tree. */
358
+(gimple_fold_builtin,
359
+ "Fold a call to a machine specific built-in function that was set up\n\
360
+by @samp{TARGET_INIT_BUILTINS}. @var{gsi} points to the gimple\n\
361
+statement holding the function call. Returns true if any change\n\
362
+was made to the GIMPLE stream.",
363
+ bool, (gimple_stmt_iterator *gsi),
364
+ hook_bool_gsiptr_false)
366
/* Target hook is used to compare the target attributes in two functions to
367
determine which function's features get higher priority. This is used
368
during function multi-versioning to figure out the order in which two
369
--- a/src/gcc/configure
370
+++ b/src/gcc/configure
371
@@ -1658,7 +1658,8 @@
372
use sysroot as the system root during the build
373
--with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
374
--with-specs=SPECS add SPECS to driver command-line processing
375
- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
376
+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
377
+ GCC `cat $srcdir/LINARO-VERSION`"
378
--with-bugurl=URL Direct users to URL to report a bug
379
--with-multilib-list select multilibs (SH and x86-64 only)
380
--with-gnu-ld assume the C compiler uses GNU ld default=no
381
@@ -7327,7 +7328,7 @@
382
*) PKGVERSION="($withval) " ;;
385
- PKGVERSION="(GCC) "
386
+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
390
--- a/src/gcc/objc/ChangeLog.linaro
391
+++ b/src/gcc/objc/ChangeLog.linaro
393
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
395
+ * GCC Linaro 4.8-2013.04 released.
396
--- a/src/gcc/ChangeLog.linaro
397
+++ b/src/gcc/ChangeLog.linaro
399
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
401
+ Backport from trunk r198019.
402
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
404
+ * config/aarch64/aarch64.md (*adds_mul_imm_<mode>): New pattern.
405
+ (*subs_mul_imm_<mode>): New pattern.
407
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
409
+ Backport from trunk r198424-198425.
410
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
412
+ * config/aarch64/aarch64.md (movsi_aarch64): Support LDR/STR
413
+ from/to S register.
414
+ (movdi_aarch64): Support LDR/STR from/to D register.
416
+ 2013-04-29 Ian Bolton <ian.bolton@arm.com>
418
+ * common/config/aarch64/aarch64-common.c: Enable REE pass at O2
419
+ or higher by default.
421
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
423
+ Backport from trunk r198412.
424
+ 2013-04-29 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
426
+ * config/arm/arm.md (store_minmaxsi): Use only when
427
+ optimize_insn_for_size_p.
429
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
431
+ Backport from trunk 198394,198396-198400,198402-198404.
432
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
434
+ * config/aarch64/arm_neon.h
435
+ (vcvt<sd>_f<32,64>_s<32,64>): Rewrite in C.
436
+ (vcvt<q>_f<32,64>_s<32,64>): Rewrite using builtins.
437
+ (vcvt_<high_>_f<32,64>_f<32,64>): Likewise.
438
+ (vcvt<qsd>_<su><32,64>_f<32,64>): Likewise.
439
+ (vcvta<qsd>_<su><32,64>_f<32,64>): Likewise.
440
+ (vcvtm<qsd>_<su><32,64>_f<32,64>): Likewise.
441
+ (vcvtn<qsd>_<su><32,64>_f<32,64>): Likewise.
442
+ (vcvtp<qsd>_<su><32,64>_f<32,64>): Likewise.
444
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
446
+ * config/aarch64/aarch64-simd.md
447
+ (<optab><VDQF:mode><fcvt_target>2): New, maps to fix, fixuns.
448
+ (<fix_trunc_optab><VDQF:mode><fcvt_target>2): New, maps to
449
+ fix_trunc, fixuns_trunc.
450
+ (ftrunc<VDQF:mode>2): New.
451
+ * config/aarch64/iterators.md (optab): Add fix, fixuns.
452
+ (fix_trunc_optab): New.
454
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
456
+ * config/aarch64/aarch64-builtins.c
457
+ (aarch64_builtin_vectorized_function): Vectorize over ifloorf,
458
+ iceilf, lround, iroundf.
460
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
462
+ * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
463
+ (float_truncate_hi_): Likewise.
464
+ (float_extend_lo_): Likewise.
465
+ (float_truncate_lo_): Likewise.
466
+ * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New.
467
+ (aarch64_float_extend_lo_v2df): Likewise.
468
+ (vec_unpacks_hi_v4sf): Likewise.
469
+ (aarch64_float_truncate_lo_v2sf): Likewise.
470
+ (aarch64_float_truncate_hi_v4sf): Likewise.
471
+ (vec_pack_trunc_v2df): Likewise.
472
+ (vec_pack_trunc_df): Likewise.
474
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
476
+ * config/aarch64/aarch64-builtins.c
477
+ (aarch64_fold_builtin): Fold float conversions.
478
+ * config/aarch64/aarch64-simd-builtins.def
479
+ (floatv2si, floatv4si, floatv2di): New.
480
+ (floatunsv2si, floatunsv4si, floatunsv2di): Likewise.
481
+ * config/aarch64/aarch64-simd.md
482
+ (<optab><fcvt_target><VDQF:mode>2): New, expands to float and floatuns.
483
+ * config/aarch64/iterators.md (FLOATUORS): New.
484
+ (optab): Add float, floatuns.
485
+ (su_optab): Likewise.
487
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
489
+ * config/aarch64/aarch64-builtins.c
490
+ (aarch64_builtin_vectorized_function): Fold to standard pattern names.
491
+ * config/aarch64/aarch64-simd-builtins.def (frintn): New.
492
+ (frintz): Rename to...
494
+ (frintp): Rename to...
496
+ (frintm): Rename to...
498
+ (frinti): Rename to...
499
+ (nearbyint): ...this.
500
+ (frintx): Rename to...
502
+ (frinta): Rename to...
504
+ * config/aarch64/aarch64-simd.md
505
+ (aarch64_frint<frint_suffix><mode>): Delete.
506
+ (<frint_pattern><mode>2): Convert to insn.
507
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRINTN.
508
+ * config/aarch64/iterators.md (FRINT): Add UNSPEC_FRINTN.
509
+ (frint_pattern): Likewise.
510
+ (frint_suffix): Likewise.
512
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
514
+ Backport from trunk r198302-198306,198316.
515
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
517
+ * config/aarch64/aarch64-simd.md
518
+ (aarch64_simd_bsl<mode>_internal): Rewrite RTL to not use UNSPEC_BSL.
519
+ (aarch64_simd_bsl<mode>): Likewise.
520
+ * config/aarch64/iterators.md (unspec): Remove UNSPEC_BSL.
522
+ 2013-04-25 James Greenhalgh <jame.greenhalgh@arm.com>
524
+ * config/aarch64/aarch64-simd.md (neg<mode>2): Use VDQ iterator.
526
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
528
+ * config/aarch64/aarch64-builtins.c
529
+ (aarch64_fold_builtin): New.
530
+ * config/aarch64/aarch64-protos.h (aarch64_fold_builtin): New.
531
+ * config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
532
+ * config/aarch64/aarch64-simd-builtins.def (abs): New.
533
+ * config/aarch64/arm_neon.h
534
+ (vabs<q>_<f32, 64>): Implement using __builtin_aarch64_fabs.
536
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
537
+ Tejas Belagod <tejas.belagod@arm.com>
539
+ * config/aarch64/aarch64-builtins.c
540
+ (aarch64_gimple_fold_builtin): New.
541
+ * config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
542
+ * config/aarch64/aarch64-simd-builtins.def (addv): New.
543
+ * config/aarch64/aarch64-simd.md (addpv4sf): New.
544
+ (addvv4sf): Update.
545
+ * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.
547
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
549
+ * config/aarch64/aarch64.md
550
+ (*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>): New pattern.
552
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
554
+ * config/aarch64/aarch64.md (*ngc<mode>): New pattern.
555
+ (*ngcsi_uxtw): New pattern.
557
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
559
+ Backport from trunk 198298.
560
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
561
+ Julian Brown <julian@codesourcery.com>
563
+ * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
564
+ (TB_DREG): Add T_V4HF.
565
+ (v4hf_UP): New macro.
566
+ (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
567
+ (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
569
+ Handle initialisation of V4HF. Adjust initialisation of reinterpret
571
+ (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
573
+ (arm_vector_mode_supported_p): Handle V4HF.
574
+ (arm_mangle_map): Handle V4HFmode.
575
+ * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
576
+ * config/arm/arm_neon_builtins.def: Add entries for
577
+ vcvtv4hfv4sf, vcvtv4sfv4hf.
578
+ * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
579
+ (neon_vcvtv4hfv4sf): Likewise.
580
+ * config/arm/neon-gen.ml: Handle half-precision floating point
582
+ * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
583
+ * config/arm/arm_neon.h: Regenerate.
584
+ * config/arm/neon.ml (type elts): Add F16.
585
+ (type vectype): Add T_float16x4, T_floatHF.
586
+ (type vecmode): Add V4HF.
587
+ (type features): Add Requires_FP_bit feature.
588
+ (elt_width): Handle F16.
589
+ (elt_class): Likewise.
590
+ (elt_of_class_width): Likewise.
591
+ (mode_of_elt): Refactor.
592
+ (type_for_elt): Handle F16, fix error messages.
593
+ (vectype_size): Handle T_float16x4.
594
+ (vcvt_sh): New function.
595
+ (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
596
+ (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
597
+ (string_of_mode): Handle V4HF.
598
+ * doc/arm-neon-intrinsics.texi: Regenerate.
600
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
602
+ Backport from trunk r198136-198137,198142,198176.
603
+ 2013-04-23 Andreas Schwab <schwab@linux-m68k.org>
605
+ * coretypes.h (gimple_stmt_iterator): Add struct to make
608
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
610
+ * coretypes.h (gimple_stmt_iterator_d): Forward declare.
611
+ (gimple_stmt_iterator): New typedef.
612
+ * gimple.h (gimple_stmt_iterator): Rename to...
613
+ (gimple_stmt_iterator_d): ... This.
614
+ * doc/tm.texi.in (TARGET_FOLD_BUILTIN): Detail restriction that
615
+ trees be valid for GIMPLE and GENERIC.
616
+ (TARGET_GIMPLE_FOLD_BUILTIN): New.
617
+ * gimple-fold.c (gimple_fold_call): Call target hook
618
+ gimple_fold_builtin.
619
+ * hooks.c (hook_bool_gsiptr_false): New.
620
+ * hooks.h (hook_bool_gsiptr_false): New.
621
+ * target.def (fold_stmt): New.
622
+ * doc/tm.texi: Regenerate.
624
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
626
+ * config/aarch64/aarch64-builtins.c
628
+ (CF0, CF1, CF2, CF3, CF4, CF10): New.
629
+ (VAR<1-12>): Add MAP parameter.
630
+ (BUILTIN_*): Likewise.
631
+ * config/aarch64/aarch64-simd-builtins.def: Set MAP parameter.
632
+ * config/aarch64/aarch64-simd.md (aarch64_sshl_n<mode>): Remove.
633
+ (aarch64_ushl_n<mode>): Likewise.
634
+ (aarch64_sshr_n<mode>): Likewise.
635
+ (aarch64_ushr_n<mode>): Likewise.
636
+ (aarch64_<maxmin><mode>): Likewise.
637
+ (aarch64_sqrt<mode>): Likewise.
638
+ * config/aarch64/arm_neon.h (vshl<q>_n_*): Use new builtin names.
639
+ (vshr<q>_n_*): Likewise.
641
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
643
+ * config/aarch64/aarch64-builtins.c
644
+ (aarch64_simd_builtin_type_mode): Handle SF types.
646
+ (BUILTIN_GPF): Define.
647
+ (aarch64_init_simd_builtins): Handle SF types.
648
+ * config/aarch64/aarch64-simd-builtins.def (frecpe): Add support.
649
+ (frecps): Likewise.
650
+ (frecpx): Likewise.
651
+ * config/aarch64/aarch64-simd.md
652
+ (simd_types): Update simd_frcp<esx> to simd_frecp<esx>.
653
+ (aarch64_frecpe<mode>): New.
654
+ (aarch64_frecps<mode>): Likewise.
655
+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP<ESX>.
656
+ (v8type): Add frecp<esx>.
657
+ (aarch64_frecp<FRECP:frecp_suffix><mode>): New.
658
+ (aarch64_frecps<mode>): Likewise.
659
+ * config/aarch64/iterators.md (FRECP): New.
660
+ (frecp_suffix): Likewise.
661
+ * config/aarch64/arm_neon.h
662
+ (vrecp<esx><qsd>_<fd><32, 64>): Convert to using builtins.
664
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
666
+ Backport from trunk r198030.
667
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
669
+ * config/arm/arm.md (movsicc_insn): Convert define_insn into
670
+ define_insn_and_split.
671
+ (and_scc,ior_scc,negscc): Likewise.
672
+ (cmpsi2_addneg, subsi3_compare): Convert to named patterns.
674
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
676
+ Backport from trunk r198020.
677
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
679
+ * config/aarch64/aarch64.md (*adds_<optab><mode>_multp2):
681
+ (*subs_<optab><mode>_multp2): New pattern.
682
+ (*adds_<optab><ALLX:mode>_<GPI:mode>): New pattern.
683
+ (*subs_<optab><ALLX:mode>_<GPI:mode>): New pattern.
685
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
687
+ Backport from trunk r198004,198029.
688
+ 2013-04-17 Greta Yorsh <Greta.Yorsh at arm.com>
690
+ * config/arm/arm.c (use_return_insn): Return 0 for targets that
691
+ can benefit from using a sequence of LDRD instructions in epilogue
692
+ instead of a single LDM instruction.
694
+ 2013-04-16 Greta Yorsh <Greta.Yorsh at arm.com>
696
+ * config/arm/arm.c (emit_multi_reg_push): New declaration
697
+ for an existing function.
698
+ (arm_emit_strd_push): New function.
699
+ (arm_expand_prologue): Used here.
700
+ (arm_emit_ldrd_pop): New function.
701
+ (arm_expand_epilogue): Used here.
702
+ (arm_get_frame_offsets): Update condition.
703
+ (arm_emit_multi_reg_pop): Add a special case for load of a single
704
+ register with writeback.
706
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
708
+ Backport from trunk r197965.
709
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
711
+ * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case.
712
+ * config/arm/arm.md (*anddi3_insn): Change to insn_and_split.
713
+ * config/arm/constraints.md (De): New constraint.
714
+ * config/arm/neon.md (anddi3_neon): Delete.
715
+ (neon_vand<mode>): Expand to standard anddi3 pattern.
716
+ * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
717
+ Move earlier in the file.
718
+ (neon_inv_logic_op2): Likewise.
719
+ (arm_anddi_operand_neon): New predicate.
721
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
723
+ Backport from trunk r197925.
724
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
726
+ * config/arm/arm.md (mov_scc,mov_negscc,mov_notscc): Convert
727
+ define_insn into define_insn_and_split and emit movsicc patterns.
729
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
731
+ Backport from trunk r197807.
732
+ 2013-04-11 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
734
+ * config/aarch64/aarch64.h (REVERSIBLE_CC_MODE): Define.
736
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
738
+ Backport from trunk r197642.
739
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
741
+ * config/arm/arm.md (minmax_arithsi_non_canon): New pattern.
743
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
745
+ Backport from trunk r197530,197921.
746
+ 2013-04-12 Greta Yorsh <Greta.Yorsh@arm.com>
748
+ * config/arm/arm.c (gen_operands_ldrd_strd): Initialize "base".
750
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
752
+ * config/arm/constraints.md (q): New constraint.
753
+ * config/arm/ldrdstrd.md: New file.
754
+ * config/arm/arm.md (ldrdstrd.md) New include.
755
+ (arm_movdi): Use "q" instead of "r" constraint
756
+ for double-word memory access.
757
+ (movdf_soft_insn): Likewise.
758
+ * config/arm/vfp.md (movdi_vfp): Likewise.
759
+ * config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md.
760
+ * config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration.
761
+ * config/arm/arm.c (gen_operands_ldrd_strd): New function.
762
+ (mem_ok_for_ldrd_strd): Likewise.
763
+ (output_move_double): Update assertion.
765
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
767
+ Backport of trunk r197518-197522,197526-197528.
768
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
770
+ * config/arm/arm.md (arm_smax_insn): Convert define_insn into
771
+ define_insn_and_split.
772
+ (arm_smin_insn,arm_umaxsi3,arm_uminsi3): Likewise.
774
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
776
+ * config/arm/arm.md (arm_ashldi3_1bit): Convert define_insn into
777
+ define_insn_and_split.
778
+ (arm_ashrdi3_1bit,arm_lshrdi3_1bit): Likewise.
779
+ (shiftsi3_compare): New pattern.
780
+ (rrx): New pattern.
781
+ * config/arm/unspecs.md (UNSPEC_RRX): New.
783
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
785
+ * config/arm/arm.md (negdi_extendsidi): New pattern.
786
+ (negdi_zero_extendsidi): Likewise.
788
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
790
+ * config/arm/arm.md (andsi_iorsi3_notsi): Convert define_insn into
791
+ define_insn_and_split.
792
+ (arm_negdi2,arm_abssi2,arm_neg_abssi2): Likewise.
793
+ (arm_cmpdi_insn,arm_cmpdi_unsigned): Likewise.
795
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
797
+ * config/arm/arm.md (arm_subdi3): Convert define_insn into
798
+ define_insn_and_split.
799
+ (subdi_di_zesidi,subdi_di_sesidi): Likewise.
800
+ (subdi_zesidi_di,subdi_sesidi_di,subdi_zesidi_zesidi): Likewise.
802
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
804
+ * config/arm/arm.md (subsi3_carryin): New pattern.
805
+ (subsi3_carryin_const): Likewise.
806
+ (subsi3_carryin_compare,subsi3_carryin_compare_const): Likewise.
807
+ (subsi3_carryin_shift,rsbsi3_carryin_shift): Likewise.
809
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
811
+ * config/arm/arm.md (incscc,arm_incscc,decscc,arm_decscc): Delete.
813
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
815
+ * config/arm/arm.md (addsi3_carryin_<optab>): Set attribute predicable.
816
+ (addsi3_carryin_alt2_<optab>,addsi3_carryin_shift_<optab>): Likewise.
818
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
820
+ Backport of trunk r197517.
821
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
823
+ * config/arm/arm.c (arm_expand_builtin): Change fcode
824
+ type to unsigned int.
826
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
828
+ Backport of trunk r197513.
829
+ 2013-04-05 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
831
+ * doc/invoke.texi (ARM Options): Document cortex-a53 support.
833
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
835
+ Backport of trunk r197489-197491.
836
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
838
+ * config/arm/arm-protos.h (arm_builtin_vectorized_function):
839
+ New function prototype.
840
+ * config/arm/arm.c (TARGET_VECTORIZE_BUILTINS): Define.
841
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
842
+ (arm_builtin_vectorized_function): New function.
844
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
846
+ * config/arm/arm_neon_builtins.def: New file.
847
+ * config/arm/arm.c (neon_builtin_data): Move contents to
848
+ arm_neon_builtins.def.
849
+ (enum arm_builtins): Include neon builtin definitions.
850
+ (ARM_BUILTIN_NEON_BASE): Move from enum to macro.
851
+ * config/arm/t-arm (arm.o): Add dependency on
852
+ arm_neon_builtins.def.
854
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
856
+ Backport of trunk 196795-196797,196957
857
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
859
+ * config/aarch64/aarch64.md (*sub<mode>3_carryin): New pattern.
860
+ (*subsi3_carryin_uxtw): Likewise.
862
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
864
+ * config/aarch64/aarch64.md (*ror<mode>3_insn): New pattern.
865
+ (*rorsi3_insn_uxtw): Likewise.
867
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
869
+ * config/aarch64/aarch64.md (*extr<mode>5_insn): New pattern.
870
+ (*extrsi5_insn_uxtw): Likewise.
872
+2013-04-10 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
874
+ * LINARO-VERSION: Bump version number.
876
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
878
+ * GCC Linaro 4.8-2013.04 released.
880
+ * LINARO-VERSION: New file.
881
+ * configure.ac: Add Linaro version string.
882
+ * configure: Regenerate.
884
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
886
+ Backport of trunk r197346.
887
+ 2013-04-02 Ian Caulfield <ian.caulfield@arm.com>
888
+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
890
+ * config/arm/arm-arches.def (armv8-a): Default to cortex-a53.
891
+ * config/arm/t-arm (MD_INCLUDES): Depend on cortex-a53.md.
892
+ * config/arm/cortex-a53.md: New file.
893
+ * config/arm/bpabi.h (BE8_LINK_SPEC): Handle cortex-a53.
894
+ * config/arm/arm.md (generic_sched, generic_vfp): Handle cortex-a53.
895
+ * config/arm/arm.c (arm_issue_rate): Likewise.
896
+ * config/arm/arm-tune.md: Regenerate
897
+ * config/arm/arm-tables.opt: Regenerate.
898
+ * config/arm/arm-cores.def: Add cortex-a53.
900
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
902
+ Backport of trunk r197342.
903
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
905
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add variants for
906
+ scalar load/store operations using B/H registers.
907
+ (*zero_extend<SHORT:mode><GPI:mode>2_aarch64): Likewise.
909
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
911
+ Backport of trunk r197341.
912
+ 2013-04-02 Sofiane Naci <sofiane.naci@arm.com>
914
+ * config/aarch64/aarch64.md (*mov<mode>_aarch64): Add alternatives for
916
+ * config/aarch64/aarch64.c
917
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
918
+ * config/aarch64/aarch64-protos.h
919
+ (aarch64_simd_scalar_immediate_valid_for_move): New.
920
+ * config/aarch64/constraints.md (Dh, Dq): New.
921
+ * config/aarch64/iterators.md (hq): New.
923
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
925
+ Backport from trunk r197207.
926
+ 2013-03-28 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
928
+ * config/aarch64/aarch64.md (*and<mode>3_compare0): New pattern.
929
+ (*andsi3_compare0_uxtw): New pattern.
930
+ (*and_<SHIFT:optab><mode>3_compare0): New pattern.
931
+ (*and_<SHIFT:optab>si3_compare0_uxtw): New pattern.
933
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
935
+ Backport from trunk r197153.
936
+ 2013-03-27 Terry Guo <terry.guo@arm.com>
938
+ * config/arm/arm-cores.def: Added core cortex-r7.
939
+ * config/arm/arm-tune.md: Regenerated.
940
+ * config/arm/arm-tables.opt: Regenerated.
941
+ * doc/invoke.texi: Added entry for core cortex-r7.
943
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
945
+ Backport from trunk r197052.
946
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
948
+ * config/arm/arm.md (f_sels, f_seld): New types.
949
+ (*cmov<mode>): New pattern.
950
+ * config/arm/predicates.md (arm_vsel_comparison_operator): New
953
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
955
+ Backport from trunk r197046.
956
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
958
+ * config/arm/arm.c (arm_emit_load_exclusive): Add acq parameter.
959
+ Emit load-acquire versions when acq is true.
960
+ (arm_emit_store_exclusive): Add rel parameter.
961
+ Emit store-release versions when rel is true.
962
+ (arm_split_compare_and_swap): Use acquire-release instructions
964
+ of barriers when appropriate.
965
+ (arm_split_atomic_op): Likewise.
966
+ * config/arm/arm.h (TARGET_HAVE_LDACQ): New macro.
967
+ * config/arm/unspecs.md (VUNSPEC_LAX): New unspec.
968
+ (VUNSPEC_SLX): Likewise.
969
+ (VUNSPEC_LDA): Likewise.
970
+ (VUNSPEC_STL): Likewise.
971
+ * config/arm/sync.md (atomic_load<mode>): New pattern.
972
+ (atomic_store<mode>): Likewise.
973
+ (arm_load_acquire_exclusive<mode>): Likewise.
974
+ (arm_load_acquire_exclusivesi): Likewise.
975
+ (arm_load_acquire_exclusivedi): Likewise.
976
+ (arm_store_release_exclusive<mode>): Likewise.
978
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
980
+ Backport from trunk r196876.
981
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
983
+ * config/arm/arm-protos.h (tune_params): Add
984
+ prefer_neon_for_64bits field.
985
+ * config/arm/arm.c (prefer_neon_for_64bits): New variable.
986
+ (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
987
+ (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
988
+ (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
989
+ (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
990
+ (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
991
+ (arm_option_override): Handle -mneon-for-64bits new option.
992
+ * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
993
+ (prefer_neon_for_64bits): Declare new variable.
994
+ * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
995
+ avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
997
+ (arch_enabled): Handle new arch types. Remove support for onlya8
999
+ (one_cmpldi2): Use new arch names.
1000
+ * config/arm/arm.opt (mneon-for-64bits): Add option.
1001
+ * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
1002
+ (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
1003
+ neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
1005
+ * doc/invoke.texi (-mneon-for-64bits): Document.
1007
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1009
+ Backport from trunk r196858.
1010
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1012
+ * config/aarch64/aarch64-simd.md (simd_fabd): New Attribute.
1013
+ (abd<mode>_3): New pattern.
1014
+ (aba<mode>_3): New pattern.
1015
+ (fabd<mode>_3): New pattern.
1017
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
1019
+ Backport from trunk r196856.
1020
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
1022
+ * config/aarch64/aarch64-elf.h (REGISTER_PREFIX): Remove.
1023
+ * config/aarch64/aarch64.c (aarch64_print_operand): Remove all
1024
+ occurrence of REGISTER_PREFIX as its empty string.
1025
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
1026
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
1028
+/* { dg-do compile } */
1029
+/* { dg-require-effective-target arm_v8_neon_ok } */
1030
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1031
+/* { dg-add-options arm_v8_neon } */
1036
+foo (float *output, float *input)
1039
+ /* Vectorizable. */
1040
+ for (i = 0; i < N; i++)
1041
+ output[i] = __builtin_floorf (input[i]);
1044
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_floorf } } } */
1045
+/* { dg-final { cleanup-tree-dump "vect" } } */
1046
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
1047
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
1049
+/* Test the `vcvtf32_f16' ARM Neon intrinsic. */
1050
+/* This file was autogenerated by neon-testgen. */
1052
+/* { dg-do assemble } */
1053
+/* { dg-require-effective-target arm_neon_fp16_ok } */
1054
+/* { dg-options "-save-temps -O0" } */
1055
+/* { dg-add-options arm_neon_fp16 } */
1057
+#include "arm_neon.h"
1059
+void test_vcvtf32_f16 (void)
1061
+ float32x4_t out_float32x4_t;
1062
+ float16x4_t arg0_float16x4_t;
1064
+ out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
1067
+/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
1068
+/* { dg-final { cleanup-saved-temps } } */
1069
--- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
1070
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
1072
+/* Test the `vcvtf16_f32' ARM Neon intrinsic. */
1073
+/* This file was autogenerated by neon-testgen. */
1075
+/* { dg-do assemble } */
1076
+/* { dg-require-effective-target arm_neon_fp16_ok } */
1077
+/* { dg-options "-save-temps -O0" } */
1078
+/* { dg-add-options arm_neon_fp16 } */
1080
+#include "arm_neon.h"
1082
+void test_vcvtf16_f32 (void)
1084
+ float16x4_t out_float16x4_t;
1085
+ float32x4_t arg0_float32x4_t;
1087
+ out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
1090
+/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
1091
+/* { dg-final { cleanup-saved-temps } } */
1092
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
1093
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c
1095
+/* { dg-do compile } */
1096
+/* { dg-options "-O1" } */
1099
+muld (unsigned long long X, unsigned long long Y)
1101
+ unsigned long long mask = 0xffffffffull;
1102
+ return (X & mask) * (Y & mask);
1105
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
1106
--- a/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
1107
+++ b/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c
1109
+/* { dg-do compile } */
1110
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
1111
+/* { dg-options "-O2" } */
1112
+int foo(int a, int b, int* p, int *q)
1119
+/* { dg-final { scan-assembler "ldrd" } } */
1120
--- a/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
1121
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtdf.c
1123
+/* { dg-do compile } */
1124
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1125
+/* { dg-options "-O2" } */
1126
+/* { dg-add-options arm_v8_vfp } */
1129
+foo (double x, double y)
1131
+ volatile int i = 0;
1132
+ return i > 0 ? x : y;
1135
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
1136
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
1137
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
1139
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1140
+/* { dg-do compile } */
1141
+/* { dg-options "-O2" } */
1142
+/* { dg-add-options arm_arch_v8a } */
1144
+#include "../aarch64/atomic-op-relaxed.x"
1146
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1147
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1148
+/* { dg-final { scan-assembler-not "dmb" } } */
1149
--- a/src/gcc/testsuite/gcc.target/arm/vselgesf.c
1150
+++ b/src/gcc/testsuite/gcc.target/arm/vselgesf.c
1152
+/* { dg-do compile } */
1153
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1154
+/* { dg-options "-O2" } */
1155
+/* { dg-add-options arm_v8_vfp } */
1158
+foo (float x, float y)
1160
+ volatile int i = 0;
1161
+ return i >= 0 ? x : y;
1164
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
1165
--- a/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
1166
+++ b/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c
1168
+/* { dg-do compile } */
1169
+/* { dg-require-effective-target arm_prefer_ldrd_strd } */
1170
+/* { dg-options "-O2" } */
1171
+void foo(int a, int b, int* p)
1176
+/* { dg-final { scan-assembler "strd" } } */
1177
--- a/src/gcc/testsuite/gcc.target/arm/negdi-1.c
1178
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-1.c
1180
+/* { dg-do compile } */
1181
+/* { dg-require-effective-target arm32 } */
1182
+/* { dg-options "-O2" } */
1184
+signed long long extendsidi_negsi (signed int x)
1192
+ mov r1, r0, asr #31
1194
+/* { dg-final { scan-assembler-times "rsb" 1 { target { arm_nothumb } } } } */
1195
+/* { dg-final { scan-assembler-times "negs\\t" 1 { target { ! { arm_nothumb } } } } } */
1196
+/* { dg-final { scan-assembler-times "asr" 1 } } */
1197
--- a/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
1198
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
1200
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1201
+/* { dg-do compile } */
1202
+/* { dg-options "-O2" } */
1203
+/* { dg-add-options arm_arch_v8a } */
1205
+#include "../aarch64/atomic-comp-swap-release-acquire.x"
1207
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 4 } } */
1208
+/* { dg-final { scan-assembler-times "stlex" 4 } } */
1209
+/* { dg-final { scan-assembler-not "dmb" } } */
1210
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
1211
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
1213
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1214
+/* { dg-do compile } */
1215
+/* { dg-options "-O2" } */
1216
+/* { dg-add-options arm_arch_v8a } */
1218
+#include "../aarch64/atomic-op-seq_cst.x"
1220
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1221
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1222
+/* { dg-final { scan-assembler-not "dmb" } } */
1223
--- a/src/gcc/testsuite/gcc.target/arm/vselgedf.c
1224
+++ b/src/gcc/testsuite/gcc.target/arm/vselgedf.c
1226
+/* { dg-do compile } */
1227
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1228
+/* { dg-options "-O2" } */
1229
+/* { dg-add-options arm_v8_vfp } */
1232
+foo (double x, double y)
1234
+ volatile int i = 0;
1235
+ return i >= 0 ? x : y;
1238
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
1239
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
1240
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
1242
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1243
+/* { dg-do compile } */
1244
+/* { dg-options "-O2" } */
1245
+/* { dg-add-options arm_arch_v8a } */
1247
+#include "../aarch64/atomic-op-consume.x"
1249
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1250
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1251
+/* { dg-final { scan-assembler-not "dmb" } } */
1252
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
1253
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c
1255
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1256
+/* { dg-do compile } */
1257
+/* { dg-options "-O2" } */
1258
+/* { dg-add-options arm_arch_v8a } */
1260
+#include "../aarch64/atomic-op-char.x"
1262
+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1263
+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1264
+/* { dg-final { scan-assembler-not "dmb" } } */
1265
--- a/src/gcc/testsuite/gcc.target/arm/vselnesf.c
1266
+++ b/src/gcc/testsuite/gcc.target/arm/vselnesf.c
1268
+/* { dg-do compile } */
1269
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1270
+/* { dg-options "-O2" } */
1271
+/* { dg-add-options arm_v8_vfp } */
1274
+foo (float x, float y)
1276
+ volatile int i = 0;
1277
+ return i != 0 ? x : y;
1280
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
1281
--- a/src/gcc/testsuite/gcc.target/arm/negdi-2.c
1282
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-2.c
1284
+/* { dg-do compile } */
1285
+/* { dg-require-effective-target arm32 } */
1286
+/* { dg-options "-O2" } */
1288
+signed long long zero_extendsidi_negsi (unsigned int x)
1297
+/* { dg-final { scan-assembler-times "rsb\\tr0, r0, #0" 1 { target { arm_nothumb } } } } */
1298
+/* { dg-final { scan-assembler-times "negs\\tr0, r0" 1 { target { ! arm_nothumb } } } } */
1299
+/* { dg-final { scan-assembler-times "mov" 1 } } */
1300
--- a/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
1301
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcsf.c
1303
+/* { dg-do compile } */
1304
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1305
+/* { dg-options "-O2" } */
1306
+/* { dg-add-options arm_v8_vfp } */
1309
+foo (float x, float y)
1311
+ return !__builtin_isunordered (x, y) ? x : y;
1314
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
1315
--- a/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
1316
+++ b/src/gcc/testsuite/gcc.target/arm/minmax_minus.c
1318
+/* { dg-do compile } */
1319
+/* { dg-options "-O2" } */
1321
+#define MAX(a, b) (a > b ? a : b)
1323
+foo (int a, int b, int c)
1325
+ return c - MAX (a, b);
1328
+/* { dg-final { scan-assembler "rsbge" } } */
1329
+/* { dg-final { scan-assembler "rsblt" } } */
1330
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
1331
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c
1333
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1334
+/* { dg-do compile } */
1335
+/* { dg-options "-O2" } */
1336
+/* { dg-add-options arm_arch_v8a } */
1338
+#include "../aarch64/atomic-op-release.x"
1340
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1341
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1342
+/* { dg-final { scan-assembler-not "dmb" } } */
1343
--- a/src/gcc/testsuite/gcc.target/arm/vselvssf.c
1344
+++ b/src/gcc/testsuite/gcc.target/arm/vselvssf.c
1346
+/* { dg-do compile } */
1347
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1348
+/* { dg-options "-O2" } */
1349
+/* { dg-add-options arm_v8_vfp } */
1352
+foo (float x, float y)
1354
+ return __builtin_isunordered (x, y) ? x : y;
1357
+/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */
1358
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
1359
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
1361
+/* { dg-do compile } */
1362
+/* { dg-require-effective-target arm_v8_neon_ok } */
1363
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1364
+/* { dg-add-options arm_v8_neon } */
1369
+foo (float *output, float *input)
1372
+ /* Vectorizable. */
1373
+ for (i = 0; i < N; i++)
1374
+ output[i] = __builtin_roundf (input[i]);
1377
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_roundf } } } */
1378
+/* { dg-final { cleanup-tree-dump "vect" } } */
1379
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
1380
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
1382
+/* Check that Neon is *not* used by default to handle 64-bits scalar
1385
+/* { dg-do compile } */
1386
+/* { dg-require-effective-target arm_neon_ok } */
1387
+/* { dg-options "-O2" } */
1388
+/* { dg-add-options arm_neon } */
1390
+typedef long long i64;
1391
+typedef unsigned long long u64;
1392
+typedef unsigned int u32;
1395
+/* Unary operators */
1396
+#define UNARY_OP(name, op) \
1397
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
1399
+/* Binary operators */
1400
+#define BINARY_OP(name, op) \
1401
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
1403
+/* Unsigned shift */
1404
+#define SHIFT_U(name, op, amount) \
1405
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
1408
+#define SHIFT_S(name, op, amount) \
1409
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
1419
+SHIFT_U(right1, >>, 1)
1420
+SHIFT_U(right2, >>, 2)
1421
+SHIFT_U(right5, >>, 5)
1422
+SHIFT_U(rightn, >>, c)
1424
+SHIFT_S(right1, >>, 1)
1425
+SHIFT_S(right2, >>, 2)
1426
+SHIFT_S(right5, >>, 5)
1427
+SHIFT_S(rightn, >>, c)
1429
+/* { dg-final {scan-assembler-times "vmvn" 0} } */
1430
+/* { dg-final {scan-assembler-times "vadd" 0} } */
1431
+/* { dg-final {scan-assembler-times "vsub" 0} } */
1432
+/* { dg-final {scan-assembler-times "vand" 0} } */
1433
+/* { dg-final {scan-assembler-times "vorr" 0} } */
1434
+/* { dg-final {scan-assembler-times "veor" 0} } */
1435
+/* { dg-final {scan-assembler-times "vshr" 0} } */
1436
--- a/src/gcc/testsuite/gcc.target/arm/negdi-3.c
1437
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-3.c
1439
+/* { dg-do compile } */
1440
+/* { dg-require-effective-target arm32 } */
1441
+/* { dg-options "-O2" } */
1443
+signed long long negdi_zero_extendsidi (unsigned int x)
1445
+ return -((signed long long) x);
1452
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
1453
+/* { dg-final { scan-assembler-times "sbc" 1 } } */
1454
+/* { dg-final { scan-assembler-times "mov" 0 } } */
1455
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
1456
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
1457
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
1459
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1460
+/* { dg-do compile } */
1461
+/* { dg-options "-O2" } */
1462
+/* { dg-add-options arm_arch_v8a } */
1464
+#include "../aarch64/atomic-op-acq_rel.x"
1466
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1467
+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1468
+/* { dg-final { scan-assembler-not "dmb" } } */
1469
--- a/src/gcc/testsuite/gcc.target/arm/vselltsf.c
1470
+++ b/src/gcc/testsuite/gcc.target/arm/vselltsf.c
1472
+/* { dg-do compile } */
1473
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1474
+/* { dg-options "-O2" } */
1475
+/* { dg-add-options arm_v8_vfp } */
1478
+foo (float x, float y)
1480
+ volatile int i = 0;
1481
+ return i < 0 ? x : y;
1484
+/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */
1485
--- a/src/gcc/testsuite/gcc.target/arm/vselnedf.c
1486
+++ b/src/gcc/testsuite/gcc.target/arm/vselnedf.c
1488
+/* { dg-do compile } */
1489
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1490
+/* { dg-options "-O2" } */
1491
+/* { dg-add-options arm_v8_vfp } */
1494
+foo (double x, double y)
1496
+ volatile int i = 0;
1497
+ return i != 0 ? x : y;
1500
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
1501
--- a/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
1502
+++ b/src/gcc/testsuite/gcc.target/arm/vselvcdf.c
1504
+/* { dg-do compile } */
1505
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1506
+/* { dg-options "-O2" } */
1507
+/* { dg-add-options arm_v8_vfp } */
1510
+foo (double x, double y)
1512
+ return !__builtin_isunordered (x, y) ? x : y;
1515
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
1516
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
1517
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
1519
+/* { dg-do compile } */
1520
+/* { dg-require-effective-target arm_v8_neon_ok } */
1521
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1522
+/* { dg-add-options arm_v8_neon } */
1527
+foo (float *output, float *input)
1530
+ /* Vectorizable. */
1531
+ for (i = 0; i < N; i++)
1532
+ output[i] = __builtin_truncf (input[i]);
1535
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_btruncf } } } */
1536
+/* { dg-final { cleanup-tree-dump "vect" } } */
1537
--- a/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
1538
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqsf.c
1540
+/* { dg-do compile } */
1541
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1542
+/* { dg-options "-O2" } */
1543
+/* { dg-add-options arm_v8_vfp } */
1546
+foo (float x, float y)
1548
+ volatile int i = 0;
1549
+ return i == 0 ? x : y;
1552
+/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */
1553
--- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
1554
+++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
1556
+/* Check that Neon is used to handle 64-bits scalar operations. */
1558
+/* { dg-do compile } */
1559
+/* { dg-require-effective-target arm_neon_ok } */
1560
+/* { dg-options "-O2 -mneon-for-64bits" } */
1561
+/* { dg-add-options arm_neon } */
1563
+typedef long long i64;
1564
+typedef unsigned long long u64;
1565
+typedef unsigned int u32;
1568
+/* Unary operators */
1569
+#define UNARY_OP(name, op) \
1570
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
1572
+/* Binary operators */
1573
+#define BINARY_OP(name, op) \
1574
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
1576
+/* Unsigned shift */
1577
+#define SHIFT_U(name, op, amount) \
1578
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
1581
+#define SHIFT_S(name, op, amount) \
1582
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
1592
+SHIFT_U(right1, >>, 1)
1593
+SHIFT_U(right2, >>, 2)
1594
+SHIFT_U(right5, >>, 5)
1595
+SHIFT_U(rightn, >>, c)
1597
+SHIFT_S(right1, >>, 1)
1598
+SHIFT_S(right2, >>, 2)
1599
+SHIFT_S(right5, >>, 5)
1600
+SHIFT_S(rightn, >>, c)
1602
+/* { dg-final {scan-assembler-times "vmvn" 1} } */
1603
+/* Two vadd: 1 in unary_not, 1 in binary_add */
1604
+/* { dg-final {scan-assembler-times "vadd" 2} } */
1605
+/* { dg-final {scan-assembler-times "vsub" 1} } */
1606
+/* { dg-final {scan-assembler-times "vand" 1} } */
1607
+/* { dg-final {scan-assembler-times "vorr" 1} } */
1608
+/* { dg-final {scan-assembler-times "veor" 1} } */
1609
+/* 6 vshr for right shifts by constant, and variable right shift uses
1610
+ vshl with a negative amount in register. */
1611
+/* { dg-final {scan-assembler-times "vshr" 6} } */
1612
+/* { dg-final {scan-assembler-times "vshl" 2} } */
1613
--- a/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
1614
+++ b/src/gcc/testsuite/gcc.target/arm/vselvsdf.c
1616
+/* { dg-do compile } */
1617
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1618
+/* { dg-options "-O2" } */
1619
+/* { dg-add-options arm_v8_vfp } */
1622
+foo (double x, double y)
1624
+ return __builtin_isunordered (x, y) ? x : y;
1627
+/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */
1628
--- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
1629
+++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c
1631
+/* { dg-do compile } */
1632
+/* { dg-options "-O1" } */
1634
+long long muld(long long X, long long Y)
1639
+/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */
1640
--- a/src/gcc/testsuite/gcc.target/arm/negdi-4.c
1641
+++ b/src/gcc/testsuite/gcc.target/arm/negdi-4.c
1643
+/* { dg-do compile } */
1644
+/* { dg-require-effective-target arm32 } */
1645
+/* { dg-options "-O2" } */
1647
+signed long long negdi_extendsidi (signed int x)
1649
+ return -((signed long long) x);
1654
+ mov r1, r0, asr #31
1656
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
1657
+/* { dg-final { scan-assembler-times "asr" 1 } } */
1658
+/* { dg-final { scan-assembler-times "rsc" 0 } } */
1659
--- a/src/gcc/testsuite/gcc.target/arm/vselltdf.c
1660
+++ b/src/gcc/testsuite/gcc.target/arm/vselltdf.c
1662
+/* { dg-do compile } */
1663
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1664
+/* { dg-options "-O2" } */
1665
+/* { dg-add-options arm_v8_vfp } */
1668
+foo (double x, double y)
1670
+ volatile int i = 0;
1671
+ return i < 0 ? x : y;
1674
+/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */
1675
--- a/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
1676
+++ b/src/gcc/testsuite/gcc.target/arm/vseleqdf.c
1678
+/* { dg-do compile } */
1679
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1680
+/* { dg-options "-O2" } */
1681
+/* { dg-add-options arm_v8_vfp } */
1684
+foo (double x, double y)
1686
+ volatile int i = 0;
1687
+ return i == 0 ? x : y;
1690
+/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */
1691
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
1692
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
1694
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1695
+/* { dg-do compile } */
1696
+/* { dg-options "-O2" } */
1697
+/* { dg-add-options arm_arch_v8a } */
1699
+#include "../aarch64/atomic-op-acquire.x"
1701
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1702
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1703
+/* { dg-final { scan-assembler-not "dmb" } } */
1704
--- a/src/gcc/testsuite/gcc.target/arm/vsellesf.c
1705
+++ b/src/gcc/testsuite/gcc.target/arm/vsellesf.c
1707
+/* { dg-do compile } */
1708
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1709
+/* { dg-options "-O2" } */
1710
+/* { dg-add-options arm_v8_vfp } */
1713
+foo (float x, float y)
1715
+ volatile int i = 0;
1716
+ return i <= 0 ? x : y;
1719
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
1720
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
1721
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c
1723
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1724
+/* { dg-do compile } */
1725
+/* { dg-options "-O2" } */
1726
+/* { dg-add-options arm_arch_v8a } */
1728
+#include "../aarch64/atomic-op-int.x"
1730
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1731
+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1732
+/* { dg-final { scan-assembler-not "dmb" } } */
1733
--- a/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
1734
+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c
1736
+/* { dg-require-effective-target arm_arch_v8a_ok } */
1737
+/* { dg-do compile } */
1738
+/* { dg-options "-O2" } */
1739
+/* { dg-add-options arm_arch_v8a } */
1741
+#include "../aarch64/atomic-op-short.x"
1743
+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1744
+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */
1745
+/* { dg-final { scan-assembler-not "dmb" } } */
1746
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
1747
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
1749
+/* { dg-do compile } */
1750
+/* { dg-require-effective-target arm_v8_neon_ok } */
1751
+/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */
1752
+/* { dg-add-options arm_v8_neon } */
1757
+foo (float *output, float *input)
1760
+ /* Vectorizable. */
1761
+ for (i = 0; i < N; i++)
1762
+ output[i] = __builtin_ceilf (input[i]);
1765
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_ceilf } } } */
1766
+/* { dg-final { cleanup-tree-dump "vect" } } */
1767
--- a/src/gcc/testsuite/gcc.target/arm/vselledf.c
1768
+++ b/src/gcc/testsuite/gcc.target/arm/vselledf.c
1770
+/* { dg-do compile } */
1771
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1772
+/* { dg-options "-O2" } */
1773
+/* { dg-add-options arm_v8_vfp } */
1776
+foo (double x, double y)
1778
+ volatile int i = 0;
1779
+ return i <= 0 ? x : y;
1782
+/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */
1783
--- a/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
1784
+++ b/src/gcc/testsuite/gcc.target/arm/vselgtsf.c
1786
+/* { dg-do compile } */
1787
+/* { dg-require-effective-target arm_v8_vfp_ok } */
1788
+/* { dg-options "-O2" } */
1789
+/* { dg-add-options arm_v8_vfp } */
1792
+foo (float x, float y)
1794
+ volatile int i = 0;
1795
+ return i > 0 ? x : y;
1798
+/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */
1799
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
1800
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecps.c
1802
+/* { dg-do run } */
1803
+/* { dg-options "-O3 --save-temps" } */
1805
+#include <arm_neon.h>
1807
+#include <stdlib.h>
1810
+test_frecps_float32_t (void)
1813
+ float32_t value = 0.2;
1814
+ float32_t reciprocal = 5.0;
1815
+ float32_t step = vrecpes_f32 (value);
1816
+ /* 3 steps should give us within ~0.001 accuracy. */
1817
+ for (i = 0; i < 3; i++)
1818
+ step = step * vrecpss_f32 (step, value);
1820
+ return fabs (step - reciprocal) < 0.001;
1823
+/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
1824
+/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
1827
+test_frecps_float32x2_t (void)
1832
+ const float32_t value_pool[] = {0.2, 0.4};
1833
+ const float32_t reciprocal_pool[] = {5.0, 2.5};
1834
+ float32x2_t value = vld1_f32 (value_pool);
1835
+ float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
1837
+ float32x2_t step = vrecpe_f32 (value);
1838
+ /* 3 steps should give us within ~0.001 accuracy. */
1839
+ for (i = 0; i < 3; i++)
1840
+ step = step * vrecps_f32 (step, value);
1842
+ ret &= fabs (vget_lane_f32 (step, 0)
1843
+ - vget_lane_f32 (reciprocal, 0)) < 0.001;
1844
+ ret &= fabs (vget_lane_f32 (step, 1)
1845
+ - vget_lane_f32 (reciprocal, 1)) < 0.001;
1850
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
1851
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
1854
+test_frecps_float32x4_t (void)
1859
+ const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
1860
+ const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
1861
+ float32x4_t value = vld1q_f32 (value_pool);
1862
+ float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
1864
+ float32x4_t step = vrecpeq_f32 (value);
1865
+ /* 3 steps should give us within ~0.001 accuracy. */
1866
+ for (i = 0; i < 3; i++)
1867
+ step = step * vrecpsq_f32 (step, value);
1869
+ ret &= fabs (vgetq_lane_f32 (step, 0)
1870
+ - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
1871
+ ret &= fabs (vgetq_lane_f32 (step, 1)
1872
+ - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
1873
+ ret &= fabs (vgetq_lane_f32 (step, 2)
1874
+ - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
1875
+ ret &= fabs (vgetq_lane_f32 (step, 3)
1876
+ - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
1881
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
1882
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
1885
+test_frecps_float64_t (void)
1888
+ float64_t value = 0.2;
1889
+ float64_t reciprocal = 5.0;
1890
+ float64_t step = vrecped_f64 (value);
1891
+ /* 3 steps should give us within ~0.001 accuracy. */
1892
+ for (i = 0; i < 3; i++)
1893
+ step = step * vrecpsd_f64 (step, value);
1895
+ return fabs (step - reciprocal) < 0.001;
1898
+/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
1899
+/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
1902
+test_frecps_float64x2_t (void)
1907
+ const float64_t value_pool[] = {0.2, 0.4};
1908
+ const float64_t reciprocal_pool[] = {5.0, 2.5};
1909
+ float64x2_t value = vld1q_f64 (value_pool);
1910
+ float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
1912
+ float64x2_t step = vrecpeq_f64 (value);
1913
+ /* 3 steps should give us within ~0.001 accuracy. */
1914
+ for (i = 0; i < 3; i++)
1915
+ step = step * vrecpsq_f64 (step, value);
1917
+ ret &= fabs (vgetq_lane_f64 (step, 0)
1918
+ - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
1919
+ ret &= fabs (vgetq_lane_f64 (step, 1)
1920
+ - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
1925
+/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
1926
+/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
1929
+main (int argc, char **argv)
1931
+ if (!test_frecps_float32_t ())
1933
+ if (!test_frecps_float32x2_t ())
1935
+ if (!test_frecps_float32x4_t ())
1937
+ if (!test_frecps_float64_t ())
1939
+ if (!test_frecps_float64x2_t ())
1945
+/* { dg-final { cleanup-saved-temps } } */
1946
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
1947
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x
1952
+atomic_fetch_add_ACQ_REL (int a)
1954
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
1958
+atomic_fetch_sub_ACQ_REL (int a)
1960
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
1964
+atomic_fetch_and_ACQ_REL (int a)
1966
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
1970
+atomic_fetch_nand_ACQ_REL (int a)
1972
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
1976
+atomic_fetch_xor_ACQ_REL (int a)
1978
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
1982
+atomic_fetch_or_ACQ_REL (int a)
1984
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
1986
--- a/src/gcc/testsuite/gcc.target/aarch64/extr.c
1987
+++ b/src/gcc/testsuite/gcc.target/aarch64/extr.c
1989
+/* { dg-options "-O2 --save-temps" } */
1990
+/* { dg-do run } */
1992
+extern void abort (void);
1995
+test_si (int a, int b)
1997
+ /* { dg-final { scan-assembler "extr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, 27\n" } } */
1998
+ return (a << 5) | ((unsigned int) b >> 27);
2002
+test_di (long long a, long long b)
2004
+ /* { dg-final { scan-assembler "extr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, 45\n" } } */
2005
+ return (a << 19) | ((unsigned long long) b >> 45);
2013
+ v = test_si (0x00000004, 0x30000000);
2014
+ if (v != 0x00000086)
2016
+ w = test_di (0x0001040040040004ll, 0x0070050066666666ll);
2017
+ if (w != 0x2002002000200380ll)
2022
+/* { dg-final { cleanup-saved-temps } } */
2023
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
2024
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c
2026
/* { dg-final { scan-assembler "uminv" } } */
2027
/* { dg-final { scan-assembler "smaxv" } } */
2028
/* { dg-final { scan-assembler "sminv" } } */
2029
+/* { dg-final { scan-assembler "sabd" } } */
2030
+/* { dg-final { scan-assembler "saba" } } */
2031
/* { dg-final { scan-assembler-times "addv" 2} } */
2032
/* { dg-final { scan-assembler-times "addp" 2} } */
2033
--- a/src/gcc/testsuite/gcc.target/aarch64/adds3.c
2034
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds3.c
2036
+/* { dg-do run } */
2037
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2039
+extern void abort (void);
2040
+typedef long long s64;
2043
+adds_ext (s64 a, int b, int c)
2054
+adds_shift_ext (s64 a, int b, int c)
2056
+ s64 d = (a + ((s64)b << 3));
2069
+ x = adds_ext (0x13000002ll, 41, 15);
2070
+ if (x != 318767203)
2073
+ x = adds_ext (0x50505050ll, 29, 4);
2074
+ if (x != 1347440782)
2077
+ x = adds_ext (0x12121212121ll, 2, 14);
2078
+ if (x != 555819315)
2081
+ x = adds_shift_ext (0x123456789ll, 4, 12);
2082
+ if (x != 591751097)
2085
+ x = adds_shift_ext (0x02020202ll, 9, 8);
2086
+ if (x != 33686107)
2089
+ x = adds_shift_ext (0x987987987987ll, 23, 41);
2090
+ if (x != -2020050305)
2096
+/* { dg-final { scan-assembler-times "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
2097
--- a/src/gcc/testsuite/gcc.target/aarch64/subs2.c
2098
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs2.c
2100
+/* { dg-do run } */
2101
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2103
+extern void abort (void);
2106
+subs_si_test1 (int a, int b, int c)
2110
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
2111
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
2119
+subs_si_test2 (int a, int b, int c)
2121
+ int d = a - 0xfff;
2123
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
2124
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, #4095" } } */
2132
+subs_si_test3 (int a, int b, int c)
2134
+ int d = a - (b << 3);
2136
+ /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
2137
+ /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
2144
+typedef long long s64;
2147
+subs_di_test1 (s64 a, s64 b, s64 c)
2151
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
2152
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
2160
+subs_di_test2 (s64 a, s64 b, s64 c)
2162
+ s64 d = a - 0x1000ll;
2164
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
2165
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, #4096" } } */
2173
+subs_di_test3 (s64 a, s64 b, s64 c)
2175
+ s64 d = a - (b << 3);
2177
+ /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
2178
+ /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
2190
+ x = subs_si_test1 (29, 4, 5);
2194
+ x = subs_si_test1 (5, 2, 20);
2198
+ x = subs_si_test2 (29, 4, 5);
2202
+ x = subs_si_test2 (1024, 2, 20);
2206
+ x = subs_si_test3 (35, 4, 5);
2210
+ x = subs_si_test3 (5, 2, 20);
2214
+ y = subs_di_test1 (0x130000029ll,
2218
+ if (y != 0x63505052e)
2221
+ y = subs_di_test1 (0x5000500050005ll,
2222
+ 0x2111211121112ll,
2223
+ 0x0000000002020ll);
2224
+ if (y != 0x5000500052025)
2227
+ y = subs_di_test2 (0x130000029ll,
2230
+ if (y != 0x95504f532)
2233
+ y = subs_di_test2 (0x540004100ll,
2236
+ if (y != 0x1065053309)
2239
+ y = subs_di_test3 (0x130000029ll,
2242
+ if (y != 0x63505052e)
2245
+ y = subs_di_test3 (0x130002900ll,
2248
+ if (y != 0x635052e05)
2254
+/* { dg-final { cleanup-saved-temps } } */
2255
--- a/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
2256
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c
2258
+/* { dg-do run } */
2259
+/* { dg-options "-O3 --save-temps" } */
2261
+#include <arm_neon.h>
2263
+#include <stdlib.h>
2266
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
2267
+float32_t rec_f[] =
2268
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
2270
+{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
2271
+float32_t rec_d[] =
2272
+{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
2275
+test_frecpx_float32_t (void)
2279
+ for (i = 0; i < 8; i++)
2280
+ ret &= fabs (vrecpxs_f32 (in_f[i]) - rec_f[i]) < 0.001;
2285
+/* { dg-final { scan-assembler "frecpx\\ts\[0-9\]+, s\[0-9\]+" } } */
2288
+test_frecpx_float64_t (void)
2292
+ for (i = 0; i < 8; i++)
2293
+ ret &= fabs (vrecpxd_f64 (in_d[i]) - rec_d[i]) < 0.001;
2298
+/* { dg-final { scan-assembler "frecpx\\td\[0-9\]+, d\[0-9\]+" } } */
2301
+main (int argc, char **argv)
2303
+ if (!test_frecpx_float32_t ())
2305
+ if (!test_frecpx_float64_t ())
2311
+/* { dg-final { cleanup-saved-temps } } */
2312
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
2313
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c
2315
+/* { dg-do run } */
2316
+/* { dg-options "-O3 --save-temps" } */
2318
+#include <arm_neon.h>
2320
+extern void abort (void);
2321
+extern float fabsf (float);
2322
+extern double fabs (double);
2324
+extern double trunc (double);
2325
+extern double round (double);
2326
+extern double nearbyint (double);
2327
+extern double floor (double);
2328
+extern double ceil (double);
2329
+extern double rint (double);
2331
+extern float truncf (float);
2332
+extern float roundf (float);
2333
+extern float nearbyintf (float);
2334
+extern float floorf (float);
2335
+extern float ceilf (float);
2336
+extern float rintf (float);
2338
+#define NUM_TESTS 8
2339
+#define DELTA 0.000001
2341
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
2342
+ 200.0f, -800.0f, -13.0f, -0.5f};
2343
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
2344
+ 200.0, -800.0, -13.0, -0.5};
2346
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
2348
+test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \
2352
+ int nlanes = LANES; \
2353
+ float##WIDTH##_t expected_out[NUM_TESTS]; \
2354
+ float##WIDTH##_t actual_out[NUM_TESTS]; \
2356
+ for (i = 0; i < NUM_TESTS; i++) \
2358
+ expected_out[i] = C_FN##F (input_f##WIDTH[i]); \
2359
+ /* Don't vectorize this. */ \
2360
+ asm volatile ("" : : : "memory"); \
2363
+ /* Prevent the compiler from noticing these two loops do the same \
2364
+ thing and optimizing away the comparison. */ \
2365
+ asm volatile ("" : : : "memory"); \
2367
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
2369
+ float##WIDTH##x##LANES##_t out = \
2370
+ vrnd##SUFFIX##Q##_f##WIDTH \
2371
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
2372
+ vst1##Q##_f##WIDTH (actual_out + i, out); \
2375
+ for (i = 0; i < NUM_TESTS; i++) \
2376
+ ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \
2382
+#define BUILD_VARIANTS(SUFFIX, C_FN) \
2383
+TEST (SUFFIX, , 32, 2, C_FN, f) \
2384
+TEST (SUFFIX, q, 32, 4, C_FN, f) \
2385
+TEST (SUFFIX, q, 64, 2, C_FN, ) \
2387
+BUILD_VARIANTS ( , trunc)
2388
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2389
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2390
+/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2391
+BUILD_VARIANTS (a, round)
2392
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2393
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2394
+/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2395
+BUILD_VARIANTS (i, nearbyint)
2396
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2397
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2398
+/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2399
+BUILD_VARIANTS (m, floor)
2400
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2401
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2402
+/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2403
+BUILD_VARIANTS (p, ceil)
2404
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2405
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2406
+/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2407
+BUILD_VARIANTS (x, rint)
2408
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
2409
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
2410
+/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
2413
+#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \
2415
+ if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \
2420
+main (int argc, char **argv)
2422
+ BUILD_VARIANTS ( , trunc)
2423
+ BUILD_VARIANTS (a, round)
2424
+ BUILD_VARIANTS (i, nearbyint)
2425
+ BUILD_VARIANTS (m, floor)
2426
+ BUILD_VARIANTS (p, ceil)
2427
+ BUILD_VARIANTS (x, rint)
2431
+/* { dg-final { cleanup-saved-temps } } */
2432
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
2433
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
2435
/* { dg-do compile } */
2436
/* { dg-options "-O2" } */
2439
+#include "atomic-op-relaxed.x"
2442
-atomic_fetch_add_RELAXED (int a)
2444
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
2448
-atomic_fetch_sub_RELAXED (int a)
2450
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
2454
-atomic_fetch_and_RELAXED (int a)
2456
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
2460
-atomic_fetch_nand_RELAXED (int a)
2462
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
2466
-atomic_fetch_xor_RELAXED (int a)
2468
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
2472
-atomic_fetch_or_RELAXED (int a)
2474
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
2477
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2478
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2479
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
2480
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
2483
+/* { dg-do compile } */
2484
+/* { dg-options "-O3" } */
2486
+#include "arm_neon.h"
2488
+#include "vaddv-intrinsic.x"
2490
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
2491
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
2492
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
2493
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
2494
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x
2499
+atomic_fetch_add_RELAXED (int a)
2501
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
2505
+atomic_fetch_sub_RELAXED (int a)
2507
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
2511
+atomic_fetch_and_RELAXED (int a)
2513
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
2517
+atomic_fetch_nand_RELAXED (int a)
2519
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
2523
+atomic_fetch_xor_RELAXED (int a)
2525
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
2529
+atomic_fetch_or_RELAXED (int a)
2531
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
2533
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.c
2534
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.c
2536
int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
2537
unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
2538
unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
2539
+ int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2540
+ int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2541
int reduce_smax_value = 0;
2542
int reduce_smin_value = -15;
2543
unsigned int reduce_umax_value = 15;
2550
TESTV (reduce_smax, s);
2551
TESTV (reduce_smin, s);
2552
TESTV (reduce_umax, u);
2553
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
2554
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
2556
/* { dg-do compile } */
2557
/* { dg-options "-O2" } */
2560
+#include "atomic-op-acquire.x"
2563
-atomic_fetch_add_ACQUIRE (int a)
2565
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
2569
-atomic_fetch_sub_ACQUIRE (int a)
2571
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
2575
-atomic_fetch_and_ACQUIRE (int a)
2577
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
2581
-atomic_fetch_nand_ACQUIRE (int a)
2583
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
2587
-atomic_fetch_xor_ACQUIRE (int a)
2589
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
2593
-atomic_fetch_or_ACQUIRE (int a)
2595
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
2598
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2599
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2600
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
2601
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
2603
/* { dg-do compile } */
2604
/* { dg-options "-O2" } */
2609
+#include "atomic-comp-swap-release-acquire.x"
2612
-atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
2614
- return __atomic_compare_exchange (&v, &a, &b,
2615
- STRONG, __ATOMIC_RELEASE,
2616
- __ATOMIC_ACQUIRE);
2620
-atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
2622
- return __atomic_compare_exchange (&v, &a, &b,
2623
- WEAK, __ATOMIC_RELEASE,
2624
- __ATOMIC_ACQUIRE);
2628
-atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
2630
- return __atomic_compare_exchange_n (&v, &a, b,
2631
- STRONG, __ATOMIC_RELEASE,
2632
- __ATOMIC_ACQUIRE);
2636
-atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
2638
- return __atomic_compare_exchange_n (&v, &a, b,
2639
- WEAK, __ATOMIC_RELEASE,
2640
- __ATOMIC_ACQUIRE);
2643
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
2644
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
2645
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.x
2646
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x
2647
@@ -138,3 +138,17 @@
2652
+void sabd (pRINT a, pRINT b, pRINT c)
2655
+ for (i = 0; i < 16; i++)
2656
+ c[i] = abs (a[i] - b[i]);
2659
+void saba (pRINT a, pRINT b, pRINT c)
2662
+ for (i = 0; i < 16; i++)
2663
+ c[i] += abs (a[i] - b[i]);
2665
--- a/src/gcc/testsuite/gcc.target/aarch64/subs3.c
2666
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs3.c
2668
+/* { dg-do run } */
2669
+/* { dg-options "-O2 --save-temps -fno-inline" } */
2671
+extern void abort (void);
2672
+typedef long long s64;
2675
+subs_ext (s64 a, int b, int c)
2686
+subs_shift_ext (s64 a, int b, int c)
2688
+ s64 d = (a - ((s64)b << 3));
2701
+ x = subs_ext (0x13000002ll, 41, 15);
2702
+ if (x != 318767121)
2705
+ x = subs_ext (0x50505050ll, 29, 4);
2706
+ if (x != 1347440724)
2709
+ x = subs_ext (0x12121212121ll, 2, 14);
2710
+ if (x != 555819311)
2713
+ x = subs_shift_ext (0x123456789ll, 4, 12);
2714
+ if (x != 591751033)
2717
+ x = subs_shift_ext (0x02020202ll, 9, 8);
2718
+ if (x != 33685963)
2721
+ x = subs_shift_ext (0x987987987987ll, 23, 41);
2722
+ if (x != -2020050673)
2728
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */
2729
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
2730
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x
2735
+atomic_fetch_add_ACQUIRE (int a)
2737
+ return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
2741
+atomic_fetch_sub_ACQUIRE (int a)
2743
+ return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
2747
+atomic_fetch_and_ACQUIRE (int a)
2749
+ return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
2753
+atomic_fetch_nand_ACQUIRE (int a)
2755
+ return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
2759
+atomic_fetch_xor_ACQUIRE (int a)
2761
+ return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
2765
+atomic_fetch_or_ACQUIRE (int a)
2767
+ return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
2769
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
2770
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
2773
+/* { dg-do run } */
2774
+/* { dg-options "-O3" } */
2776
+#include "arm_neon.h"
2778
+extern void abort (void);
2780
+#include "vaddv-intrinsic.x"
2785
+ const float32_t pool_v2sf[] = {4.0f, 9.0f};
2786
+ const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
2787
+ const float64_t pool_v2df[] = {4.0, 9.0};
2789
+ if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
2792
+ if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
2795
+ if (test_vaddv_v2df (pool_v2df) != 13.0)
2800
--- a/src/gcc/testsuite/gcc.target/aarch64/sbc.c
2801
+++ b/src/gcc/testsuite/gcc.target/aarch64/sbc.c
2803
+/* { dg-do run } */
2804
+/* { dg-options "-O2 --save-temps" } */
2806
+extern void abort (void);
2808
+typedef unsigned int u32int;
2809
+typedef unsigned long long u64int;
2812
+test_si (u32int w1, u32int w2, u32int w3, u32int w4)
2815
+ /* { dg-final { scan-assembler "sbc\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+\n" } } */
2816
+ w0 = w1 - w2 - (w3 < w4);
2821
+test_di (u64int x1, u64int x2, u64int x3, u64int x4)
2824
+ /* { dg-final { scan-assembler "sbc\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+\n" } } */
2825
+ x0 = x1 - x2 - (x3 < x4);
2834
+ x = test_si (7, 8, 12, 15);
2837
+ y = test_di (0x987654321ll, 0x123456789ll, 0x345345345ll, 0x123123123ll);
2838
+ if (y != 0x8641fdb98ll)
2843
+/* { dg-final { cleanup-saved-temps } } */
2844
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
2845
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x
2853
+atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
2855
+ return __atomic_compare_exchange (&v, &a, &b,
2856
+ STRONG, __ATOMIC_RELEASE,
2857
+ __ATOMIC_ACQUIRE);
2861
+atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
2863
+ return __atomic_compare_exchange (&v, &a, &b,
2864
+ WEAK, __ATOMIC_RELEASE,
2865
+ __ATOMIC_ACQUIRE);
2869
+atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
2871
+ return __atomic_compare_exchange_n (&v, &a, b,
2872
+ STRONG, __ATOMIC_RELEASE,
2873
+ __ATOMIC_ACQUIRE);
2877
+atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
2879
+ return __atomic_compare_exchange_n (&v, &a, b,
2880
+ WEAK, __ATOMIC_RELEASE,
2881
+ __ATOMIC_ACQUIRE);
2883
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
2884
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
2886
/* { dg-do compile } */
2887
/* { dg-options "-O2" } */
2890
+#include "atomic-op-int.x"
2893
-atomic_fetch_add_RELAXED (int a)
2895
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
2899
-atomic_fetch_sub_RELAXED (int a)
2901
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
2905
-atomic_fetch_and_RELAXED (int a)
2907
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
2911
-atomic_fetch_nand_RELAXED (int a)
2913
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
2917
-atomic_fetch_xor_RELAXED (int a)
2919
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
2923
-atomic_fetch_or_RELAXED (int a)
2925
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
2928
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2929
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2930
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
2931
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
2933
/* { dg-do compile } */
2934
/* { dg-options "-O2" } */
2937
+#include "atomic-op-seq_cst.x"
2940
-atomic_fetch_add_SEQ_CST (int a)
2942
- return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
2946
-atomic_fetch_sub_SEQ_CST (int a)
2948
- return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
2952
-atomic_fetch_and_SEQ_CST (int a)
2954
- return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
2958
-atomic_fetch_nand_SEQ_CST (int a)
2960
- return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
2964
-atomic_fetch_xor_SEQ_CST (int a)
2966
- return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
2970
-atomic_fetch_or_SEQ_CST (int a)
2972
- return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
2975
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2976
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
2977
--- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
2978
+++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
2982
+test_vaddv_v2sf (const float32_t *pool)
2986
+ val = vld1_f32 (pool);
2987
+ return vaddv_f32 (val);
2991
+test_vaddv_v4sf (const float32_t *pool)
2995
+ val = vld1q_f32 (pool);
2996
+ return vaddvq_f32 (val);
3000
+test_vaddv_v2df (const float64_t *pool)
3004
+ val = vld1q_f64 (pool);
3005
+ return vaddvq_f64 (val);
3007
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
3008
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
3010
/* { dg-do compile } */
3011
/* { dg-options "-O2" } */
3014
+#include "atomic-op-consume.x"
3017
-atomic_fetch_add_CONSUME (int a)
3019
- return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
3023
-atomic_fetch_sub_CONSUME (int a)
3025
- return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
3029
-atomic_fetch_and_CONSUME (int a)
3031
- return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
3035
-atomic_fetch_nand_CONSUME (int a)
3037
- return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
3041
-atomic_fetch_xor_CONSUME (int a)
3043
- return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
3047
-atomic_fetch_or_CONSUME (int a)
3049
- return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
3052
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3053
/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3054
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
3055
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
3057
/* { dg-do compile } */
3058
/* { dg-options "-O2" } */
3061
+#include "atomic-op-char.x"
3064
-atomic_fetch_add_RELAXED (char a)
3066
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3070
-atomic_fetch_sub_RELAXED (char a)
3072
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3076
-atomic_fetch_and_RELAXED (char a)
3078
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3082
-atomic_fetch_nand_RELAXED (char a)
3084
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3088
-atomic_fetch_xor_RELAXED (char a)
3090
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3094
-atomic_fetch_or_RELAXED (char a)
3096
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3099
/* { dg-final { scan-assembler-times "ldxrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3100
/* { dg-final { scan-assembler-times "stxrb\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3101
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
3102
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x
3107
+atomic_fetch_add_RELAXED (int a)
3109
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3113
+atomic_fetch_sub_RELAXED (int a)
3115
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3119
+atomic_fetch_and_RELAXED (int a)
3121
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3125
+atomic_fetch_nand_RELAXED (int a)
3127
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3131
+atomic_fetch_xor_RELAXED (int a)
3133
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3137
+atomic_fetch_or_RELAXED (int a)
3139
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3141
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
3142
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x
3147
+atomic_fetch_add_SEQ_CST (int a)
3149
+ return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
3153
+atomic_fetch_sub_SEQ_CST (int a)
3155
+ return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
3159
+atomic_fetch_and_SEQ_CST (int a)
3161
+ return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
3165
+atomic_fetch_nand_SEQ_CST (int a)
3167
+ return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
3171
+atomic_fetch_xor_SEQ_CST (int a)
3173
+ return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
3177
+atomic_fetch_or_SEQ_CST (int a)
3179
+ return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
3181
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
3182
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x
3187
+atomic_fetch_add_CONSUME (int a)
3189
+ return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
3193
+atomic_fetch_sub_CONSUME (int a)
3195
+ return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
3199
+atomic_fetch_and_CONSUME (int a)
3201
+ return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
3205
+atomic_fetch_nand_CONSUME (int a)
3207
+ return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
3211
+atomic_fetch_xor_CONSUME (int a)
3213
+ return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
3217
+atomic_fetch_or_CONSUME (int a)
3219
+ return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
3221
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
3222
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
3224
/* { dg-do compile } */
3225
/* { dg-options "-O2" } */
3228
+#include "atomic-op-short.x"
3231
-atomic_fetch_add_RELAXED (short a)
3233
- return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3237
-atomic_fetch_sub_RELAXED (short a)
3239
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3243
-atomic_fetch_and_RELAXED (short a)
3245
- return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3249
-atomic_fetch_nand_RELAXED (short a)
3251
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3255
-atomic_fetch_xor_RELAXED (short a)
3257
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3261
-atomic_fetch_or_RELAXED (short a)
3263
- return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3266
/* { dg-final { scan-assembler-times "ldxrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3267
/* { dg-final { scan-assembler-times "stxrh\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3268
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
3269
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x
3274
+atomic_fetch_add_RELAXED (char a)
3276
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3280
+atomic_fetch_sub_RELAXED (char a)
3282
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3286
+atomic_fetch_and_RELAXED (char a)
3288
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3292
+atomic_fetch_nand_RELAXED (char a)
3294
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3298
+atomic_fetch_xor_RELAXED (char a)
3300
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3304
+atomic_fetch_or_RELAXED (char a)
3306
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3308
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
3309
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
3311
/* { dg-final { scan-assembler "fdiv\\tv" } } */
3312
/* { dg-final { scan-assembler "fneg\\tv" } } */
3313
/* { dg-final { scan-assembler "fabs\\tv" } } */
3314
+/* { dg-final { scan-assembler "fabd\\tv" } } */
3315
--- a/src/gcc/testsuite/gcc.target/aarch64/adds1.c
3316
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds1.c
3318
+/* { dg-do run } */
3319
+/* { dg-options "-O2 --save-temps -fno-inline" } */
3321
+extern void abort (void);
3324
+adds_si_test1 (int a, int b, int c)
3328
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
3336
+adds_si_test2 (int a, int b, int c)
3340
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, 255" } } */
3348
+adds_si_test3 (int a, int b, int c)
3350
+ int d = a + (b << 3);
3352
+ /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
3359
+typedef long long s64;
3362
+adds_di_test1 (s64 a, s64 b, s64 c)
3366
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
3374
+adds_di_test2 (s64 a, s64 b, s64 c)
3378
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, 255" } } */
3386
+adds_di_test3 (s64 a, s64 b, s64 c)
3388
+ s64 d = a + (b << 3);
3390
+ /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
3402
+ x = adds_si_test1 (29, 4, 5);
3406
+ x = adds_si_test1 (5, 2, 20);
3410
+ x = adds_si_test2 (29, 4, 5);
3414
+ x = adds_si_test2 (1024, 2, 20);
3418
+ x = adds_si_test3 (35, 4, 5);
3422
+ x = adds_si_test3 (5, 2, 20);
3426
+ y = adds_di_test1 (0x130000029ll,
3430
+ if (y != 0xc75050536)
3433
+ y = adds_di_test1 (0x5000500050005ll,
3434
+ 0x2111211121112ll,
3435
+ 0x0000000002020ll);
3436
+ if (y != 0x9222922294249)
3439
+ y = adds_di_test2 (0x130000029ll,
3442
+ if (y != 0x955050631)
3445
+ y = adds_di_test2 (0x130002900ll,
3448
+ if (y != 0x955052f08)
3451
+ y = adds_di_test3 (0x130000029ll,
3454
+ if (y != 0x9b9050576)
3457
+ y = adds_di_test3 (0x130002900ll,
3460
+ if (y != 0xafd052e4d)
3466
+/* { dg-final { cleanup-saved-temps } } */
3467
--- a/src/gcc/testsuite/gcc.target/aarch64/ror.c
3468
+++ b/src/gcc/testsuite/gcc.target/aarch64/ror.c
3470
+/* { dg-options "-O2 --save-temps" } */
3471
+/* { dg-do run } */
3473
+extern void abort (void);
3478
+ /* { dg-final { scan-assembler "ror\tw\[0-9\]+, w\[0-9\]+, 27\n" } } */
3479
+ return (a << 5) | ((unsigned int) a >> 27);
3483
+test_di (long long a)
3485
+ /* { dg-final { scan-assembler "ror\tx\[0-9\]+, x\[0-9\]+, 45\n" } } */
3486
+ return (a << 19) | ((unsigned long long) a >> 45);
3494
+ v = test_si (0x0203050);
3495
+ if (v != 0x4060a00)
3497
+ w = test_di (0x0000020506010304ll);
3498
+ if (w != 0x1028300818200000ll)
3503
+/* { dg-final { cleanup-saved-temps } } */
3504
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
3505
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
3507
/* { dg-do compile } */
3508
/* { dg-options "-O2" } */
3511
+#include "atomic-op-release.x"
3514
-atomic_fetch_add_RELEASE (int a)
3516
- return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
3520
-atomic_fetch_sub_RELEASE (int a)
3522
- return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
3526
-atomic_fetch_and_RELEASE (int a)
3528
- return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
3532
-atomic_fetch_nand_RELEASE (int a)
3534
- return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
3538
-atomic_fetch_xor_RELEASE (int a)
3540
- return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
3544
-atomic_fetch_or_RELEASE (int a)
3546
- return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
3549
/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3550
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
3551
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
3552
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x
3557
+atomic_fetch_add_RELAXED (short a)
3559
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
3563
+atomic_fetch_sub_RELAXED (short a)
3565
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
3569
+atomic_fetch_and_RELAXED (short a)
3571
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
3575
+atomic_fetch_nand_RELAXED (short a)
3577
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
3581
+atomic_fetch_xor_RELAXED (short a)
3583
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
3587
+atomic_fetch_or_RELAXED (short a)
3589
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
3591
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
3592
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c
3594
+/* { dg-do run } */
3595
+/* { dg-options "-O3 --save-temps -ffast-math" } */
3597
+#include <arm_neon.h>
3599
+extern void abort (void);
3600
+extern double fabs (double);
3602
+#define NUM_TESTS 8
3603
+#define DELTA 0.000001
3605
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f,
3606
+ 200.0f, -800.0f, -13.0f, -0.5f};
3607
+double input_f64[] = {0.1, -0.1, 0.4, 10.3,
3608
+ 200.0, -800.0, -13.0, -0.5};
3610
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
3612
+test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t (void) \
3616
+ int nlanes = LANES; \
3617
+ U##int##WIDTH##_t expected_out[NUM_TESTS]; \
3618
+ U##int##WIDTH##_t actual_out[NUM_TESTS]; \
3620
+ for (i = 0; i < NUM_TESTS; i++) \
3623
+ = vcvt##SUFFIX##D##_##S##WIDTH##_f##WIDTH (input_f##WIDTH[i]); \
3624
+ /* Don't vectorize this. */ \
3625
+ asm volatile ("" : : : "memory"); \
3628
+ for (i = 0; i < NUM_TESTS; i+=nlanes) \
3630
+ U##int##WIDTH##x##LANES##_t out = \
3631
+ vcvt##SUFFIX##Q##_##S##WIDTH##_f##WIDTH \
3632
+ (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \
3633
+ vst1##Q##_##S##WIDTH (actual_out + i, out); \
3636
+ for (i = 0; i < NUM_TESTS; i++) \
3637
+ ret &= fabs (expected_out[i] - actual_out[i]) < DELTA; \
3643
+#define BUILD_VARIANTS(SUFFIX) \
3644
+TEST (SUFFIX, , 32, 2, s, ,s) \
3645
+TEST (SUFFIX, q, 32, 4, s, ,s) \
3646
+TEST (SUFFIX, q, 64, 2, s, ,d) \
3647
+TEST (SUFFIX, , 32, 2, u,u,s) \
3648
+TEST (SUFFIX, q, 32, 4, u,u,s) \
3649
+TEST (SUFFIX, q, 64, 2, u,u,d) \
3652
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */
3653
+/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */
3654
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3655
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3656
+/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3657
+/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */
3658
+/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */
3659
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3660
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3661
+/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3663
+/* { dg-final { scan-assembler "fcvtas\\tw\[0-9\]+, s\[0-9\]+" } } */
3664
+/* { dg-final { scan-assembler "fcvtas\\tx\[0-9\]+, d\[0-9\]+" } } */
3665
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3666
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3667
+/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3668
+/* { dg-final { scan-assembler "fcvtau\\tw\[0-9\]+, s\[0-9\]+" } } */
3669
+/* { dg-final { scan-assembler "fcvtau\\tx\[0-9\]+, d\[0-9\]+" } } */
3670
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3671
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3672
+/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3674
+/* { dg-final { scan-assembler "fcvtms\\tw\[0-9\]+, s\[0-9\]+" } } */
3675
+/* { dg-final { scan-assembler "fcvtms\\tx\[0-9\]+, d\[0-9\]+" } } */
3676
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3677
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3678
+/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3679
+/* { dg-final { scan-assembler "fcvtmu\\tw\[0-9\]+, s\[0-9\]+" } } */
3680
+/* { dg-final { scan-assembler "fcvtmu\\tx\[0-9\]+, d\[0-9\]+" } } */
3681
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3682
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3683
+/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3685
+/* { dg-final { scan-assembler "fcvtns\\tw\[0-9\]+, s\[0-9\]+" } } */
3686
+/* { dg-final { scan-assembler "fcvtns\\tx\[0-9\]+, d\[0-9\]+" } } */
3687
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3688
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3689
+/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3690
+/* { dg-final { scan-assembler "fcvtnu\\tw\[0-9\]+, s\[0-9\]+" } } */
3691
+/* { dg-final { scan-assembler "fcvtnu\\tx\[0-9\]+, d\[0-9\]+" } } */
3692
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3693
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3694
+/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3696
+/* { dg-final { scan-assembler "fcvtps\\tw\[0-9\]+, s\[0-9\]+" } } */
3697
+/* { dg-final { scan-assembler "fcvtps\\tx\[0-9\]+, d\[0-9\]+" } } */
3698
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3699
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3700
+/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3701
+/* { dg-final { scan-assembler "fcvtpu\\tw\[0-9\]+, s\[0-9\]+" } } */
3702
+/* { dg-final { scan-assembler "fcvtpu\\tx\[0-9\]+, d\[0-9\]+" } } */
3703
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
3704
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
3705
+/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
3708
+#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \
3710
+ if (!test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t ()) \
3715
+main (int argc, char **argv)
3717
+ BUILD_VARIANTS ( )
3718
+ BUILD_VARIANTS (a)
3719
+ BUILD_VARIANTS (m)
3720
+ BUILD_VARIANTS (n)
3721
+ BUILD_VARIANTS (p)
3725
+/* { dg-final { cleanup-saved-temps } } */
3726
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
3727
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x
3732
+atomic_fetch_add_RELEASE (int a)
3734
+ return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
3738
+atomic_fetch_sub_RELEASE (int a)
3740
+ return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
3744
+atomic_fetch_and_RELEASE (int a)
3746
+ return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
3750
+atomic_fetch_nand_RELEASE (int a)
3752
+ return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
3756
+atomic_fetch_xor_RELEASE (int a)
3758
+ return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
3762
+atomic_fetch_or_RELEASE (int a)
3764
+ return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
3766
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
3767
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
3768
@@ -117,6 +117,16 @@
3769
9.0, 10.0, 11.0, 12.0,
3770
13.0, 14.0, 15.0, 16.0 };
3772
+ F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
3773
+ 1.0f, 1.0f, 1.0f, 1.0f,
3774
+ 1.0f, 1.0f, 1.0f, 1.0f,
3775
+ 1.0f, 1.0f, 1.0f, 1.0f };
3777
+ F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
3778
+ 1.0, 1.0, 1.0, 1.0,
3779
+ 1.0, 1.0, 1.0, 1.0,
3780
+ 1.0, 1.0, 1.0, 1.0 };
3782
/* Setup input vectors. */
3783
for (i=1; i<=16; i++)
3793
--- a/src/gcc/testsuite/gcc.target/aarch64/ngc.c
3794
+++ b/src/gcc/testsuite/gcc.target/aarch64/ngc.c
3796
+/* { dg-do run } */
3797
+/* { dg-options "-O2 --save-temps -fno-inline" } */
3799
+extern void abort (void);
3800
+typedef unsigned int u32;
3803
+ngc_si (u32 a, u32 b, u32 c, u32 d)
3809
+typedef unsigned long long u64;
3812
+ngc_si_tst (u64 a, u32 b, u32 c, u32 d)
3819
+ngc_di (u64 a, u64 b, u64 c, u64 d)
3831
+ x = ngc_si (29, 4, 5, 4);
3835
+ x = ngc_si (1024, 2, 20, 13);
3839
+ y = ngc_si_tst (0x130000029ll, 32, 50, 12);
3840
+ if (y != 0xffffffe0)
3843
+ y = ngc_si_tst (0x5000500050005ll, 21, 2, 14);
3844
+ if (y != 0xffffffea)
3847
+ y = ngc_di (0x130000029ll, 0x320000004ll, 0x505050505ll, 0x123123123ll);
3848
+ if (y != 0xfffffffcdffffffc)
3851
+ y = ngc_di (0x5000500050005ll,
3852
+ 0x2111211121112ll, 0x0000000002020ll, 0x1414575046477ll);
3853
+ if (y != 0xfffdeeedeeedeeed)
3859
+/* { dg-final { scan-assembler-times "ngc\tw\[0-9\]+, w\[0-9\]+" 2 } } */
3860
+/* { dg-final { scan-assembler-times "ngc\tx\[0-9\]+, x\[0-9\]+" 1 } } */
3861
+/* { dg-final { cleanup-saved-temps } } */
3862
--- a/src/gcc/testsuite/gcc.target/aarch64/cmp.c
3863
+++ b/src/gcc/testsuite/gcc.target/aarch64/cmp.c
3865
+/* { dg-do compile } */
3866
+/* { dg-options "-O2" } */
3869
+cmp_si_test1 (int a, int b, int c)
3878
+cmp_si_test2 (int a, int b, int c)
3886
+typedef long long s64;
3889
+cmp_di_test1 (s64 a, s64 b, s64 c)
3898
+cmp_di_test2 (s64 a, s64 b, s64 c)
3907
+cmp_di_test3 (int a, s64 b, s64 c)
3916
+cmp_di_test4 (int a, s64 b, s64 c)
3918
+ if (((s64)a << 3) > b)
3924
+/* { dg-final { scan-assembler-times "cmp\tw\[0-9\]+, w\[0-9\]+" 2 } } */
3925
+/* { dg-final { scan-assembler-times "cmp\tx\[0-9\]+, x\[0-9\]+" 4 } } */
3926
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
3927
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x
3929
extern float fabsf (float);
3930
extern double fabs (double);
3932
+#define DEF3a(fname, type, op) \
3933
+ void fname##_##type (pR##type a, \
3938
+ for (i = 0; i < 16; i++) \
3939
+ a[i] = op (b[i] - c[i]); \
3942
#define DEF3(fname, type, op) \
3943
void fname##_##type (pR##type a, \
3948
- for (i=0; i<16; i++) \
3949
+ for (i = 0; i < 16; i++) \
3950
a[i] = b[i] op c[i]; \
3957
- for (i=0; i<16; i++) \
3958
+ for (i = 0; i < 16; i++) \
3963
+#define DEFN3a(fname, op) \
3964
+ DEF3a (fname, F32, op) \
3965
+ DEF3a (fname, F64, op)
3967
#define DEFN3(fname, op) \
3968
DEF3 (fname, F32, op) \
3969
DEF3 (fname, F64, op)
3972
DEF2 (abs, F32, fabsf)
3973
DEF2 (abs, F64, fabs)
3974
+DEF3a (fabd, F32, fabsf)
3975
+DEF3a (fabd, F64, fabs)
3976
--- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
3977
+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
3979
/* { dg-do compile } */
3980
/* { dg-options "-O2" } */
3983
+#include "atomic-op-acq_rel.x"
3986
-atomic_fetch_add_ACQ_REL (int a)
3988
- return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
3992
-atomic_fetch_sub_ACQ_REL (int a)
3994
- return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
3998
-atomic_fetch_and_ACQ_REL (int a)
4000
- return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
4004
-atomic_fetch_nand_ACQ_REL (int a)
4006
- return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
4010
-atomic_fetch_xor_ACQ_REL (int a)
4012
- return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
4016
-atomic_fetch_or_ACQ_REL (int a)
4018
- return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
4021
/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4022
/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
4023
--- a/src/gcc/testsuite/gcc.target/aarch64/subs1.c
4024
+++ b/src/gcc/testsuite/gcc.target/aarch64/subs1.c
4026
+/* { dg-do run } */
4027
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4029
+extern void abort (void);
4032
+subs_si_test1 (int a, int b, int c)
4036
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4044
+subs_si_test2 (int a, int b, int c)
4048
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, #255" } } */
4056
+subs_si_test3 (int a, int b, int c)
4058
+ int d = a - (b << 3);
4060
+ /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4067
+typedef long long s64;
4070
+subs_di_test1 (s64 a, s64 b, s64 c)
4074
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4082
+subs_di_test2 (s64 a, s64 b, s64 c)
4086
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, #255" } } */
4094
+subs_di_test3 (s64 a, s64 b, s64 c)
4096
+ s64 d = a - (b << 3);
4098
+ /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4110
+ x = subs_si_test1 (29, 4, 5);
4114
+ x = subs_si_test1 (5, 2, 20);
4118
+ x = subs_si_test2 (29, 4, 5);
4122
+ x = subs_si_test2 (1024, 2, 20);
4126
+ x = subs_si_test3 (35, 4, 5);
4130
+ x = subs_si_test3 (5, 2, 20);
4134
+ y = subs_di_test1 (0x130000029ll,
4138
+ if (y != 0x45000002d)
4141
+ y = subs_di_test1 (0x5000500050005ll,
4142
+ 0x2111211121112ll,
4143
+ 0x0000000002020ll);
4144
+ if (y != 0x7111711171117)
4147
+ y = subs_di_test2 (0x130000029ll,
4150
+ if (y != 0x955050433)
4153
+ y = subs_di_test2 (0x130002900ll,
4156
+ if (y != 0x955052d0a)
4159
+ y = subs_di_test3 (0x130000029ll,
4162
+ if (y != 0x3790504f6)
4165
+ y = subs_di_test3 (0x130002900ll,
4168
+ if (y != 0x27d052dcd)
4174
+/* { dg-final { cleanup-saved-temps } } */
4175
--- a/src/gcc/testsuite/gcc.target/aarch64/adds2.c
4176
+++ b/src/gcc/testsuite/gcc.target/aarch64/adds2.c
4178
+/* { dg-do run } */
4179
+/* { dg-options "-O2 --save-temps -fno-inline" } */
4181
+extern void abort (void);
4184
+adds_si_test1 (int a, int b, int c)
4188
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4189
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
4197
+adds_si_test2 (int a, int b, int c)
4199
+ int d = a + 0xfff;
4201
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
4202
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, 4095" } } */
4210
+adds_si_test3 (int a, int b, int c)
4212
+ int d = a + (b << 3);
4214
+ /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4215
+ /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */
4222
+typedef long long s64;
4225
+adds_di_test1 (s64 a, s64 b, s64 c)
4229
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4230
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
4238
+adds_di_test2 (s64 a, s64 b, s64 c)
4240
+ s64 d = a + 0x1000ll;
4242
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
4243
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, 4096" } } */
4251
+adds_di_test3 (s64 a, s64 b, s64 c)
4253
+ s64 d = a + (b << 3);
4255
+ /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4256
+ /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */
4268
+ x = adds_si_test1 (29, 4, 5);
4272
+ x = adds_si_test1 (5, 2, 20);
4276
+ x = adds_si_test2 (29, 4, 5);
4280
+ x = adds_si_test2 (1024, 2, 20);
4284
+ x = adds_si_test3 (35, 4, 5);
4288
+ x = adds_si_test3 (5, 2, 20);
4292
+ y = adds_di_test1 (0x130000029ll,
4296
+ if (y != 0xc75050536)
4299
+ y = adds_di_test1 (0x5000500050005ll,
4300
+ 0x2111211121112ll,
4301
+ 0x0000000002020ll);
4302
+ if (y != 0x9222922294249)
4305
+ y = adds_di_test2 (0x130000029ll,
4308
+ if (y != 0x955051532)
4311
+ y = adds_di_test2 (0x540004100ll,
4314
+ if (y != 0x1065055309)
4317
+ y = adds_di_test3 (0x130000029ll,
4320
+ if (y != 0x9b9050576)
4323
+ y = adds_di_test3 (0x130002900ll,
4326
+ if (y != 0xafd052e4d)
4332
+/* { dg-final { cleanup-saved-temps } } */
4333
--- a/src/gcc/testsuite/lib/target-supports.exp
4334
+++ b/src/gcc/testsuite/lib/target-supports.exp
4335
@@ -2012,6 +2012,7 @@
4336
|| ([istarget powerpc*-*-*]
4337
&& ![istarget powerpc-*-linux*paired*])
4338
|| [istarget x86_64-*-*]
4339
+ || [istarget aarch64*-*-*]
4340
|| ([istarget arm*-*-*]
4341
&& [check_effective_target_arm_neon_ok])} {
4342
set et_vect_uintfloat_cvt_saved 1
4343
@@ -2147,22 +2148,6 @@
4347
-# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
4348
-# -mfloat-abi=softfp
4349
-proc check_effective_target_arm_v8_neon_ok {} {
4350
- if { [check_effective_target_arm32] } {
4351
- return [check_no_compiler_messages arm_v8_neon_ok object {
4354
- __asm__ volatile ("vrintn.f32 q0, q0");
4357
- } "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"]
4363
# Return 1 if this is an ARM target supporting -mfpu=vfp
4364
# -mfloat-abi=hard. Some multilibs may be incompatible with these
4366
@@ -2226,7 +2211,8 @@
4367
if { ! [check_effective_target_arm_v8_neon_ok] } {
4370
- return "$flags -march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=softfp"
4371
+ global et_arm_v8_neon_flags
4372
+ return "$flags $et_arm_v8_neon_flags -march=armv8-a"
4375
# Add the options needed for NEON. We need either -mfloat-abi=softfp
4376
@@ -2270,6 +2256,79 @@
4377
check_effective_target_arm_neon_ok_nocache]
4380
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
4381
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
4382
+# incompatible with these options. Also set et_arm_neon_flags to the
4383
+# best options to add.
4385
+proc check_effective_target_arm_neon_fp16_ok_nocache { } {
4386
+ global et_arm_neon_fp16_flags
4387
+ set et_arm_neon_fp16_flags ""
4388
+ if { [check_effective_target_arm32] } {
4389
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
4390
+ "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
4391
+ if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
4392
+ #include "arm_neon.h"
4394
+ foo (float32x4_t arg)
4396
+ return vcvt_f16_f32 (arg);
4399
+ set et_arm_neon_fp16_flags $flags
4408
+proc check_effective_target_arm_neon_fp16_ok { } {
4409
+ return [check_cached_effective_target arm_neon_fp16_ok \
4410
+ check_effective_target_arm_neon_fp16_ok_nocache]
4413
+proc add_options_for_arm_neon_fp16 { flags } {
4414
+ if { ! [check_effective_target_arm_neon_fp16_ok] } {
4417
+ global et_arm_neon_fp16_flags
4418
+ return "$flags $et_arm_neon_fp16_flags"
4421
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
4422
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
4423
+# incompatible with these options. Also set et_arm_v8_neon_flags to the
4424
+# best options to add.
4426
+proc check_effective_target_arm_v8_neon_ok_nocache { } {
4427
+ global et_arm_v8_neon_flags
4428
+ set et_arm_v8_neon_flags ""
4429
+ if { [check_effective_target_arm32] } {
4430
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
4431
+ if { [check_no_compiler_messages_nocache arm_v8_neon_ok object {
4432
+ #include "arm_neon.h"
4436
+ __asm__ volatile ("vrintn.f32 q0, q0");
4439
+ set et_arm_v8_neon_flags $flags
4448
+proc check_effective_target_arm_v8_neon_ok { } {
4449
+ return [check_cached_effective_target arm_v8_neon_ok \
4450
+ check_effective_target_arm_v8_neon_ok_nocache]
4453
# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
4454
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
4455
# incompatible with these options. Also set et_arm_neonv2_flags to the
4456
@@ -2509,6 +2568,24 @@
4457
} [add_options_for_arm_neonv2 ""]]
4460
+# Return 1 if the target supports executing ARMv8 NEON instructions, 0
4463
+proc check_effective_target_arm_v8_neon_hw { } {
4464
+ return [check_runtime arm_v8_neon_hw_available {
4465
+ #include "arm_neon.h"
4470
+ asm ("vrinta.f32 %P0, %P1"
4475
+ } [add_options_for_arm_v8_neon ""]]
4478
# Return 1 if this is a ARM target with NEON enabled.
4480
proc check_effective_target_arm_neon { } {
4481
--- a/src/gcc/testsuite/ChangeLog.linaro
4482
+++ b/src/gcc/testsuite/ChangeLog.linaro
4484
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4486
+ Backport from trunk r198019.
4487
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
4489
+ * gcc.target/aarch64/adds1.c: New.
4490
+ * gcc.target/aarch64/adds2.c: New.
4491
+ * gcc.target/aarch64/subs1.c: New.
4492
+ * gcc.target/aarch64/subs2.c: New.
4494
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4496
+ Backport from trunk r198394,198396-198400,198402-198404,198406.
4497
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
4499
+ * lib/target-supports.exp (vect_uintfloat_cvt): Enable for AArch64.
4501
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
4503
+ * gcc.target/aarch64/vect-vcvt.c: New.
4505
+ 2013-04-29 James Greenhalgh <james.greenhalgh@arm.com>
4507
+ * gcc.target/aarch64/vect-vrnd.c: New.
4509
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4511
+ Backport from trunk r198302-198306,198316.
4512
+ 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
4513
+ Tejas Belagod <tejas.belagod@arm.com>
4515
+ * gcc.target/aarch64/vaddv-intrinsic.c: New.
4516
+ * gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
4517
+ * gcc.target/aarch64/vaddv-intrinsic.x: Likewise.
4519
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
4521
+ * gcc.target/aarch64/cmp.c: New.
4523
+ 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
4525
+ * gcc.target/aarch64/ngc.c: New.
4527
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4529
+ Backport from trunk r198298.
4530
+ 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
4532
+ * lib/target-supports.exp
4533
+ (check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
4534
+ (check_effective_target_arm_neon_fp16_ok): Likewise.
4535
+ (add_options_for_arm_neon_fp16): Likewise.
4536
+ * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
4537
+ * gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
4539
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4541
+ Backport from trunk r198136-198137,198142,198176
4542
+ 2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
4544
+ * gcc.target/aarch64/vrecps.c: New.
4545
+ * gcc.target/aarch64/vrecpx.c: Likewise.
4547
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4549
+ Backport from trunk r198020.
4550
+ 2013-04-16 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
4552
+ * gcc.target/aarch64/adds3.c: New.
4553
+ * gcc.target/aarch64/subs3.c: New.
4555
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4557
+ Backport from trunk r197965.
4558
+ 2013-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
4560
+ * gcc.target/arm/anddi3-opt.c: New test.
4561
+ * gcc.target/arm/anddi3-opt2.c: Likewise.
4563
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4565
+ Backport from trunk r197642.
4566
+ 2013-04-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
4568
+ * gcc.target/arm/minmax_minus.c: New test.
4570
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4572
+ Backport from trunk r197530,197921.
4573
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
4575
+ * gcc.target/arm/peep-ldrd-1.c: New test.
4576
+ * gcc.target/arm/peep-strd-1.c: Likewise.
4578
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4580
+ Backport from trunk r197523.
4581
+ 2013-04-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
4583
+ * lib/target-supports.exp (add_options_for_arm_v8_neon):
4584
+ Add -march=armv8-a when we use v8 NEON.
4585
+ (check_effective_target_vect_call_btruncf): Remove arm-*-*-*.
4586
+ (check_effective_target_vect_call_ceilf): Likewise.
4587
+ (check_effective_target_vect_call_floorf): Likewise.
4588
+ (check_effective_target_vect_call_roundf): Likewise.
4589
+ (check_vect_support_and_set_flags): Remove check for arm_v8_neon.
4590
+ * gcc.target/arm/vect-rounding-btruncf.c: New testcase.
4591
+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise.
4592
+ * gcc.target/arm/vect-rounding-floorf.c: Likewise.
4593
+ * gcc.target/arm/vect-rounding-roundf.c: Likewise.
4595
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4597
+ Backport from trunk r197518-197522,197516-197528.
4598
+ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
4600
+ * gcc.target/arm/negdi-1.c: New test.
4601
+ * gcc.target/arm/negdi-2.c: Likewise.
4602
+ * gcc.target/arm/negdi-3.c: Likewise.
4603
+ * gcc.target/arm/negdi-4.c: Likewise.
4605
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4607
+ Backport from trunk r197489-197491.
4608
+ 2013-04-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
4610
+ * lib/target-supports.exp (check_effective_target_arm_v8_neon_hw):
4612
+ (check_effective_target_arm_v8_neon_ok_nocache):
4614
+ (check_effective_target_arm_v8_neon_ok): Change to use
4615
+ check_effective_target_arm_v8_neon_ok_nocache.
4616
+ (add_options_for_arm_v8_neon): Use et_arm_v8_neon_flags to set ARMv8
4618
+ (check_effective_target_vect_call_btruncf):
4619
+ Enable for arm and ARMv8 NEON.
4620
+ (check_effective_target_vect_call_ceilf): Likewise.
4621
+ (check_effective_target_vect_call_floorf): Likewise.
4622
+ (check_effective_target_vect_call_roundf): Likewise.
4623
+ (check_vect_support_and_set_flags): Handle ARMv8 NEON effective
4626
+2013-05-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4628
+ Backport from trunk r196795-196797,196957.
4629
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
4631
+ * gcc.target/aarch64/sbc.c: New test.
4633
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
4635
+ * gcc.target/aarch64/ror.c: New test.
4637
+ 2013-03-19 Ian Bolton <ian.bolton@arm.com>
4639
+ * gcc.target/aarch64/extr.c: New test.
4641
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4643
+ * GCC Linaro 4.8-2013.04 released.
4645
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4647
+ Backport from trunk r197052.
4648
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
4650
+ * gcc.target/arm/vseleqdf.c: New test.
4651
+ * gcc.target/arm/vseleqsf.c: Likewise.
4652
+ * gcc.target/arm/vselgedf.c: Likewise.
4653
+ * gcc.target/arm/vselgesf.c: Likewise.
4654
+ * gcc.target/arm/vselgtdf.c: Likewise.
4655
+ * gcc.target/arm/vselgtsf.c: Likewise.
4656
+ * gcc.target/arm/vselledf.c: Likewise.
4657
+ * gcc.target/arm/vsellesf.c: Likewise.
4658
+ * gcc.target/arm/vselltdf.c: Likewise.
4659
+ * gcc.target/arm/vselltsf.c: Likewise.
4660
+ * gcc.target/arm/vselnedf.c: Likewise.
4661
+ * gcc.target/arm/vselnesf.c: Likewise.
4662
+ * gcc.target/arm/vselvcdf.c: Likewise.
4663
+ * gcc.target/arm/vselvcsf.c: Likewise.
4664
+ * gcc.target/arm/vselvsdf.c: Likewise.
4665
+ * gcc.target/arm/vselvssf.c: Likewise.
4667
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4669
+ Backport from trunk r197051.
4670
+ 2013-03-25 Kyrylo Tkachov <kyrylo.tkachov at arm.com>
4672
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.c: Move test
4674
+ * gcc.target/aarch64/atomic-comp-swap-release-acquire.x: ... to here.
4675
+ * gcc.target/aarch64/atomic-op-acq_rel.c: Move test body from here...
4676
+ * gcc.target/aarch64/atomic-op-acq_rel.x: ... to here.
4677
+ * gcc.target/aarch64/atomic-op-acquire.c: Move test body from here...
4678
+ * gcc.target/aarch64/atomic-op-acquire.x: ... to here.
4679
+ * gcc.target/aarch64/atomic-op-char.c: Move test body from here...
4680
+ * gcc.target/aarch64/atomic-op-char.x: ... to here.
4681
+ * gcc.target/aarch64/atomic-op-consume.c: Move test body from here...
4682
+ * gcc.target/aarch64/atomic-op-consume.x: ... to here.
4683
+ * gcc.target/aarch64/atomic-op-int.c: Move test body from here...
4684
+ * gcc.target/aarch64/atomic-op-int.x: ... to here.
4685
+ * gcc.target/aarch64/atomic-op-relaxed.c: Move test body from here...
4686
+ * gcc.target/aarch64/atomic-op-relaxed.x: ... to here.
4687
+ * gcc.target/aarch64/atomic-op-release.c: Move test body from here...
4688
+ * gcc.target/aarch64/atomic-op-release.x: ... to here.
4689
+ * gcc.target/aarch64/atomic-op-seq_cst.c: Move test body from here...
4690
+ * gcc.target/aarch64/atomic-op-seq_cst.x: ... to here.
4691
+ * gcc.target/aarch64/atomic-op-short.c: Move test body from here...
4692
+ * gcc.target/aarch64/atomic-op-short.x: ... to here.
4693
+ * gcc.target/arm/atomic-comp-swap-release-acquire.c: New test.
4694
+ * gcc.target/arm/atomic-op-acq_rel.c: Likewise.
4695
+ * gcc.target/arm/atomic-op-acquire.c: Likewise.
4696
+ * gcc.target/arm/atomic-op-char.c: Likewise.
4697
+ * gcc.target/arm/atomic-op-consume.c: Likewise.
4698
+ * gcc.target/arm/atomic-op-int.c: Likewise.
4699
+ * gcc.target/arm/atomic-op-relaxed.c: Likewise.
4700
+ * gcc.target/arm/atomic-op-release.c: Likewise.
4701
+ * gcc.target/arm/atomic-op-seq_cst.c: Likewise.
4702
+ * gcc.target/arm/atomic-op-short.c: Likewise.
4704
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4706
+ Backport from trunk r196876.
4707
+ 2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
4709
+ * gcc.target/arm/neon-for-64bits-1.c: New tests.
4710
+ * gcc.target/arm/neon-for-64bits-2.c: Likewise.
4712
+2013-04-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4714
+ Backport from trunk r196858.
4715
+ 2013-03-21 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
4717
+ * gcc.target/aarch64/vect.c: Test and result vector added
4718
+ for sabd and saba instructions.
4719
+ * gcc.target/aarch64/vect-compile.c: Check for sabd and saba
4720
+ instructions in assembly.
4721
+ * gcc.target/aarch64/vect.x: Add sabd and saba test functions.
4722
+ * gcc.target/aarch64/vect-fp.c: Test and result vector added
4723
+ for fabd instruction.
4724
+ * gcc.target/aarch64/vect-fp-compile.c: Check for fabd
4725
+ instruction in assembly.
4726
+ * gcc.target/aarch64/vect-fp.x: Add fabd test function.
4727
--- a/src/gcc/objcp/ChangeLog.linaro
4728
+++ b/src/gcc/objcp/ChangeLog.linaro
4730
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4732
+ * GCC Linaro 4.8-2013.04 released.
4733
--- a/src/gcc/cp/ChangeLog.linaro
4734
+++ b/src/gcc/cp/ChangeLog.linaro
4736
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4738
+ * GCC Linaro 4.8-2013.04 released.
4739
--- a/src/gcc/go/ChangeLog.linaro
4740
+++ b/src/gcc/go/ChangeLog.linaro
4742
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4744
+ * GCC Linaro 4.8-2013.04 released.
4745
--- a/src/gcc/ada/ChangeLog.linaro
4746
+++ b/src/gcc/ada/ChangeLog.linaro
4748
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4750
+ * GCC Linaro 4.8-2013.04 released.
4751
--- a/src/gcc/common/config/aarch64/aarch64-common.c
4752
+++ b/src/gcc/common/config/aarch64/aarch64-common.c
4755
/* Enable section anchors by default at -O1 or higher. */
4756
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
4757
+ /* Enable redundant extension instructions removal at -O2 and higher. */
4758
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
4759
{ OPT_LEVELS_NONE, 0, NULL, 0 }
4762
--- a/src/gcc/fortran/ChangeLog.linaro
4763
+++ b/src/gcc/fortran/ChangeLog.linaro
4765
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4767
+ * GCC Linaro 4.8-2013.04 released.
4768
--- a/src/gcc/configure.ac
4769
+++ b/src/gcc/configure.ac
4772
AC_SUBST(CONFIGURE_SPECS)
4774
-ACX_PKGVERSION([GCC])
4775
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
4776
ACX_BUGURL([http://gcc.gnu.org/bugs.html])
4778
# Sanity check enable_languages in case someone does not run the toplevel
4779
--- a/src/gcc/coretypes.h
4780
+++ b/src/gcc/coretypes.h
4782
typedef union gimple_statement_d *gimple;
4783
typedef const union gimple_statement_d *const_gimple;
4784
typedef gimple gimple_seq;
4785
+struct gimple_stmt_iterator_d;
4786
+typedef struct gimple_stmt_iterator_d gimple_stmt_iterator;
4788
typedef union section section;
4790
--- a/src/gcc/gimple-fold.c
4791
+++ b/src/gcc/gimple-fold.c
4792
@@ -1143,6 +1143,8 @@
4793
gimplify_and_update_call_from_tree (gsi, result);
4796
+ else if (DECL_BUILT_IN_CLASS (callee) == BUILT_IN_MD)
4797
+ changed |= targetm.gimple_fold_builtin (gsi);
4801
--- a/src/gcc/lto/ChangeLog.linaro
4802
+++ b/src/gcc/lto/ChangeLog.linaro
4804
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4806
+ * GCC Linaro 4.8-2013.04 released.
4807
--- a/src/gcc/po/ChangeLog.linaro
4808
+++ b/src/gcc/po/ChangeLog.linaro
4810
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
4812
+ * GCC Linaro 4.8-2013.04 released.
4813
--- a/src/gcc/gimple.h
4814
+++ b/src/gcc/gimple.h
4817
/* Iterator object for GIMPLE statement sequences. */
4820
+struct gimple_stmt_iterator_d
4822
/* Sequence node holding the current statement. */
4823
gimple_seq_node ptr;
4825
block/sequence is removed. */
4828
-} gimple_stmt_iterator;
4832
/* Data structure definitions for GIMPLE tuples. NOTE: word markers
4833
are for 64 bit hosts. */
4835
--- a/src/gcc/config/aarch64/aarch64-simd.md
4836
+++ b/src/gcc/config/aarch64/aarch64-simd.md
4838
; simd_dup duplicate element.
4839
; simd_dupgp duplicate general purpose register.
4840
; simd_ext bitwise extract from pair.
4841
+; simd_fabd floating absolute difference and accumulate.
4842
; simd_fadd floating point add/sub.
4843
; simd_fcmp floating point compare.
4844
; simd_fcvti floating point convert to integer.
4846
; simd_fmul floating point multiply.
4847
; simd_fmul_elt floating point multiply (by element).
4848
; simd_fnegabs floating point neg/abs.
4849
-; simd_frcpe floating point reciprocal estimate.
4850
-; simd_frcps floating point reciprocal step.
4851
-; simd_frecx floating point reciprocal exponent.
4852
+; simd_frecpe floating point reciprocal estimate.
4853
+; simd_frecps floating point reciprocal step.
4854
+; simd_frecpx floating point reciprocal exponent.
4855
; simd_frint floating point round to integer.
4856
; simd_fsqrt floating point square root.
4857
; simd_icvtf integer convert to floating point.
4880
(eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs")
4881
(eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane")
4882
(eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane")
4883
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
4884
- (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
4885
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
4886
+ (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
4887
(eq_attr "simd_type" "none") (const_string "none")
4889
(const_string "unknown")))
4893
(define_insn "neg<mode>2"
4894
- [(set (match_operand:VDQM 0 "register_operand" "=w")
4895
- (neg:VDQM (match_operand:VDQM 1 "register_operand" "w")))]
4896
+ [(set (match_operand:VDQ 0 "register_operand" "=w")
4897
+ (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
4899
"neg\t%0.<Vtype>, %1.<Vtype>"
4900
[(set_attr "simd_type" "simd_negabs")
4901
@@ -520,6 +522,40 @@
4902
(set_attr "simd_mode" "<MODE>")]
4905
+(define_insn "abd<mode>_3"
4906
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4907
+ (abs:VDQ_BHSI (minus:VDQ_BHSI
4908
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
4909
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
4911
+ "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4912
+ [(set_attr "simd_type" "simd_abd")
4913
+ (set_attr "simd_mode" "<MODE>")]
4916
+(define_insn "aba<mode>_3"
4917
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4918
+ (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
4919
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
4920
+ (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4921
+ (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
4923
+ "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4924
+ [(set_attr "simd_type" "simd_abd")
4925
+ (set_attr "simd_mode" "<MODE>")]
4928
+(define_insn "fabd<mode>_3"
4929
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
4930
+ (abs:VDQF (minus:VDQF
4931
+ (match_operand:VDQF 1 "register_operand" "w")
4932
+ (match_operand:VDQF 2 "register_operand" "w"))))]
4934
+ "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4935
+ [(set_attr "simd_type" "simd_fabd")
4936
+ (set_attr "simd_mode" "<MODE>")]
4939
(define_insn "and<mode>3"
4940
[(set (match_operand:VDQ 0 "register_operand" "=w")
4941
(and:VDQ (match_operand:VDQ 1 "register_operand" "w")
4942
@@ -1196,7 +1232,9 @@
4943
(set_attr "simd_mode" "<MODE>")]
4946
-(define_insn "aarch64_frint<frint_suffix><mode>"
4947
+;; Vector versions of the floating-point frint patterns.
4948
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
4949
+(define_insn "<frint_pattern><mode>2"
4950
[(set (match_operand:VDQF 0 "register_operand" "=w")
4951
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
4953
@@ -1206,16 +1244,9 @@
4954
(set_attr "simd_mode" "<MODE>")]
4957
-;; Vector versions of the floating-point frint patterns.
4958
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
4959
-(define_expand "<frint_pattern><mode>2"
4960
- [(set (match_operand:VDQF 0 "register_operand")
4961
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
4966
-(define_insn "aarch64_fcvt<frint_suffix><su><mode>"
4967
+;; Vector versions of the fcvt standard patterns.
4968
+;; Expands to lbtrunc, lround, lceil, lfloor
4969
+(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
4970
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
4971
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
4972
[(match_operand:VDQF 1 "register_operand" "w")]
4973
@@ -1226,16 +1257,141 @@
4974
(set_attr "simd_mode" "<MODE>")]
4977
-;; Vector versions of the fcvt standard patterns.
4978
-;; Expands to lbtrunc, lround, lceil, lfloor
4979
-(define_expand "l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2"
4980
+(define_expand "<optab><VDQF:mode><fcvt_target>2"
4981
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
4982
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
4983
[(match_operand:VDQF 1 "register_operand")]
4989
+(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
4990
+ [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
4991
+ (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
4992
+ [(match_operand:VDQF 1 "register_operand")]
4997
+(define_expand "ftrunc<VDQF:mode>2"
4998
+ [(set (match_operand:VDQF 0 "register_operand")
4999
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
5004
+(define_insn "<optab><fcvt_target><VDQF:mode>2"
5005
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
5007
+ (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
5009
+ "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
5010
+ [(set_attr "simd_type" "simd_icvtf")
5011
+ (set_attr "simd_mode" "<MODE>")]
5014
+;; Conversions between vectors of floats and doubles.
5015
+;; Contains a mix of patterns to match standard pattern names
5016
+;; and those for intrinsics.
5018
+;; Float widening operations.
5020
+(define_insn "vec_unpacks_lo_v4sf"
5021
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
5022
+ (float_extend:V2DF
5024
+ (match_operand:V4SF 1 "register_operand" "w")
5025
+ (parallel [(const_int 0) (const_int 1)])
5028
+ "fcvtl\\t%0.2d, %1.2s"
5029
+ [(set_attr "simd_type" "simd_fcvtl")
5030
+ (set_attr "simd_mode" "V2DF")]
5033
+(define_insn "aarch64_float_extend_lo_v2df"
5034
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
5035
+ (float_extend:V2DF
5036
+ (match_operand:V2SF 1 "register_operand" "w")))]
5038
+ "fcvtl\\t%0.2d, %1.2s"
5039
+ [(set_attr "simd_type" "simd_fcvtl")
5040
+ (set_attr "simd_mode" "V2DF")]
5043
+(define_insn "vec_unpacks_hi_v4sf"
5044
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
5045
+ (float_extend:V2DF
5047
+ (match_operand:V4SF 1 "register_operand" "w")
5048
+ (parallel [(const_int 2) (const_int 3)])
5051
+ "fcvtl2\\t%0.2d, %1.4s"
5052
+ [(set_attr "simd_type" "simd_fcvtl")
5053
+ (set_attr "simd_mode" "V2DF")]
5056
+;; Float narrowing operations.
5058
+(define_insn "aarch64_float_truncate_lo_v2sf"
5059
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
5060
+ (float_truncate:V2SF
5061
+ (match_operand:V2DF 1 "register_operand" "w")))]
5063
+ "fcvtn\\t%0.2s, %1.2d"
5064
+ [(set_attr "simd_type" "simd_fcvtl")
5065
+ (set_attr "simd_mode" "V2SF")]
5068
+(define_insn "aarch64_float_truncate_hi_v4sf"
5069
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
5071
+ (match_operand:V2SF 1 "register_operand" "0")
5072
+ (float_truncate:V2SF
5073
+ (match_operand:V2DF 2 "register_operand" "w"))))]
5075
+ "fcvtn2\\t%0.4s, %2.2d"
5076
+ [(set_attr "simd_type" "simd_fcvtl")
5077
+ (set_attr "simd_mode" "V4SF")]
5080
+(define_expand "vec_pack_trunc_v2df"
5081
+ [(set (match_operand:V4SF 0 "register_operand")
5083
+ (float_truncate:V2SF
5084
+ (match_operand:V2DF 1 "register_operand"))
5085
+ (float_truncate:V2SF
5086
+ (match_operand:V2DF 2 "register_operand"))
5090
+ rtx tmp = gen_reg_rtx (V2SFmode);
5091
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[1]));
5092
+ emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
5093
+ tmp, operands[2]));
5098
+(define_expand "vec_pack_trunc_df"
5099
+ [(set (match_operand:V2SF 0 "register_operand")
5101
+ (float_truncate:SF
5102
+ (match_operand:DF 1 "register_operand"))
5103
+ (float_truncate:SF
5104
+ (match_operand:DF 2 "register_operand"))
5108
+ rtx tmp = gen_reg_rtx (V2SFmode);
5109
+ emit_insn (gen_move_lo_quad_v2df (tmp, operands[1]));
5110
+ emit_insn (gen_move_hi_quad_v2df (tmp, operands[2]));
5111
+ emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
5116
(define_insn "aarch64_vmls<mode>"
5117
[(set (match_operand:VDQF 0 "register_operand" "=w")
5118
(minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
5119
@@ -1305,7 +1461,7 @@
5121
;; FP 'across lanes' add.
5123
-(define_insn "aarch64_addvv4sf"
5124
+(define_insn "aarch64_addpv4sf"
5125
[(set (match_operand:V4SF 0 "register_operand" "=w")
5126
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
5128
@@ -1321,8 +1477,8 @@
5131
rtx tmp = gen_reg_rtx (V4SFmode);
5132
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
5133
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
5134
+ emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
5135
+ emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
5139
@@ -1332,11 +1488,21 @@
5142
rtx tmp = gen_reg_rtx (V4SFmode);
5143
- emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
5144
- emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
5145
+ emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
5146
+ emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
5150
+(define_expand "aarch64_addvv4sf"
5151
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
5152
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
5156
+ emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
5160
(define_insn "aarch64_addv<mode>"
5161
[(set (match_operand:V2F 0 "register_operand" "=w")
5162
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
5163
@@ -1463,21 +1629,33 @@
5164
(set_attr "simd_mode" "V2SI")]
5167
-;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register
5168
-;; allocation. For an intrinsic of form:
5169
-;; vD = bsl_* (vS, vN, vM)
5170
+;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
5172
+;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
5175
+;; Thus our BSL is of the form:
5176
+;; op0 = bsl (mask, op2, op3)
5177
;; We can use any of:
5178
-;; bsl vS, vN, vM (if D = S)
5179
-;; bit vD, vN, vS (if D = M, so 1-bits in vS choose bits from vN, else vM)
5180
-;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN)
5183
+;; bsl mask, op1, op2
5184
+;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
5185
+;; bit op0, op2, mask
5186
+;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
5187
+;; bif op0, op1, mask
5189
(define_insn "aarch64_simd_bsl<mode>_internal"
5190
[(set (match_operand:VALL 0 "register_operand" "=w,w,w")
5192
- [(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
5193
- (match_operand:VALL 2 "register_operand" " w,w,0")
5194
- (match_operand:VALL 3 "register_operand" " w,0,w")]
5198
+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
5199
+ (match_operand:VALL 2 "register_operand" " w,w,0"))
5201
+ (not:<V_cmp_result>
5202
+ (match_dup:<V_cmp_result> 1))
5203
+ (match_operand:VALL 3 "register_operand" " w,0,w"))
5207
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
5208
@@ -1486,15 +1664,17 @@
5211
(define_expand "aarch64_simd_bsl<mode>"
5212
- [(set (match_operand:VALL 0 "register_operand")
5213
- (unspec:VALL [(match_operand:<V_cmp_result> 1 "register_operand")
5214
- (match_operand:VALL 2 "register_operand")
5215
- (match_operand:VALL 3 "register_operand")]
5218
+ [(match_operand:VALL 0 "register_operand")
5219
+ (match_operand:<V_cmp_result> 1 "register_operand")
5220
+ (match_operand:VALL 2 "register_operand")
5221
+ (match_operand:VALL 3 "register_operand")]
5224
/* We can't alias operands together if they have different modes. */
5225
operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
5226
+ emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
5227
+ operands[2], operands[3]));
5231
(define_expand "aarch64_vcond_internal<mode>"
5232
@@ -2837,28 +3017,6 @@
5233
(set_attr "simd_mode" "<MODE>")]
5238
-(define_expand "aarch64_sshl_n<mode>"
5239
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5240
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
5241
- (match_operand:SI 2 "immediate_operand" "i")]
5244
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
5248
-(define_expand "aarch64_ushl_n<mode>"
5249
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5250
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
5251
- (match_operand:SI 2 "immediate_operand" "i")]
5254
- emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2]));
5260
(define_insn "aarch64_<sur>shll_n<mode>"
5261
@@ -2903,28 +3061,6 @@
5262
(set_attr "simd_mode" "<MODE>")]
5267
-(define_expand "aarch64_sshr_n<mode>"
5268
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5269
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
5270
- (match_operand:SI 2 "immediate_operand" "i")]
5273
- emit_insn (gen_ashr<mode>3 (operands[0], operands[1], operands[2]));
5277
-(define_expand "aarch64_ushr_n<mode>"
5278
- [(match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5279
- (match_operand:VSDQ_I_DI 1 "register_operand" "w")
5280
- (match_operand:SI 2 "immediate_operand" "i")]
5283
- emit_insn (gen_lshr<mode>3 (operands[0], operands[1], operands[2]));
5289
(define_insn "aarch64_<sur>shr_n<mode>"
5290
@@ -3081,19 +3217,6 @@
5291
(set_attr "simd_mode" "DI")]
5296
-(define_expand "aarch64_<maxmin><mode>"
5297
- [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
5298
- (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
5299
- (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
5302
- emit_insn (gen_<maxmin><mode>3 (operands[0], operands[1], operands[2]));
5307
(define_insn "aarch64_<fmaxmin><mode>"
5308
[(set (match_operand:VDQF 0 "register_operand" "=w")
5309
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
5310
@@ -3116,16 +3239,6 @@
5311
(set_attr "simd_mode" "<MODE>")]
5314
-(define_expand "aarch64_sqrt<mode>"
5315
- [(match_operand:VDQF 0 "register_operand" "=w")
5316
- (match_operand:VDQF 1 "register_operand" "w")]
5319
- emit_insn (gen_sqrt<mode>2 (operands[0], operands[1]));
5324
;; Patterns for vector struct loads and stores.
5326
(define_insn "vec_load_lanesoi<mode>"
5327
@@ -3690,3 +3803,25 @@
5328
"ld1r\\t{%0.<Vtype>}, %1"
5329
[(set_attr "simd_type" "simd_load1r")
5330
(set_attr "simd_mode" "<MODE>")])
5332
+(define_insn "aarch64_frecpe<mode>"
5333
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
5334
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5337
+ "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5338
+ [(set_attr "simd_type" "simd_frecpe")
5339
+ (set_attr "simd_mode" "<MODE>")]
5342
+(define_insn "aarch64_frecps<mode>"
5343
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
5344
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
5345
+ (match_operand:VDQF 2 "register_operand" "w")]
5348
+ "frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5349
+ [(set_attr "simd_type" "simd_frecps")
5350
+ (set_attr "simd_mode" "<MODE>")]
5353
--- a/src/gcc/config/aarch64/aarch64-elf.h
5354
+++ b/src/gcc/config/aarch64/aarch64-elf.h
5357
#define ASM_COMMENT_START "//"
5359
-#define REGISTER_PREFIX ""
5360
#define LOCAL_LABEL_PREFIX "."
5361
#define USER_LABEL_PREFIX ""
5363
--- a/src/gcc/config/aarch64/arm_neon.h
5364
+++ b/src/gcc/config/aarch64/arm_neon.h
5365
@@ -4468,17 +4468,6 @@
5369
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5370
-vabs_f32 (float32x2_t a)
5372
- float32x2_t result;
5373
- __asm__ ("fabs %0.2s,%1.2s"
5376
- : /* No clobbers */);
5380
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5381
vabs_s8 (int8x8_t a)
5383
@@ -4512,28 +4501,6 @@
5387
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5388
-vabsq_f32 (float32x4_t a)
5390
- float32x4_t result;
5391
- __asm__ ("fabs %0.4s,%1.4s"
5394
- : /* No clobbers */);
5398
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5399
-vabsq_f64 (float64x2_t a)
5401
- float64x2_t result;
5402
- __asm__ ("fabs %0.2d,%1.2d"
5405
- : /* No clobbers */);
5409
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5410
vabsq_s8 (int8x16_t a)
5412
@@ -5915,100 +5882,12 @@
5414
/* vcvt_f32_f16 not supported */
5416
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5417
-vcvt_f32_f64 (float64x2_t a)
5419
- float32x2_t result;
5420
- __asm__ ("fcvtn %0.2s,%1.2d"
5423
- : /* No clobbers */);
5427
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5428
-vcvt_f32_s32 (int32x2_t a)
5430
- float32x2_t result;
5431
- __asm__ ("scvtf %0.2s, %1.2s"
5434
- : /* No clobbers */);
5438
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5439
-vcvt_f32_u32 (uint32x2_t a)
5441
- float32x2_t result;
5442
- __asm__ ("ucvtf %0.2s, %1.2s"
5445
- : /* No clobbers */);
5449
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5450
-vcvt_f64_f32 (float32x2_t a)
5452
- float64x2_t result;
5453
- __asm__ ("fcvtl %0.2d,%1.2s"
5456
- : /* No clobbers */);
5460
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5461
-vcvt_f64_s64 (uint64x1_t a)
5463
- float64x1_t result;
5464
- __asm__ ("scvtf %d0, %d1"
5467
- : /* No clobbers */);
5471
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5472
-vcvt_f64_u64 (uint64x1_t a)
5474
- float64x1_t result;
5475
- __asm__ ("ucvtf %d0, %d1"
5478
- : /* No clobbers */);
5482
/* vcvt_high_f16_f32 not supported */
5484
/* vcvt_high_f32_f16 not supported */
5486
static float32x2_t vdup_n_f32 (float32_t);
5488
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5489
-vcvt_high_f32_f64 (float32x2_t a, float64x2_t b)
5491
- float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f));
5492
- __asm__ ("fcvtn2 %0.4s,%2.2d"
5495
- : /* No clobbers */);
5499
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5500
-vcvt_high_f64_f32 (float32x4_t a)
5502
- float64x2_t result;
5503
- __asm__ ("fcvtl2 %0.2d,%1.4s"
5506
- : /* No clobbers */);
5510
#define vcvt_n_f32_s32(a, b) \
5513
@@ -6057,160 +5936,6 @@
5517
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5518
-vcvt_s32_f32 (float32x2_t a)
5521
- __asm__ ("fcvtzs %0.2s, %1.2s"
5524
- : /* No clobbers */);
5528
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5529
-vcvt_u32_f32 (float32x2_t a)
5531
- uint32x2_t result;
5532
- __asm__ ("fcvtzu %0.2s, %1.2s"
5535
- : /* No clobbers */);
5539
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5540
-vcvta_s32_f32 (float32x2_t a)
5543
- __asm__ ("fcvtas %0.2s, %1.2s"
5546
- : /* No clobbers */);
5550
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5551
-vcvta_u32_f32 (float32x2_t a)
5553
- uint32x2_t result;
5554
- __asm__ ("fcvtau %0.2s, %1.2s"
5557
- : /* No clobbers */);
5561
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5562
-vcvtad_s64_f64 (float64_t a)
5565
- __asm__ ("fcvtas %d0,%d1"
5568
- : /* No clobbers */);
5572
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5573
-vcvtad_u64_f64 (float64_t a)
5576
- __asm__ ("fcvtau %d0,%d1"
5579
- : /* No clobbers */);
5583
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5584
-vcvtaq_s32_f32 (float32x4_t a)
5587
- __asm__ ("fcvtas %0.4s, %1.4s"
5590
- : /* No clobbers */);
5594
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5595
-vcvtaq_s64_f64 (float64x2_t a)
5598
- __asm__ ("fcvtas %0.2d, %1.2d"
5601
- : /* No clobbers */);
5605
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5606
-vcvtaq_u32_f32 (float32x4_t a)
5608
- uint32x4_t result;
5609
- __asm__ ("fcvtau %0.4s, %1.4s"
5612
- : /* No clobbers */);
5616
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5617
-vcvtaq_u64_f64 (float64x2_t a)
5619
- uint64x2_t result;
5620
- __asm__ ("fcvtau %0.2d, %1.2d"
5623
- : /* No clobbers */);
5627
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5628
-vcvtas_s64_f64 (float32_t a)
5631
- __asm__ ("fcvtas %s0,%s1"
5634
- : /* No clobbers */);
5638
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5639
-vcvtas_u64_f64 (float32_t a)
5642
- __asm__ ("fcvtau %s0,%s1"
5645
- : /* No clobbers */);
5649
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
5650
-vcvtd_f64_s64 (int64_t a)
5653
- __asm__ ("scvtf %d0,%d1"
5656
- : /* No clobbers */);
5660
-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5661
-vcvtd_f64_u64 (uint64_t a)
5664
- __asm__ ("ucvtf %d0,%d1"
5667
- : /* No clobbers */);
5671
#define vcvtd_n_f64_s64(a, b) \
5674
@@ -6259,402 +5984,6 @@
5678
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5679
-vcvtd_s64_f64 (float64_t a)
5682
- __asm__ ("fcvtzs %d0,%d1"
5685
- : /* No clobbers */);
5689
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5690
-vcvtd_u64_f64 (float64_t a)
5693
- __asm__ ("fcvtzu %d0,%d1"
5696
- : /* No clobbers */);
5700
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5701
-vcvtm_s32_f32 (float32x2_t a)
5704
- __asm__ ("fcvtms %0.2s, %1.2s"
5707
- : /* No clobbers */);
5711
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5712
-vcvtm_u32_f32 (float32x2_t a)
5714
- uint32x2_t result;
5715
- __asm__ ("fcvtmu %0.2s, %1.2s"
5718
- : /* No clobbers */);
5722
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5723
-vcvtmd_s64_f64 (float64_t a)
5726
- __asm__ ("fcvtms %d0,%d1"
5729
- : /* No clobbers */);
5733
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5734
-vcvtmd_u64_f64 (float64_t a)
5737
- __asm__ ("fcvtmu %d0,%d1"
5740
- : /* No clobbers */);
5744
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5745
-vcvtmq_s32_f32 (float32x4_t a)
5748
- __asm__ ("fcvtms %0.4s, %1.4s"
5751
- : /* No clobbers */);
5755
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5756
-vcvtmq_s64_f64 (float64x2_t a)
5759
- __asm__ ("fcvtms %0.2d, %1.2d"
5762
- : /* No clobbers */);
5766
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5767
-vcvtmq_u32_f32 (float32x4_t a)
5769
- uint32x4_t result;
5770
- __asm__ ("fcvtmu %0.4s, %1.4s"
5773
- : /* No clobbers */);
5777
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5778
-vcvtmq_u64_f64 (float64x2_t a)
5780
- uint64x2_t result;
5781
- __asm__ ("fcvtmu %0.2d, %1.2d"
5784
- : /* No clobbers */);
5788
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5789
-vcvtms_s64_f64 (float32_t a)
5792
- __asm__ ("fcvtms %s0,%s1"
5795
- : /* No clobbers */);
5799
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5800
-vcvtms_u64_f64 (float32_t a)
5803
- __asm__ ("fcvtmu %s0,%s1"
5806
- : /* No clobbers */);
5810
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5811
-vcvtn_s32_f32 (float32x2_t a)
5814
- __asm__ ("fcvtns %0.2s, %1.2s"
5817
- : /* No clobbers */);
5821
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5822
-vcvtn_u32_f32 (float32x2_t a)
5824
- uint32x2_t result;
5825
- __asm__ ("fcvtnu %0.2s, %1.2s"
5828
- : /* No clobbers */);
5832
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5833
-vcvtnd_s64_f64 (float64_t a)
5836
- __asm__ ("fcvtns %d0,%d1"
5839
- : /* No clobbers */);
5843
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5844
-vcvtnd_u64_f64 (float64_t a)
5847
- __asm__ ("fcvtnu %d0,%d1"
5850
- : /* No clobbers */);
5854
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5855
-vcvtnq_s32_f32 (float32x4_t a)
5858
- __asm__ ("fcvtns %0.4s, %1.4s"
5861
- : /* No clobbers */);
5865
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5866
-vcvtnq_s64_f64 (float64x2_t a)
5869
- __asm__ ("fcvtns %0.2d, %1.2d"
5872
- : /* No clobbers */);
5876
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5877
-vcvtnq_u32_f32 (float32x4_t a)
5879
- uint32x4_t result;
5880
- __asm__ ("fcvtnu %0.4s, %1.4s"
5883
- : /* No clobbers */);
5887
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5888
-vcvtnq_u64_f64 (float64x2_t a)
5890
- uint64x2_t result;
5891
- __asm__ ("fcvtnu %0.2d, %1.2d"
5894
- : /* No clobbers */);
5898
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5899
-vcvtns_s64_f64 (float32_t a)
5902
- __asm__ ("fcvtns %s0,%s1"
5905
- : /* No clobbers */);
5909
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5910
-vcvtns_u64_f64 (float32_t a)
5913
- __asm__ ("fcvtnu %s0,%s1"
5916
- : /* No clobbers */);
5920
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5921
-vcvtp_s32_f32 (float32x2_t a)
5924
- __asm__ ("fcvtps %0.2s, %1.2s"
5927
- : /* No clobbers */);
5931
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5932
-vcvtp_u32_f32 (float32x2_t a)
5934
- uint32x2_t result;
5935
- __asm__ ("fcvtpu %0.2s, %1.2s"
5938
- : /* No clobbers */);
5942
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5943
-vcvtpd_s64_f64 (float64_t a)
5946
- __asm__ ("fcvtps %d0,%d1"
5949
- : /* No clobbers */);
5953
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
5954
-vcvtpd_u64_f64 (float64_t a)
5957
- __asm__ ("fcvtpu %d0,%d1"
5960
- : /* No clobbers */);
5964
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5965
-vcvtpq_s32_f32 (float32x4_t a)
5968
- __asm__ ("fcvtps %0.4s, %1.4s"
5971
- : /* No clobbers */);
5975
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5976
-vcvtpq_s64_f64 (float64x2_t a)
5979
- __asm__ ("fcvtps %0.2d, %1.2d"
5982
- : /* No clobbers */);
5986
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5987
-vcvtpq_u32_f32 (float32x4_t a)
5989
- uint32x4_t result;
5990
- __asm__ ("fcvtpu %0.4s, %1.4s"
5993
- : /* No clobbers */);
5997
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5998
-vcvtpq_u64_f64 (float64x2_t a)
6000
- uint64x2_t result;
6001
- __asm__ ("fcvtpu %0.2d, %1.2d"
6004
- : /* No clobbers */);
6008
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6009
-vcvtps_s64_f64 (float32_t a)
6012
- __asm__ ("fcvtps %s0,%s1"
6015
- : /* No clobbers */);
6019
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6020
-vcvtps_u64_f64 (float32_t a)
6023
- __asm__ ("fcvtpu %s0,%s1"
6026
- : /* No clobbers */);
6030
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6031
-vcvtq_f32_s32 (int32x4_t a)
6033
- float32x4_t result;
6034
- __asm__ ("scvtf %0.4s, %1.4s"
6037
- : /* No clobbers */);
6041
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6042
-vcvtq_f32_u32 (uint32x4_t a)
6044
- float32x4_t result;
6045
- __asm__ ("ucvtf %0.4s, %1.4s"
6048
- : /* No clobbers */);
6052
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6053
-vcvtq_f64_s64 (int64x2_t a)
6055
- float64x2_t result;
6056
- __asm__ ("scvtf %0.2d, %1.2d"
6059
- : /* No clobbers */);
6063
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6064
-vcvtq_f64_u64 (uint64x2_t a)
6066
- float64x2_t result;
6067
- __asm__ ("ucvtf %0.2d, %1.2d"
6070
- : /* No clobbers */);
6074
#define vcvtq_n_f32_s32(a, b) \
6077
@@ -6751,72 +6080,6 @@
6081
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6082
-vcvtq_s32_f32 (float32x4_t a)
6085
- __asm__ ("fcvtzs %0.4s, %1.4s"
6088
- : /* No clobbers */);
6092
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6093
-vcvtq_s64_f64 (float64x2_t a)
6096
- __asm__ ("fcvtzs %0.2d, %1.2d"
6099
- : /* No clobbers */);
6103
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6104
-vcvtq_u32_f32 (float32x4_t a)
6106
- uint32x4_t result;
6107
- __asm__ ("fcvtzu %0.4s, %1.4s"
6110
- : /* No clobbers */);
6114
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6115
-vcvtq_u64_f64 (float64x2_t a)
6117
- uint64x2_t result;
6118
- __asm__ ("fcvtzu %0.2d, %1.2d"
6121
- : /* No clobbers */);
6125
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
6126
-vcvts_f64_s32 (int32_t a)
6129
- __asm__ ("scvtf %s0,%s1"
6132
- : /* No clobbers */);
6136
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
6137
-vcvts_f64_u32 (uint32_t a)
6140
- __asm__ ("ucvtf %s0,%s1"
6143
- : /* No clobbers */);
6147
#define vcvts_n_f32_s32(a, b) \
6150
@@ -6865,28 +6128,6 @@
6154
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6155
-vcvts_s64_f64 (float32_t a)
6158
- __asm__ ("fcvtzs %s0,%s1"
6161
- : /* No clobbers */);
6165
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6166
-vcvts_u64_f64 (float32_t a)
6169
- __asm__ ("fcvtzu %s0,%s1"
6172
- : /* No clobbers */);
6176
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6177
vcvtx_f32_f64 (float64x2_t a)
6179
@@ -14556,17 +13797,6 @@
6183
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6184
-vrecpe_f32 (float32x2_t a)
6186
- float32x2_t result;
6187
- __asm__ ("frecpe %0.2s,%1.2s"
6190
- : /* No clobbers */);
6194
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6195
vrecpe_u32 (uint32x2_t a)
6197
@@ -14578,39 +13808,6 @@
6201
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6202
-vrecped_f64 (float64_t a)
6205
- __asm__ ("frecpe %d0,%d1"
6208
- : /* No clobbers */);
6212
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6213
-vrecpeq_f32 (float32x4_t a)
6215
- float32x4_t result;
6216
- __asm__ ("frecpe %0.4s,%1.4s"
6219
- : /* No clobbers */);
6223
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6224
-vrecpeq_f64 (float64x2_t a)
6226
- float64x2_t result;
6227
- __asm__ ("frecpe %0.2d,%1.2d"
6230
- : /* No clobbers */);
6234
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6235
vrecpeq_u32 (uint32x4_t a)
6237
@@ -14622,94 +13819,6 @@
6241
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6242
-vrecpes_f32 (float32_t a)
6245
- __asm__ ("frecpe %s0,%s1"
6248
- : /* No clobbers */);
6252
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6253
-vrecps_f32 (float32x2_t a, float32x2_t b)
6255
- float32x2_t result;
6256
- __asm__ ("frecps %0.2s,%1.2s,%2.2s"
6259
- : /* No clobbers */);
6263
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6264
-vrecpsd_f64 (float64_t a, float64_t b)
6267
- __asm__ ("frecps %d0,%d1,%d2"
6270
- : /* No clobbers */);
6274
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6275
-vrecpsq_f32 (float32x4_t a, float32x4_t b)
6277
- float32x4_t result;
6278
- __asm__ ("frecps %0.4s,%1.4s,%2.4s"
6281
- : /* No clobbers */);
6285
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6286
-vrecpsq_f64 (float64x2_t a, float64x2_t b)
6288
- float64x2_t result;
6289
- __asm__ ("frecps %0.2d,%1.2d,%2.2d"
6292
- : /* No clobbers */);
6296
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6297
-vrecpss_f32 (float32_t a, float32_t b)
6300
- __asm__ ("frecps %s0,%s1,%s2"
6303
- : /* No clobbers */);
6307
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6308
-vrecpxd_f64 (float64_t a)
6311
- __asm__ ("frecpe %d0,%d1"
6314
- : /* No clobbers */);
6318
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6319
-vrecpxs_f32 (float32_t a)
6322
- __asm__ ("frecpe %s0,%s1"
6325
- : /* No clobbers */);
6329
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6330
vrev16_p8 (poly8x8_t a)
6332
@@ -15106,171 +14215,6 @@
6336
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6337
-vrnd_f32 (float32x2_t a)
6339
- float32x2_t result;
6340
- __asm__ ("frintz %0.2s,%1.2s"
6343
- : /* No clobbers */);
6347
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6348
-vrnda_f32 (float32x2_t a)
6350
- float32x2_t result;
6351
- __asm__ ("frinta %0.2s,%1.2s"
6354
- : /* No clobbers */);
6358
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6359
-vrndm_f32 (float32x2_t a)
6361
- float32x2_t result;
6362
- __asm__ ("frintm %0.2s,%1.2s"
6365
- : /* No clobbers */);
6369
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6370
-vrndn_f32 (float32x2_t a)
6372
- float32x2_t result;
6373
- __asm__ ("frintn %0.2s,%1.2s"
6376
- : /* No clobbers */);
6380
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6381
-vrndp_f32 (float32x2_t a)
6383
- float32x2_t result;
6384
- __asm__ ("frintp %0.2s,%1.2s"
6387
- : /* No clobbers */);
6391
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6392
-vrndq_f32 (float32x4_t a)
6394
- float32x4_t result;
6395
- __asm__ ("frintz %0.4s,%1.4s"
6398
- : /* No clobbers */);
6402
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6403
-vrndq_f64 (float64x2_t a)
6405
- float64x2_t result;
6406
- __asm__ ("frintz %0.2d,%1.2d"
6409
- : /* No clobbers */);
6413
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6414
-vrndqa_f32 (float32x4_t a)
6416
- float32x4_t result;
6417
- __asm__ ("frinta %0.4s,%1.4s"
6420
- : /* No clobbers */);
6424
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6425
-vrndqa_f64 (float64x2_t a)
6427
- float64x2_t result;
6428
- __asm__ ("frinta %0.2d,%1.2d"
6431
- : /* No clobbers */);
6435
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6436
-vrndqm_f32 (float32x4_t a)
6438
- float32x4_t result;
6439
- __asm__ ("frintm %0.4s,%1.4s"
6442
- : /* No clobbers */);
6446
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6447
-vrndqm_f64 (float64x2_t a)
6449
- float64x2_t result;
6450
- __asm__ ("frintm %0.2d,%1.2d"
6453
- : /* No clobbers */);
6457
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6458
-vrndqn_f32 (float32x4_t a)
6460
- float32x4_t result;
6461
- __asm__ ("frintn %0.4s,%1.4s"
6464
- : /* No clobbers */);
6468
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6469
-vrndqn_f64 (float64x2_t a)
6471
- float64x2_t result;
6472
- __asm__ ("frintn %0.2d,%1.2d"
6475
- : /* No clobbers */);
6479
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6480
-vrndqp_f32 (float32x4_t a)
6482
- float32x4_t result;
6483
- __asm__ ("frintp %0.4s,%1.4s"
6486
- : /* No clobbers */);
6490
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6491
-vrndqp_f64 (float64x2_t a)
6493
- float64x2_t result;
6494
- __asm__ ("frintp %0.2d,%1.2d"
6497
- : /* No clobbers */);
6501
#define vrshrn_high_n_s16(a, b, c) \
6504
@@ -19849,6 +18793,26 @@
6506
/* Start of optimal implementations in approved order. */
6510
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6511
+vabs_f32 (float32x2_t __a)
6513
+ return __builtin_aarch64_absv2sf (__a);
6516
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6517
+vabsq_f32 (float32x4_t __a)
6519
+ return __builtin_aarch64_absv4sf (__a);
6522
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6523
+vabsq_f64 (float64x2_t __a)
6525
+ return __builtin_aarch64_absv2df (__a);
6530
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6531
@@ -19863,6 +18827,27 @@
6535
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6536
+vaddv_f32 (float32x2_t __a)
6538
+ float32x2_t t = __builtin_aarch64_addvv2sf (__a);
6539
+ return vget_lane_f32 (t, 0);
6542
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6543
+vaddvq_f32 (float32x4_t __a)
6545
+ float32x4_t t = __builtin_aarch64_addvv4sf (__a);
6546
+ return vgetq_lane_f32 (t, 0);
6549
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6550
+vaddvq_f64 (float64x2_t __a)
6552
+ float64x2_t t = __builtin_aarch64_addvv2df (__a);
6553
+ return vgetq_lane_f64 (t, 0);
6558
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6559
@@ -20487,6 +19472,437 @@
6560
return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
6563
+/* vcvt (double -> float). */
6565
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6566
+vcvt_f32_f64 (float64x2_t __a)
6568
+ return __builtin_aarch64_float_truncate_lo_v2sf (__a);
6571
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6572
+vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
6574
+ return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
6577
+/* vcvt (float -> double). */
6579
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6580
+vcvt_f64_f32 (float32x2_t __a)
6583
+ return __builtin_aarch64_float_extend_lo_v2df (__a);
6586
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6587
+vcvt_high_f64_f32 (float32x4_t __a)
6589
+ return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
6592
+/* vcvt (<u>int -> float) */
6594
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6595
+vcvtd_f64_s64 (int64_t __a)
6597
+ return (float64_t) __a;
6600
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
6601
+vcvtd_f64_u64 (uint64_t __a)
6603
+ return (float64_t) __a;
6606
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6607
+vcvts_f32_s32 (int32_t __a)
6609
+ return (float32_t) __a;
6612
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
6613
+vcvts_f32_u32 (uint32_t __a)
6615
+ return (float32_t) __a;
6618
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6619
+vcvt_f32_s32 (int32x2_t __a)
6621
+ return __builtin_aarch64_floatv2siv2sf (__a);
6624
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6625
+vcvt_f32_u32 (uint32x2_t __a)
6627
+ return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
6630
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6631
+vcvtq_f32_s32 (int32x4_t __a)
6633
+ return __builtin_aarch64_floatv4siv4sf (__a);
6636
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6637
+vcvtq_f32_u32 (uint32x4_t __a)
6639
+ return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
6642
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6643
+vcvtq_f64_s64 (int64x2_t __a)
6645
+ return __builtin_aarch64_floatv2div2df (__a);
6648
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6649
+vcvtq_f64_u64 (uint64x2_t __a)
6651
+ return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
6654
+/* vcvt (float -> <u>int) */
6656
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
6657
+vcvtd_s64_f64 (float64_t __a)
6659
+ return (int64_t) __a;
6662
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
6663
+vcvtd_u64_f64 (float64_t __a)
6665
+ return (uint64_t) __a;
6668
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
6669
+vcvts_s32_f32 (float32_t __a)
6671
+ return (int32_t) __a;
6674
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
6675
+vcvts_u32_f32 (float32_t __a)
6677
+ return (uint32_t) __a;
6680
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6681
+vcvt_s32_f32 (float32x2_t __a)
6683
+ return __builtin_aarch64_lbtruncv2sfv2si (__a);
6686
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6687
+vcvt_u32_f32 (float32x2_t __a)
6689
+ /* TODO: This cast should go away when builtins have
6690
+ their correct types. */
6691
+ return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
6694
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6695
+vcvtq_s32_f32 (float32x4_t __a)
6697
+ return __builtin_aarch64_lbtruncv4sfv4si (__a);
6700
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6701
+vcvtq_u32_f32 (float32x4_t __a)
6703
+ /* TODO: This cast should go away when builtins have
6704
+ their correct types. */
6705
+ return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
6708
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6709
+vcvtq_s64_f64 (float64x2_t __a)
6711
+ return __builtin_aarch64_lbtruncv2dfv2di (__a);
6714
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6715
+vcvtq_u64_f64 (float64x2_t __a)
6717
+ /* TODO: This cast should go away when builtins have
6718
+ their correct types. */
6719
+ return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
6724
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
6725
+vcvtad_s64_f64 (float64_t __a)
6727
+ return __builtin_aarch64_lrounddfdi (__a);
6730
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
6731
+vcvtad_u64_f64 (float64_t __a)
6733
+ return __builtin_aarch64_lroundudfdi (__a);
6736
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
6737
+vcvtas_s32_f32 (float32_t __a)
6739
+ return __builtin_aarch64_lroundsfsi (__a);
6742
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
6743
+vcvtas_u32_f32 (float32_t __a)
6745
+ return __builtin_aarch64_lroundusfsi (__a);
6748
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6749
+vcvta_s32_f32 (float32x2_t __a)
6751
+ return __builtin_aarch64_lroundv2sfv2si (__a);
6754
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6755
+vcvta_u32_f32 (float32x2_t __a)
6757
+ /* TODO: This cast should go away when builtins have
6758
+ their correct types. */
6759
+ return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
6762
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6763
+vcvtaq_s32_f32 (float32x4_t __a)
6765
+ return __builtin_aarch64_lroundv4sfv4si (__a);
6768
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6769
+vcvtaq_u32_f32 (float32x4_t __a)
6771
+ /* TODO: This cast should go away when builtins have
6772
+ their correct types. */
6773
+ return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
6776
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6777
+vcvtaq_s64_f64 (float64x2_t __a)
6779
+ return __builtin_aarch64_lroundv2dfv2di (__a);
6782
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6783
+vcvtaq_u64_f64 (float64x2_t __a)
6785
+ /* TODO: This cast should go away when builtins have
6786
+ their correct types. */
6787
+ return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
6792
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
6793
+vcvtmd_s64_f64 (float64_t __a)
6795
+ return __builtin_lfloor (__a);
6798
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
6799
+vcvtmd_u64_f64 (float64_t __a)
6801
+ return __builtin_aarch64_lfloorudfdi (__a);
6804
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
6805
+vcvtms_s32_f32 (float32_t __a)
6807
+ return __builtin_ifloorf (__a);
6810
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
6811
+vcvtms_u32_f32 (float32_t __a)
6813
+ return __builtin_aarch64_lfloorusfsi (__a);
6816
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6817
+vcvtm_s32_f32 (float32x2_t __a)
6819
+ return __builtin_aarch64_lfloorv2sfv2si (__a);
6822
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6823
+vcvtm_u32_f32 (float32x2_t __a)
6825
+ /* TODO: This cast should go away when builtins have
6826
+ their correct types. */
6827
+ return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
6830
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6831
+vcvtmq_s32_f32 (float32x4_t __a)
6833
+ return __builtin_aarch64_lfloorv4sfv4si (__a);
6836
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6837
+vcvtmq_u32_f32 (float32x4_t __a)
6839
+ /* TODO: This cast should go away when builtins have
6840
+ their correct types. */
6841
+ return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
6844
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6845
+vcvtmq_s64_f64 (float64x2_t __a)
6847
+ return __builtin_aarch64_lfloorv2dfv2di (__a);
6850
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6851
+vcvtmq_u64_f64 (float64x2_t __a)
6853
+ /* TODO: This cast should go away when builtins have
6854
+ their correct types. */
6855
+ return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
6860
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
6861
+vcvtnd_s64_f64 (float64_t __a)
6863
+ return __builtin_aarch64_lfrintndfdi (__a);
6866
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
6867
+vcvtnd_u64_f64 (float64_t __a)
6869
+ return __builtin_aarch64_lfrintnudfdi (__a);
6872
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
6873
+vcvtns_s32_f32 (float32_t __a)
6875
+ return __builtin_aarch64_lfrintnsfsi (__a);
6878
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
6879
+vcvtns_u32_f32 (float32_t __a)
6881
+ return __builtin_aarch64_lfrintnusfsi (__a);
6884
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6885
+vcvtn_s32_f32 (float32x2_t __a)
6887
+ return __builtin_aarch64_lfrintnv2sfv2si (__a);
6890
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6891
+vcvtn_u32_f32 (float32x2_t __a)
6893
+ /* TODO: This cast should go away when builtins have
6894
+ their correct types. */
6895
+ return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
6898
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6899
+vcvtnq_s32_f32 (float32x4_t __a)
6901
+ return __builtin_aarch64_lfrintnv4sfv4si (__a);
6904
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6905
+vcvtnq_u32_f32 (float32x4_t __a)
6907
+ /* TODO: This cast should go away when builtins have
6908
+ their correct types. */
6909
+ return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
6912
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6913
+vcvtnq_s64_f64 (float64x2_t __a)
6915
+ return __builtin_aarch64_lfrintnv2dfv2di (__a);
6918
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6919
+vcvtnq_u64_f64 (float64x2_t __a)
6921
+ /* TODO: This cast should go away when builtins have
6922
+ their correct types. */
6923
+ return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
6928
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
6929
+vcvtpd_s64_f64 (float64_t __a)
6931
+ return __builtin_lceil (__a);
6934
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
6935
+vcvtpd_u64_f64 (float64_t __a)
6937
+ return __builtin_aarch64_lceiludfdi (__a);
6940
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
6941
+vcvtps_s32_f32 (float32_t __a)
6943
+ return __builtin_iceilf (__a);
6946
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
6947
+vcvtps_u32_f32 (float32_t __a)
6949
+ return __builtin_aarch64_lceilusfsi (__a);
6952
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6953
+vcvtp_s32_f32 (float32x2_t __a)
6955
+ return __builtin_aarch64_lceilv2sfv2si (__a);
6958
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6959
+vcvtp_u32_f32 (float32x2_t __a)
6961
+ /* TODO: This cast should go away when builtins have
6962
+ their correct types. */
6963
+ return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
6966
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6967
+vcvtpq_s32_f32 (float32x4_t __a)
6969
+ return __builtin_aarch64_lceilv4sfv4si (__a);
6972
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6973
+vcvtpq_u32_f32 (float32x4_t __a)
6975
+ /* TODO: This cast should go away when builtins have
6976
+ their correct types. */
6977
+ return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
6980
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6981
+vcvtpq_s64_f64 (float64x2_t __a)
6983
+ return __builtin_aarch64_lceilv2dfv2di (__a);
6986
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6987
+vcvtpq_u64_f64 (float64x2_t __a)
6989
+ /* TODO: This cast should go away when builtins have
6990
+ their correct types. */
6991
+ return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
6996
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
6997
@@ -23115,6 +22531,223 @@
6998
return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
7003
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7004
+vrecpes_f32 (float32_t __a)
7006
+ return __builtin_aarch64_frecpesf (__a);
7009
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7010
+vrecped_f64 (float64_t __a)
7012
+ return __builtin_aarch64_frecpedf (__a);
7015
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7016
+vrecpe_f32 (float32x2_t __a)
7018
+ return __builtin_aarch64_frecpev2sf (__a);
7021
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7022
+vrecpeq_f32 (float32x4_t __a)
7024
+ return __builtin_aarch64_frecpev4sf (__a);
7027
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7028
+vrecpeq_f64 (float64x2_t __a)
7030
+ return __builtin_aarch64_frecpev2df (__a);
7035
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7036
+vrecpss_f32 (float32_t __a, float32_t __b)
7038
+ return __builtin_aarch64_frecpssf (__a, __b);
7041
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7042
+vrecpsd_f64 (float64_t __a, float64_t __b)
7044
+ return __builtin_aarch64_frecpsdf (__a, __b);
7047
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7048
+vrecps_f32 (float32x2_t __a, float32x2_t __b)
7050
+ return __builtin_aarch64_frecpsv2sf (__a, __b);
7053
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7054
+vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
7056
+ return __builtin_aarch64_frecpsv4sf (__a, __b);
7059
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7060
+vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
7062
+ return __builtin_aarch64_frecpsv2df (__a, __b);
7067
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
7068
+vrecpxs_f32 (float32_t __a)
7070
+ return __builtin_aarch64_frecpxsf (__a);
7073
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
7074
+vrecpxd_f64 (float64_t __a)
7076
+ return __builtin_aarch64_frecpxdf (__a);
7081
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7082
+vrnd_f32 (float32x2_t __a)
7084
+ return __builtin_aarch64_btruncv2sf (__a);
7087
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7088
+vrndq_f32 (float32x4_t __a)
7090
+ return __builtin_aarch64_btruncv4sf (__a);
7093
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7094
+vrndq_f64 (float64x2_t __a)
7096
+ return __builtin_aarch64_btruncv2df (__a);
7101
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7102
+vrnda_f32 (float32x2_t __a)
7104
+ return __builtin_aarch64_roundv2sf (__a);
7107
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7108
+vrndaq_f32 (float32x4_t __a)
7110
+ return __builtin_aarch64_roundv4sf (__a);
7113
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7114
+vrndaq_f64 (float64x2_t __a)
7116
+ return __builtin_aarch64_roundv2df (__a);
7121
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7122
+vrndi_f32 (float32x2_t __a)
7124
+ return __builtin_aarch64_nearbyintv2sf (__a);
7127
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7128
+vrndiq_f32 (float32x4_t __a)
7130
+ return __builtin_aarch64_nearbyintv4sf (__a);
7133
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7134
+vrndiq_f64 (float64x2_t __a)
7136
+ return __builtin_aarch64_nearbyintv2df (__a);
7141
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7142
+vrndm_f32 (float32x2_t __a)
7144
+ return __builtin_aarch64_floorv2sf (__a);
7147
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7148
+vrndmq_f32 (float32x4_t __a)
7150
+ return __builtin_aarch64_floorv4sf (__a);
7153
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7154
+vrndmq_f64 (float64x2_t __a)
7156
+ return __builtin_aarch64_floorv2df (__a);
7161
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7162
+vrndn_f32 (float32x2_t __a)
7164
+ return __builtin_aarch64_frintnv2sf (__a);
7166
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7167
+vrndnq_f32 (float32x4_t __a)
7169
+ return __builtin_aarch64_frintnv4sf (__a);
7172
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7173
+vrndnq_f64 (float64x2_t __a)
7175
+ return __builtin_aarch64_frintnv2df (__a);
7180
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7181
+vrndp_f32 (float32x2_t __a)
7183
+ return __builtin_aarch64_ceilv2sf (__a);
7186
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7187
+vrndpq_f32 (float32x4_t __a)
7189
+ return __builtin_aarch64_ceilv4sf (__a);
7192
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7193
+vrndpq_f64 (float64x2_t __a)
7195
+ return __builtin_aarch64_ceilv2df (__a);
7200
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7201
+vrndx_f32 (float32x2_t __a)
7203
+ return __builtin_aarch64_rintv2sf (__a);
7206
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7207
+vrndxq_f32 (float32x4_t __a)
7209
+ return __builtin_aarch64_rintv4sf (__a);
7212
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7213
+vrndxq_f64 (float64x2_t __a)
7215
+ return __builtin_aarch64_rintv2df (__a);
7220
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7221
@@ -23458,109 +23091,109 @@
7222
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7223
vshl_n_s8 (int8x8_t __a, const int __b)
7225
- return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b);
7226
+ return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
7229
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7230
vshl_n_s16 (int16x4_t __a, const int __b)
7232
- return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b);
7233
+ return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
7236
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7237
vshl_n_s32 (int32x2_t __a, const int __b)
7239
- return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b);
7240
+ return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
7243
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7244
vshl_n_s64 (int64x1_t __a, const int __b)
7246
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
7247
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
7250
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7251
vshl_n_u8 (uint8x8_t __a, const int __b)
7253
- return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b);
7254
+ return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
7257
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7258
vshl_n_u16 (uint16x4_t __a, const int __b)
7260
- return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b);
7261
+ return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
7264
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7265
vshl_n_u32 (uint32x2_t __a, const int __b)
7267
- return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b);
7268
+ return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
7271
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7272
vshl_n_u64 (uint64x1_t __a, const int __b)
7274
- return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b);
7275
+ return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
7278
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7279
vshlq_n_s8 (int8x16_t __a, const int __b)
7281
- return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b);
7282
+ return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
7285
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7286
vshlq_n_s16 (int16x8_t __a, const int __b)
7288
- return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b);
7289
+ return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
7292
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7293
vshlq_n_s32 (int32x4_t __a, const int __b)
7295
- return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b);
7296
+ return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
7299
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7300
vshlq_n_s64 (int64x2_t __a, const int __b)
7302
- return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b);
7303
+ return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
7306
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7307
vshlq_n_u8 (uint8x16_t __a, const int __b)
7309
- return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b);
7310
+ return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
7313
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7314
vshlq_n_u16 (uint16x8_t __a, const int __b)
7316
- return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b);
7317
+ return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
7320
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7321
vshlq_n_u32 (uint32x4_t __a, const int __b)
7323
- return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b);
7324
+ return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
7327
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7328
vshlq_n_u64 (uint64x2_t __a, const int __b)
7330
- return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b);
7331
+ return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
7334
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7335
vshld_n_s64 (int64x1_t __a, const int __b)
7337
- return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
7338
+ return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
7341
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7342
vshld_n_u64 (uint64x1_t __a, const int __b)
7344
- return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b);
7345
+ return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
7348
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7349
@@ -23748,109 +23381,109 @@
7350
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7351
vshr_n_s8 (int8x8_t __a, const int __b)
7353
- return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b);
7354
+ return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
7357
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7358
vshr_n_s16 (int16x4_t __a, const int __b)
7360
- return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b);
7361
+ return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
7364
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7365
vshr_n_s32 (int32x2_t __a, const int __b)
7367
- return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b);
7368
+ return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
7371
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7372
vshr_n_s64 (int64x1_t __a, const int __b)
7374
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
7375
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
7378
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7379
vshr_n_u8 (uint8x8_t __a, const int __b)
7381
- return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b);
7382
+ return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
7385
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7386
vshr_n_u16 (uint16x4_t __a, const int __b)
7388
- return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b);
7389
+ return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
7392
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7393
vshr_n_u32 (uint32x2_t __a, const int __b)
7395
- return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b);
7396
+ return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
7399
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7400
vshr_n_u64 (uint64x1_t __a, const int __b)
7402
- return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b);
7403
+ return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
7406
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7407
vshrq_n_s8 (int8x16_t __a, const int __b)
7409
- return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b);
7410
+ return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
7413
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7414
vshrq_n_s16 (int16x8_t __a, const int __b)
7416
- return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b);
7417
+ return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
7420
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7421
vshrq_n_s32 (int32x4_t __a, const int __b)
7423
- return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b);
7424
+ return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
7427
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7428
vshrq_n_s64 (int64x2_t __a, const int __b)
7430
- return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b);
7431
+ return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
7434
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7435
vshrq_n_u8 (uint8x16_t __a, const int __b)
7437
- return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b);
7438
+ return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
7441
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7442
vshrq_n_u16 (uint16x8_t __a, const int __b)
7444
- return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b);
7445
+ return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
7448
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7449
vshrq_n_u32 (uint32x4_t __a, const int __b)
7451
- return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b);
7452
+ return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
7455
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7456
vshrq_n_u64 (uint64x2_t __a, const int __b)
7458
- return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b);
7459
+ return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
7462
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7463
vshrd_n_s64 (int64x1_t __a, const int __b)
7465
- return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
7466
+ return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
7469
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7470
vshrd_n_u64 (uint64x1_t __a, const int __b)
7472
- return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b);
7473
+ return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
7477
--- a/src/gcc/config/aarch64/aarch64.md
7478
+++ b/src/gcc/config/aarch64/aarch64.md
7480
(define_c_enum "unspec" [
7503
@@ -763,19 +770,23 @@
7506
(define_insn "*mov<mode>_aarch64"
7507
- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,r,m, r,*w")
7508
- (match_operand:SHORT 1 "general_operand" " r,M,m,rZ,*w,r"))]
7509
+ [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w")
7510
+ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
7511
"(register_operand (operands[0], <MODE>mode)
7512
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
7516
+ movi\\t%0.<Vallxd>, %1
7518
+ ldr\\t%<size>0, %1
7520
+ str\\t%<size>1, %0
7521
umov\\t%w0, %1.<v>[0]
7522
- dup\\t%0.<Vallxd>, %w1"
7523
- [(set_attr "v8type" "move,alu,load1,store1,*,*")
7524
- (set_attr "simd_type" "*,*,*,*,simd_movgp,simd_dupgp")
7525
+ dup\\t%0.<Vallxd>, %w1
7526
+ dup\\t%0, %1.<v>[0]"
7527
+ [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
7528
+ (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
7529
(set_attr "mode" "<MODE>")
7530
(set_attr "simd_mode" "<MODE>")]
7532
@@ -797,26 +808,28 @@
7535
(define_insn "*movsi_aarch64"
7536
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m, *w, r,*w")
7537
- (match_operand:SI 1 "aarch64_mov_operand" " r,M,m,rZ,rZ,*w,*w"))]
7538
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,*w,m, m,*w, r,*w")
7539
+ (match_operand:SI 1 "aarch64_mov_operand" " r,M,m, m,rZ,*w,rZ,*w,*w"))]
7540
"(register_operand (operands[0], SImode)
7541
|| aarch64_reg_or_zero (operands[1], SImode))"
7552
- [(set_attr "v8type" "move,alu,load1,store1,fmov,fmov,fmov")
7553
+ [(set_attr "v8type" "move,alu,load1,load1,store1,store1,fmov,fmov,fmov")
7554
(set_attr "mode" "SI")
7555
- (set_attr "fp" "*,*,*,*,yes,yes,yes")]
7556
+ (set_attr "fp" "*,*,*,*,*,*,yes,yes,yes")]
7559
(define_insn "*movdi_aarch64"
7560
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,m, r, r, *w, r,*w,w")
7561
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m,rZ,Usa,Ush,rZ,*w,*w,Dd"))]
7562
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r, r, *w, r,*w,w")
7563
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,Usa,Ush,rZ,*w,*w,Dd"))]
7564
"(register_operand (operands[0], DImode)
7565
|| aarch64_reg_or_zero (operands[1], DImode))"
7567
@@ -825,16 +838,18 @@
7580
- [(set_attr "v8type" "move,move,move,alu,load1,store1,adr,adr,fmov,fmov,fmov,fmov")
7581
+ [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
7582
(set_attr "mode" "DI")
7583
- (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
7584
+ (set_attr "fp" "*,*,*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
7587
(define_insn "insv_imm<mode>"
7588
@@ -1149,13 +1164,14 @@
7591
(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
7592
- [(set (match_operand:GPI 0 "register_operand" "=r,r")
7593
- (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))]
7594
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,*w")
7595
+ (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))]
7598
uxt<SHORT:size>\t%<GPI:w>0, %w1
7599
- ldr<SHORT:size>\t%w0, %1"
7600
- [(set_attr "v8type" "extend,load1")
7601
+ ldr<SHORT:size>\t%w0, %1
7602
+ ldr\t%<SHORT:size>0, %1"
7603
+ [(set_attr "v8type" "extend,load1,load1")
7604
(set_attr "mode" "<GPI:MODE>")]
7607
@@ -1286,6 +1302,112 @@
7608
(set_attr "mode" "SI")]
7611
+(define_insn "*adds_mul_imm_<mode>"
7612
+ [(set (reg:CC_NZ CC_REGNUM)
7614
+ (plus:GPI (mult:GPI
7615
+ (match_operand:GPI 1 "register_operand" "r")
7616
+ (match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))
7617
+ (match_operand:GPI 3 "register_operand" "rk"))
7619
+ (set (match_operand:GPI 0 "register_operand" "=r")
7620
+ (plus:GPI (mult:GPI (match_dup 1) (match_dup 2))
7623
+ "adds\\t%<w>0, %<w>3, %<w>1, lsl %p2"
7624
+ [(set_attr "v8type" "alus_shift")
7625
+ (set_attr "mode" "<MODE>")]
7628
+(define_insn "*subs_mul_imm_<mode>"
7629
+ [(set (reg:CC_NZ CC_REGNUM)
7631
+ (minus:GPI (match_operand:GPI 1 "register_operand" "rk")
7633
+ (match_operand:GPI 2 "register_operand" "r")
7634
+ (match_operand:QI 3 "aarch64_pwr_2_<mode>" "n")))
7636
+ (set (match_operand:GPI 0 "register_operand" "=r")
7637
+ (minus:GPI (match_dup 1)
7638
+ (mult:GPI (match_dup 2) (match_dup 3))))]
7640
+ "subs\\t%<w>0, %<w>1, %<w>2, lsl %p3"
7641
+ [(set_attr "v8type" "alus_shift")
7642
+ (set_attr "mode" "<MODE>")]
7645
+(define_insn "*adds_<optab><ALLX:mode>_<GPI:mode>"
7646
+ [(set (reg:CC_NZ CC_REGNUM)
7649
+ (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r"))
7650
+ (match_operand:GPI 2 "register_operand" "r"))
7652
+ (set (match_operand:GPI 0 "register_operand" "=r")
7653
+ (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))]
7655
+ "adds\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>"
7656
+ [(set_attr "v8type" "alus_ext")
7657
+ (set_attr "mode" "<GPI:MODE>")]
7660
+(define_insn "*subs_<optab><ALLX:mode>_<GPI:mode>"
7661
+ [(set (reg:CC_NZ CC_REGNUM)
7663
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
7665
+ (match_operand:ALLX 2 "register_operand" "r")))
7667
+ (set (match_operand:GPI 0 "register_operand" "=r")
7668
+ (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))]
7670
+ "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>"
7671
+ [(set_attr "v8type" "alus_ext")
7672
+ (set_attr "mode" "<GPI:MODE>")]
7675
+(define_insn "*adds_<optab><mode>_multp2"
7676
+ [(set (reg:CC_NZ CC_REGNUM)
7678
+ (plus:GPI (ANY_EXTRACT:GPI
7679
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
7680
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
7681
+ (match_operand 3 "const_int_operand" "n")
7683
+ (match_operand:GPI 4 "register_operand" "r"))
7685
+ (set (match_operand:GPI 0 "register_operand" "=r")
7686
+ (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2))
7690
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
7691
+ "adds\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
7692
+ [(set_attr "v8type" "alus_ext")
7693
+ (set_attr "mode" "<MODE>")]
7696
+(define_insn "*subs_<optab><mode>_multp2"
7697
+ [(set (reg:CC_NZ CC_REGNUM)
7699
+ (minus:GPI (match_operand:GPI 4 "register_operand" "r")
7701
+ (mult:GPI (match_operand:GPI 1 "register_operand" "r")
7702
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
7703
+ (match_operand 3 "const_int_operand" "n")
7706
+ (set (match_operand:GPI 0 "register_operand" "=r")
7707
+ (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI
7708
+ (mult:GPI (match_dup 1) (match_dup 2))
7711
+ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
7712
+ "subs\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
7713
+ [(set_attr "v8type" "alus_ext")
7714
+ (set_attr "mode" "<MODE>")]
7717
(define_insn "*add<mode>3nr_compare0"
7718
[(set (reg:CC_NZ CC_REGNUM)
7720
@@ -1790,6 +1912,34 @@
7721
(set_attr "mode" "SI")]
7724
+(define_insn "*sub<mode>3_carryin"
7726
+ (match_operand:GPI 0 "register_operand" "=r")
7727
+ (minus:GPI (minus:GPI
7728
+ (match_operand:GPI 1 "register_operand" "r")
7729
+ (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
7730
+ (match_operand:GPI 2 "register_operand" "r")))]
7732
+ "sbc\\t%<w>0, %<w>1, %<w>2"
7733
+ [(set_attr "v8type" "adc")
7734
+ (set_attr "mode" "<MODE>")]
7737
+;; zero_extend version of the above
7738
+(define_insn "*subsi3_carryin_uxtw"
7740
+ (match_operand:DI 0 "register_operand" "=r")
7742
+ (minus:SI (minus:SI
7743
+ (match_operand:SI 1 "register_operand" "r")
7744
+ (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
7745
+ (match_operand:SI 2 "register_operand" "r"))))]
7747
+ "sbc\\t%w0, %w1, %w2"
7748
+ [(set_attr "v8type" "adc")
7749
+ (set_attr "mode" "SI")]
7752
(define_insn "*sub_uxt<mode>_multp2"
7753
[(set (match_operand:GPI 0 "register_operand" "=rk")
7754
(minus:GPI (match_operand:GPI 4 "register_operand" "r")
7755
@@ -1843,6 +1993,27 @@
7756
(set_attr "mode" "SI")]
7759
+(define_insn "*ngc<mode>"
7760
+ [(set (match_operand:GPI 0 "register_operand" "=r")
7761
+ (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
7762
+ (match_operand:GPI 1 "register_operand" "r")))]
7764
+ "ngc\\t%<w>0, %<w>1"
7765
+ [(set_attr "v8type" "adc")
7766
+ (set_attr "mode" "<MODE>")]
7769
+(define_insn "*ngcsi_uxtw"
7770
+ [(set (match_operand:DI 0 "register_operand" "=r")
7772
+ (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
7773
+ (match_operand:SI 1 "register_operand" "r"))))]
7776
+ [(set_attr "v8type" "adc")
7777
+ (set_attr "mode" "SI")]
7780
(define_insn "*neg<mode>2_compare0"
7781
[(set (reg:CC_NZ CC_REGNUM)
7782
(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
7783
@@ -2157,6 +2328,18 @@
7784
(set_attr "mode" "<GPI:MODE>")]
7787
+(define_insn "*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>"
7788
+ [(set (reg:CC_SWP CC_REGNUM)
7789
+ (compare:CC_SWP (ashift:GPI
7791
+ (match_operand:ALLX 0 "register_operand" "r"))
7792
+ (match_operand:QI 1 "aarch64_shift_imm_<mode>" "n"))
7793
+ (match_operand:GPI 2 "register_operand" "r")))]
7795
+ "cmp\\t%<GPI:w>2, %<GPI:w>0, <su>xt<ALLX:size> %1"
7796
+ [(set_attr "v8type" "alus_ext")
7797
+ (set_attr "mode" "<GPI:MODE>")]
7800
;; -------------------------------------------------------------------
7801
;; Store-flag and conditional select insns
7802
@@ -2433,6 +2616,69 @@
7803
[(set_attr "v8type" "logic,logic_imm")
7804
(set_attr "mode" "SI")])
7806
+(define_insn "*and<mode>3_compare0"
7807
+ [(set (reg:CC_NZ CC_REGNUM)
7809
+ (and:GPI (match_operand:GPI 1 "register_operand" "%r,r")
7810
+ (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>"))
7812
+ (set (match_operand:GPI 0 "register_operand" "=r,r")
7813
+ (and:GPI (match_dup 1) (match_dup 2)))]
7815
+ "ands\\t%<w>0, %<w>1, %<w>2"
7816
+ [(set_attr "v8type" "logics,logics_imm")
7817
+ (set_attr "mode" "<MODE>")]
7820
+;; zero_extend version of above
7821
+(define_insn "*andsi3_compare0_uxtw"
7822
+ [(set (reg:CC_NZ CC_REGNUM)
7824
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
7825
+ (match_operand:SI 2 "aarch64_logical_operand" "r,K"))
7827
+ (set (match_operand:DI 0 "register_operand" "=r,r")
7828
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
7830
+ "ands\\t%w0, %w1, %w2"
7831
+ [(set_attr "v8type" "logics,logics_imm")
7832
+ (set_attr "mode" "SI")]
7835
+(define_insn "*and_<SHIFT:optab><mode>3_compare0"
7836
+ [(set (reg:CC_NZ CC_REGNUM)
7838
+ (and:GPI (SHIFT:GPI
7839
+ (match_operand:GPI 1 "register_operand" "r")
7840
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
7841
+ (match_operand:GPI 3 "register_operand" "r"))
7843
+ (set (match_operand:GPI 0 "register_operand" "=r")
7844
+ (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))]
7846
+ "ands\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
7847
+ [(set_attr "v8type" "logics_shift")
7848
+ (set_attr "mode" "<MODE>")]
7851
+;; zero_extend version of above
7852
+(define_insn "*and_<SHIFT:optab>si3_compare0_uxtw"
7853
+ [(set (reg:CC_NZ CC_REGNUM)
7856
+ (match_operand:SI 1 "register_operand" "r")
7857
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
7858
+ (match_operand:SI 3 "register_operand" "r"))
7860
+ (set (match_operand:DI 0 "register_operand" "=r")
7861
+ (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2))
7864
+ "ands\\t%w0, %w3, %w1, <SHIFT:shift> %2"
7865
+ [(set_attr "v8type" "logics_shift")
7866
+ (set_attr "mode" "SI")]
7869
(define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
7870
[(set (match_operand:GPI 0 "register_operand" "=r")
7871
(LOGICAL:GPI (SHIFT:GPI
7872
@@ -2703,6 +2949,62 @@
7873
(set_attr "mode" "<MODE>")]
7876
+(define_insn "*extr<mode>5_insn"
7877
+ [(set (match_operand:GPI 0 "register_operand" "=r")
7878
+ (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
7879
+ (match_operand 3 "const_int_operand" "n"))
7880
+ (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r")
7881
+ (match_operand 4 "const_int_operand" "n"))))]
7882
+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) &&
7883
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))"
7884
+ "extr\\t%<w>0, %<w>1, %<w>2, %4"
7885
+ [(set_attr "v8type" "shift")
7886
+ (set_attr "mode" "<MODE>")]
7889
+;; zero_extend version of the above
7890
+(define_insn "*extrsi5_insn_uxtw"
7891
+ [(set (match_operand:DI 0 "register_operand" "=r")
7893
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
7894
+ (match_operand 3 "const_int_operand" "n"))
7895
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
7896
+ (match_operand 4 "const_int_operand" "n")))))]
7897
+ "UINTVAL (operands[3]) < 32 &&
7898
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
7899
+ "extr\\t%w0, %w1, %w2, %4"
7900
+ [(set_attr "v8type" "shift")
7901
+ (set_attr "mode" "SI")]
7904
+(define_insn "*ror<mode>3_insn"
7905
+ [(set (match_operand:GPI 0 "register_operand" "=r")
7906
+ (rotate:GPI (match_operand:GPI 1 "register_operand" "r")
7907
+ (match_operand 2 "const_int_operand" "n")))]
7908
+ "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
7910
+ operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
7911
+ return "ror\\t%<w>0, %<w>1, %3";
7913
+ [(set_attr "v8type" "shift")
7914
+ (set_attr "mode" "<MODE>")]
7917
+;; zero_extend version of the above
7918
+(define_insn "*rorsi3_insn_uxtw"
7919
+ [(set (match_operand:DI 0 "register_operand" "=r")
7921
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
7922
+ (match_operand 2 "const_int_operand" "n"))))]
7923
+ "UINTVAL (operands[2]) < 32"
7925
+ operands[3] = GEN_INT (32 - UINTVAL (operands[2]));
7926
+ return "ror\\t%w0, %w1, %3";
7928
+ [(set_attr "v8type" "shift")
7929
+ (set_attr "mode" "SI")]
7932
(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
7933
[(set (match_operand:GPI 0 "register_operand" "=r")
7935
@@ -3089,6 +3391,27 @@
7936
(set_attr "mode" "<MODE>")]
7939
+(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
7940
+ [(set (match_operand:GPF 0 "register_operand" "=w")
7941
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
7944
+ "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
7945
+ [(set_attr "v8type" "frecp<FRECP:frecp_suffix>")
7946
+ (set_attr "mode" "<MODE>")]
7949
+(define_insn "aarch64_frecps<mode>"
7950
+ [(set (match_operand:GPF 0 "register_operand" "=w")
7951
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
7952
+ (match_operand:GPF 2 "register_operand" "w")]
7955
+ "frecps\\t%<s>0, %<s>1, %<s>2"
7956
+ [(set_attr "v8type" "frecps")
7957
+ (set_attr "mode" "<MODE>")]
7960
;; -------------------------------------------------------------------
7962
;; -------------------------------------------------------------------
7963
--- a/src/gcc/config/aarch64/aarch64-builtins.c
7964
+++ b/src/gcc/config/aarch64/aarch64-builtins.c
7966
#include "langhooks.h"
7967
#include "diagnostic-core.h"
7969
+#include "gimple.h"
7971
enum aarch64_simd_builtin_type_mode
7989
@@ -128,123 +131,133 @@
7991
} aarch64_simd_builtin_datum;
7993
-#define CF(N, X) CODE_FOR_aarch64_##N##X
7994
+#define CF0(N, X) CODE_FOR_aarch64_##N##X
7995
+#define CF1(N, X) CODE_FOR_##N##X##1
7996
+#define CF2(N, X) CODE_FOR_##N##X##2
7997
+#define CF3(N, X) CODE_FOR_##N##X##3
7998
+#define CF4(N, X) CODE_FOR_##N##X##4
7999
+#define CF10(N, X) CODE_FOR_##N##X
8001
-#define VAR1(T, N, A) \
8002
- {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0},
8003
-#define VAR2(T, N, A, B) \
8006
-#define VAR3(T, N, A, B, C) \
8007
- VAR2 (T, N, A, B) \
8009
-#define VAR4(T, N, A, B, C, D) \
8010
- VAR3 (T, N, A, B, C) \
8012
-#define VAR5(T, N, A, B, C, D, E) \
8013
- VAR4 (T, N, A, B, C, D) \
8015
-#define VAR6(T, N, A, B, C, D, E, F) \
8016
- VAR5 (T, N, A, B, C, D, E) \
8018
-#define VAR7(T, N, A, B, C, D, E, F, G) \
8019
- VAR6 (T, N, A, B, C, D, E, F) \
8021
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
8022
- VAR7 (T, N, A, B, C, D, E, F, G) \
8024
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
8025
- VAR8 (T, N, A, B, C, D, E, F, G, H) \
8027
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
8028
- VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
8030
-#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
8031
- VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
8033
-#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
8034
- VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
8036
+#define VAR1(T, N, MAP, A) \
8037
+ {#N, AARCH64_SIMD_##T, UP (A), CF##MAP (N, A), 0},
8038
+#define VAR2(T, N, MAP, A, B) \
8039
+ VAR1 (T, N, MAP, A) \
8040
+ VAR1 (T, N, MAP, B)
8041
+#define VAR3(T, N, MAP, A, B, C) \
8042
+ VAR2 (T, N, MAP, A, B) \
8043
+ VAR1 (T, N, MAP, C)
8044
+#define VAR4(T, N, MAP, A, B, C, D) \
8045
+ VAR3 (T, N, MAP, A, B, C) \
8046
+ VAR1 (T, N, MAP, D)
8047
+#define VAR5(T, N, MAP, A, B, C, D, E) \
8048
+ VAR4 (T, N, MAP, A, B, C, D) \
8049
+ VAR1 (T, N, MAP, E)
8050
+#define VAR6(T, N, MAP, A, B, C, D, E, F) \
8051
+ VAR5 (T, N, MAP, A, B, C, D, E) \
8052
+ VAR1 (T, N, MAP, F)
8053
+#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
8054
+ VAR6 (T, N, MAP, A, B, C, D, E, F) \
8055
+ VAR1 (T, N, MAP, G)
8056
+#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
8057
+ VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
8058
+ VAR1 (T, N, MAP, H)
8059
+#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
8060
+ VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
8061
+ VAR1 (T, N, MAP, I)
8062
+#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
8063
+ VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
8064
+ VAR1 (T, N, MAP, J)
8065
+#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
8066
+ VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
8067
+ VAR1 (T, N, MAP, K)
8068
+#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
8069
+ VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
8070
+ VAR1 (T, N, MAP, L)
8072
/* BUILTIN_<ITERATOR> macros should expand to cover the same range of
8073
modes as is given for each define_mode_iterator in
8074
config/aarch64/iterators.md. */
8076
-#define BUILTIN_DX(T, N) \
8077
- VAR2 (T, N, di, df)
8078
-#define BUILTIN_SDQ_I(T, N) \
8079
- VAR4 (T, N, qi, hi, si, di)
8080
-#define BUILTIN_SD_HSI(T, N) \
8081
- VAR2 (T, N, hi, si)
8082
-#define BUILTIN_V2F(T, N) \
8083
- VAR2 (T, N, v2sf, v2df)
8084
-#define BUILTIN_VALL(T, N) \
8085
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, v2sf, v4sf, v2df)
8086
-#define BUILTIN_VB(T, N) \
8087
- VAR2 (T, N, v8qi, v16qi)
8088
-#define BUILTIN_VD(T, N) \
8089
- VAR4 (T, N, v8qi, v4hi, v2si, v2sf)
8090
-#define BUILTIN_VDC(T, N) \
8091
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
8092
-#define BUILTIN_VDIC(T, N) \
8093
- VAR3 (T, N, v8qi, v4hi, v2si)
8094
-#define BUILTIN_VDN(T, N) \
8095
- VAR3 (T, N, v4hi, v2si, di)
8096
-#define BUILTIN_VDQ(T, N) \
8097
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
8098
-#define BUILTIN_VDQF(T, N) \
8099
- VAR3 (T, N, v2sf, v4sf, v2df)
8100
-#define BUILTIN_VDQHS(T, N) \
8101
- VAR4 (T, N, v4hi, v8hi, v2si, v4si)
8102
-#define BUILTIN_VDQIF(T, N) \
8103
- VAR9 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
8104
-#define BUILTIN_VDQM(T, N) \
8105
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
8106
-#define BUILTIN_VDQV(T, N) \
8107
- VAR5 (T, N, v8qi, v16qi, v4hi, v8hi, v4si)
8108
-#define BUILTIN_VDQ_BHSI(T, N) \
8109
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
8110
-#define BUILTIN_VDQ_I(T, N) \
8111
- VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
8112
-#define BUILTIN_VDW(T, N) \
8113
- VAR3 (T, N, v8qi, v4hi, v2si)
8114
-#define BUILTIN_VD_BHSI(T, N) \
8115
- VAR3 (T, N, v8qi, v4hi, v2si)
8116
-#define BUILTIN_VD_HSI(T, N) \
8117
- VAR2 (T, N, v4hi, v2si)
8118
-#define BUILTIN_VD_RE(T, N) \
8119
- VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df)
8120
-#define BUILTIN_VQ(T, N) \
8121
- VAR6 (T, N, v16qi, v8hi, v4si, v2di, v4sf, v2df)
8122
-#define BUILTIN_VQN(T, N) \
8123
- VAR3 (T, N, v8hi, v4si, v2di)
8124
-#define BUILTIN_VQW(T, N) \
8125
- VAR3 (T, N, v16qi, v8hi, v4si)
8126
-#define BUILTIN_VQ_HSI(T, N) \
8127
- VAR2 (T, N, v8hi, v4si)
8128
-#define BUILTIN_VQ_S(T, N) \
8129
- VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
8130
-#define BUILTIN_VSDQ_HSI(T, N) \
8131
- VAR6 (T, N, v4hi, v8hi, v2si, v4si, hi, si)
8132
-#define BUILTIN_VSDQ_I(T, N) \
8133
- VAR11 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
8134
-#define BUILTIN_VSDQ_I_BHSI(T, N) \
8135
- VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
8136
-#define BUILTIN_VSDQ_I_DI(T, N) \
8137
- VAR8 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
8138
-#define BUILTIN_VSD_HSI(T, N) \
8139
- VAR4 (T, N, v4hi, v2si, hi, si)
8140
-#define BUILTIN_VSQN_HSDI(T, N) \
8141
- VAR6 (T, N, v8hi, v4si, v2di, hi, si, di)
8142
-#define BUILTIN_VSTRUCT(T, N) \
8143
- VAR3 (T, N, oi, ci, xi)
8144
+#define BUILTIN_DX(T, N, MAP) \
8145
+ VAR2 (T, N, MAP, di, df)
8146
+#define BUILTIN_GPF(T, N, MAP) \
8147
+ VAR2 (T, N, MAP, sf, df)
8148
+#define BUILTIN_SDQ_I(T, N, MAP) \
8149
+ VAR4 (T, N, MAP, qi, hi, si, di)
8150
+#define BUILTIN_SD_HSI(T, N, MAP) \
8151
+ VAR2 (T, N, MAP, hi, si)
8152
+#define BUILTIN_V2F(T, N, MAP) \
8153
+ VAR2 (T, N, MAP, v2sf, v2df)
8154
+#define BUILTIN_VALL(T, N, MAP) \
8155
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
8156
+ v4si, v2di, v2sf, v4sf, v2df)
8157
+#define BUILTIN_VB(T, N, MAP) \
8158
+ VAR2 (T, N, MAP, v8qi, v16qi)
8159
+#define BUILTIN_VD(T, N, MAP) \
8160
+ VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
8161
+#define BUILTIN_VDC(T, N, MAP) \
8162
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
8163
+#define BUILTIN_VDIC(T, N, MAP) \
8164
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
8165
+#define BUILTIN_VDN(T, N, MAP) \
8166
+ VAR3 (T, N, MAP, v4hi, v2si, di)
8167
+#define BUILTIN_VDQ(T, N, MAP) \
8168
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
8169
+#define BUILTIN_VDQF(T, N, MAP) \
8170
+ VAR3 (T, N, MAP, v2sf, v4sf, v2df)
8171
+#define BUILTIN_VDQH(T, N, MAP) \
8172
+ VAR2 (T, N, MAP, v4hi, v8hi)
8173
+#define BUILTIN_VDQHS(T, N, MAP) \
8174
+ VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si)
8175
+#define BUILTIN_VDQIF(T, N, MAP) \
8176
+ VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
8177
+#define BUILTIN_VDQM(T, N, MAP) \
8178
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
8179
+#define BUILTIN_VDQV(T, N, MAP) \
8180
+ VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si)
8181
+#define BUILTIN_VDQ_BHSI(T, N, MAP) \
8182
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
8183
+#define BUILTIN_VDQ_I(T, N, MAP) \
8184
+ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
8185
+#define BUILTIN_VDW(T, N, MAP) \
8186
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
8187
+#define BUILTIN_VD_BHSI(T, N, MAP) \
8188
+ VAR3 (T, N, MAP, v8qi, v4hi, v2si)
8189
+#define BUILTIN_VD_HSI(T, N, MAP) \
8190
+ VAR2 (T, N, MAP, v4hi, v2si)
8191
+#define BUILTIN_VD_RE(T, N, MAP) \
8192
+ VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
8193
+#define BUILTIN_VQ(T, N, MAP) \
8194
+ VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
8195
+#define BUILTIN_VQN(T, N, MAP) \
8196
+ VAR3 (T, N, MAP, v8hi, v4si, v2di)
8197
+#define BUILTIN_VQW(T, N, MAP) \
8198
+ VAR3 (T, N, MAP, v16qi, v8hi, v4si)
8199
+#define BUILTIN_VQ_HSI(T, N, MAP) \
8200
+ VAR2 (T, N, MAP, v8hi, v4si)
8201
+#define BUILTIN_VQ_S(T, N, MAP) \
8202
+ VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
8203
+#define BUILTIN_VSDQ_HSI(T, N, MAP) \
8204
+ VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si)
8205
+#define BUILTIN_VSDQ_I(T, N, MAP) \
8206
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
8207
+#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \
8208
+ VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
8209
+#define BUILTIN_VSDQ_I_DI(T, N, MAP) \
8210
+ VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
8211
+#define BUILTIN_VSD_HSI(T, N, MAP) \
8212
+ VAR4 (T, N, MAP, v4hi, v2si, hi, si)
8213
+#define BUILTIN_VSQN_HSDI(T, N, MAP) \
8214
+ VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di)
8215
+#define BUILTIN_VSTRUCT(T, N, MAP) \
8216
+ VAR3 (T, N, MAP, oi, ci, xi)
8218
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
8219
#include "aarch64-simd-builtins.def"
8223
-#define VAR1(T, N, A) \
8224
+#define VAR1(T, N, MAP, A) \
8225
AARCH64_SIMD_BUILTIN_##N##A,
8227
enum aarch64_builtins
8228
@@ -257,53 +270,6 @@
8233
-#undef BUILTIN_SDQ_I
8234
-#undef BUILTIN_SD_HSI
8236
-#undef BUILTIN_VALL
8240
-#undef BUILTIN_VDIC
8243
-#undef BUILTIN_VDQF
8244
-#undef BUILTIN_VDQHS
8245
-#undef BUILTIN_VDQIF
8246
-#undef BUILTIN_VDQM
8247
-#undef BUILTIN_VDQV
8248
-#undef BUILTIN_VDQ_BHSI
8249
-#undef BUILTIN_VDQ_I
8251
-#undef BUILTIN_VD_BHSI
8252
-#undef BUILTIN_VD_HSI
8253
-#undef BUILTIN_VD_RE
8257
-#undef BUILTIN_VQ_HSI
8258
-#undef BUILTIN_VQ_S
8259
-#undef BUILTIN_VSDQ_HSI
8260
-#undef BUILTIN_VSDQ_I
8261
-#undef BUILTIN_VSDQ_I_BHSI
8262
-#undef BUILTIN_VSDQ_I_DI
8263
-#undef BUILTIN_VSD_HSI
8264
-#undef BUILTIN_VSQN_HSDI
8265
-#undef BUILTIN_VSTRUCT
8279
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
8281
#define NUM_DREG_TYPES 6
8284
"v8qi", "v4hi", "v2si", "v2sf", "di", "df",
8285
"v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
8286
- "ti", "ei", "oi", "xi", "si", "hi", "qi"
8287
+ "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
8291
@@ -1258,19 +1224,19 @@
8292
&& in_mode == N##Fmode && in_n == C)
8293
case BUILT_IN_FLOOR:
8294
case BUILT_IN_FLOORF:
8295
- return AARCH64_FIND_FRINT_VARIANT (frintm);
8296
+ return AARCH64_FIND_FRINT_VARIANT (floor);
8298
case BUILT_IN_CEILF:
8299
- return AARCH64_FIND_FRINT_VARIANT (frintp);
8300
+ return AARCH64_FIND_FRINT_VARIANT (ceil);
8301
case BUILT_IN_TRUNC:
8302
case BUILT_IN_TRUNCF:
8303
- return AARCH64_FIND_FRINT_VARIANT (frintz);
8304
+ return AARCH64_FIND_FRINT_VARIANT (btrunc);
8305
case BUILT_IN_ROUND:
8306
case BUILT_IN_ROUNDF:
8307
- return AARCH64_FIND_FRINT_VARIANT (frinta);
8308
+ return AARCH64_FIND_FRINT_VARIANT (round);
8309
case BUILT_IN_NEARBYINT:
8310
case BUILT_IN_NEARBYINTF:
8311
- return AARCH64_FIND_FRINT_VARIANT (frinti);
8312
+ return AARCH64_FIND_FRINT_VARIANT (nearbyint);
8314
case BUILT_IN_SQRTF:
8315
return AARCH64_FIND_FRINT_VARIANT (sqrt);
8316
@@ -1279,9 +1245,51 @@
8317
(out_mode == N##Imode && out_n == C \
8318
&& in_mode == N##Fmode && in_n == C)
8319
case BUILT_IN_LFLOOR:
8320
- return AARCH64_FIND_FRINT_VARIANT (fcvtms);
8321
+ case BUILT_IN_IFLOORF:
8323
+ tree new_tree = NULL_TREE;
8324
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
8326
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2dfv2di];
8327
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
8329
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv4sfv4si];
8330
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
8332
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lfloorv2sfv2si];
8335
case BUILT_IN_LCEIL:
8336
- return AARCH64_FIND_FRINT_VARIANT (fcvtps);
8337
+ case BUILT_IN_ICEILF:
8339
+ tree new_tree = NULL_TREE;
8340
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
8342
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2dfv2di];
8343
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
8345
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv4sfv4si];
8346
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
8348
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lceilv2sfv2si];
8351
+ case BUILT_IN_LROUND:
8352
+ case BUILT_IN_IROUNDF:
8354
+ tree new_tree = NULL_TREE;
8355
+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
8357
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2dfv2di];
8358
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
8360
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv4sfv4si];
8361
+ else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
8363
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_lroundv2sfv2si];
8370
@@ -1289,5 +1297,130 @@
8376
+#define VAR1(T, N, MAP, A) \
8377
+ case AARCH64_SIMD_BUILTIN_##N##A:
8380
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
8381
+ bool ignore ATTRIBUTE_UNUSED)
8383
+ int fcode = DECL_FUNCTION_CODE (fndecl);
8384
+ tree type = TREE_TYPE (TREE_TYPE (fndecl));
8388
+ BUILTIN_VDQF (UNOP, abs, 2)
8389
+ return fold_build1 (ABS_EXPR, type, args[0]);
8391
+ VAR1 (UNOP, floatv2si, 2, v2sf)
8392
+ VAR1 (UNOP, floatv4si, 2, v4sf)
8393
+ VAR1 (UNOP, floatv2di, 2, v2df)
8394
+ return fold_build1 (FLOAT_EXPR, type, args[0]);
8404
+aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
8406
+ bool changed = false;
8407
+ gimple stmt = gsi_stmt (*gsi);
8408
+ tree call = gimple_call_fn (stmt);
8410
+ gimple new_stmt = NULL;
8413
+ fndecl = gimple_call_fndecl (stmt);
8416
+ int fcode = DECL_FUNCTION_CODE (fndecl);
8417
+ int nargs = gimple_call_num_args (stmt);
8418
+ tree *args = (nargs > 0
8419
+ ? gimple_call_arg_ptr (stmt, 0)
8420
+ : &error_mark_node);
8424
+ BUILTIN_VDQF (UNOP, addv, 0)
8425
+ new_stmt = gimple_build_assign_with_ops (
8427
+ gimple_call_lhs (stmt),
8439
+ gsi_replace (gsi, new_stmt, true);
8446
#undef AARCH64_CHECK_BUILTIN_MODE
8447
#undef AARCH64_FIND_FRINT_VARIANT
8449
+#undef BUILTIN_SDQ_I
8450
+#undef BUILTIN_SD_HSI
8452
+#undef BUILTIN_VALL
8456
+#undef BUILTIN_VDIC
8459
+#undef BUILTIN_VDQF
8460
+#undef BUILTIN_VDQH
8461
+#undef BUILTIN_VDQHS
8462
+#undef BUILTIN_VDQIF
8463
+#undef BUILTIN_VDQM
8464
+#undef BUILTIN_VDQV
8465
+#undef BUILTIN_VDQ_BHSI
8466
+#undef BUILTIN_VDQ_I
8468
+#undef BUILTIN_VD_BHSI
8469
+#undef BUILTIN_VD_HSI
8470
+#undef BUILTIN_VD_RE
8474
+#undef BUILTIN_VQ_HSI
8475
+#undef BUILTIN_VQ_S
8476
+#undef BUILTIN_VSDQ_HSI
8477
+#undef BUILTIN_VSDQ_I
8478
+#undef BUILTIN_VSDQ_I_BHSI
8479
+#undef BUILTIN_VSDQ_I_DI
8480
+#undef BUILTIN_VSD_HSI
8481
+#undef BUILTIN_VSQN_HSDI
8482
+#undef BUILTIN_VSTRUCT
8501
--- a/src/gcc/config/aarch64/aarch64-protos.h
8502
+++ b/src/gcc/config/aarch64/aarch64-protos.h
8504
bool aarch64_float_const_zero_rtx_p (rtx);
8505
bool aarch64_function_arg_regno_p (unsigned);
8506
bool aarch64_gen_movmemqi (rtx *);
8507
+bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
8508
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
8509
bool aarch64_is_long_call_p (rtx);
8510
bool aarch64_label_mentioned_p (rtx);
8512
bool aarch64_regno_ok_for_index_p (int, bool);
8513
bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
8514
bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
8515
+bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
8516
bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
8517
bool aarch64_symbolic_address_p (rtx);
8518
bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
8520
bool aarch64_simd_mem_operand_p (rtx);
8521
rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
8522
rtx aarch64_tls_get_addr (void);
8523
+tree aarch64_fold_builtin (tree, int, tree *, bool);
8524
unsigned aarch64_dbx_register_number (unsigned);
8525
unsigned aarch64_trampoline_size (void);
8526
void aarch64_asm_output_labelref (FILE *, const char *);
8527
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
8528
+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
8529
@@ -18,241 +18,329 @@
8530
along with GCC; see the file COPYING3. If not see
8531
<http://www.gnu.org/licenses/>. */
8533
-/* In the list below, the BUILTIN_<ITERATOR> macros should
8534
- correspond to the iterator used to construct the instruction's
8535
- patterns in aarch64-simd.md. A helpful idiom to follow when
8536
- adding new builtins is to add a line for each pattern in the md
8537
- file. Thus, ADDP, which has one pattern defined for the VD_BHSI
8538
- iterator, and one for DImode, has two entries below. */
8539
+/* In the list below, the BUILTIN_<ITERATOR> macros expand to create
8540
+ builtins for each of the modes described by <ITERATOR>. When adding
8541
+ new builtins to this list, a helpful idiom to follow is to add
8542
+ a line for each pattern in the md file. Thus, ADDP, which has one
8543
+ pattern defined for the VD_BHSI iterator, and one for DImode, has two
8546
- BUILTIN_VD_RE (CREATE, create)
8547
- BUILTIN_VQ_S (GETLANE, get_lane_signed)
8548
- BUILTIN_VDQ (GETLANE, get_lane_unsigned)
8549
- BUILTIN_VDQF (GETLANE, get_lane)
8550
- VAR1 (GETLANE, get_lane, di)
8551
- BUILTIN_VDC (COMBINE, combine)
8552
- BUILTIN_VB (BINOP, pmul)
8553
- BUILTIN_VDQF (UNOP, sqrt)
8554
- BUILTIN_VD_BHSI (BINOP, addp)
8555
- VAR1 (UNOP, addp, di)
8556
+ Parameter 1 is the 'type' of the intrinsic. This is used to
8557
+ describe the type modifiers (for example; unsigned) applied to
8558
+ each of the parameters to the intrinsic function.
8560
- BUILTIN_VD_RE (REINTERP, reinterpretdi)
8561
- BUILTIN_VDC (REINTERP, reinterpretv8qi)
8562
- BUILTIN_VDC (REINTERP, reinterpretv4hi)
8563
- BUILTIN_VDC (REINTERP, reinterpretv2si)
8564
- BUILTIN_VDC (REINTERP, reinterpretv2sf)
8565
- BUILTIN_VQ (REINTERP, reinterpretv16qi)
8566
- BUILTIN_VQ (REINTERP, reinterpretv8hi)
8567
- BUILTIN_VQ (REINTERP, reinterpretv4si)
8568
- BUILTIN_VQ (REINTERP, reinterpretv4sf)
8569
- BUILTIN_VQ (REINTERP, reinterpretv2di)
8570
- BUILTIN_VQ (REINTERP, reinterpretv2df)
8571
+ Parameter 2 is the name of the intrinsic. This is appended
8572
+ to `__builtin_aarch64_<name><mode>` to give the intrinsic name
8573
+ as exported to the front-ends.
8575
- BUILTIN_VDQ_I (BINOP, dup_lane)
8576
- BUILTIN_SDQ_I (BINOP, dup_lane)
8577
+ Parameter 3 describes how to map from the name to the CODE_FOR_
8578
+ macro holding the RTL pattern for the intrinsic. This mapping is:
8579
+ 0 - CODE_FOR_aarch64_<name><mode>
8580
+ 1-9 - CODE_FOR_<name><mode><1-9>
8581
+ 10 - CODE_FOR_<name><mode>. */
8583
+ BUILTIN_VD_RE (CREATE, create, 0)
8584
+ BUILTIN_VQ_S (GETLANE, get_lane_signed, 0)
8585
+ BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0)
8586
+ BUILTIN_VDQF (GETLANE, get_lane, 0)
8587
+ VAR1 (GETLANE, get_lane, 0, di)
8588
+ BUILTIN_VDC (COMBINE, combine, 0)
8589
+ BUILTIN_VB (BINOP, pmul, 0)
8590
+ BUILTIN_VDQF (UNOP, sqrt, 2)
8591
+ BUILTIN_VD_BHSI (BINOP, addp, 0)
8592
+ VAR1 (UNOP, addp, 0, di)
8594
+ BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
8595
+ BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
8596
+ BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
8597
+ BUILTIN_VDC (REINTERP, reinterpretv2si, 0)
8598
+ BUILTIN_VDC (REINTERP, reinterpretv2sf, 0)
8599
+ BUILTIN_VQ (REINTERP, reinterpretv16qi, 0)
8600
+ BUILTIN_VQ (REINTERP, reinterpretv8hi, 0)
8601
+ BUILTIN_VQ (REINTERP, reinterpretv4si, 0)
8602
+ BUILTIN_VQ (REINTERP, reinterpretv4sf, 0)
8603
+ BUILTIN_VQ (REINTERP, reinterpretv2di, 0)
8604
+ BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
8606
+ BUILTIN_VDQ_I (BINOP, dup_lane, 0)
8607
+ BUILTIN_SDQ_I (BINOP, dup_lane, 0)
8608
/* Implemented by aarch64_<sur>q<r>shl<mode>. */
8609
- BUILTIN_VSDQ_I (BINOP, sqshl)
8610
- BUILTIN_VSDQ_I (BINOP, uqshl)
8611
- BUILTIN_VSDQ_I (BINOP, sqrshl)
8612
- BUILTIN_VSDQ_I (BINOP, uqrshl)
8613
+ BUILTIN_VSDQ_I (BINOP, sqshl, 0)
8614
+ BUILTIN_VSDQ_I (BINOP, uqshl, 0)
8615
+ BUILTIN_VSDQ_I (BINOP, sqrshl, 0)
8616
+ BUILTIN_VSDQ_I (BINOP, uqrshl, 0)
8617
/* Implemented by aarch64_<su_optab><optab><mode>. */
8618
- BUILTIN_VSDQ_I (BINOP, sqadd)
8619
- BUILTIN_VSDQ_I (BINOP, uqadd)
8620
- BUILTIN_VSDQ_I (BINOP, sqsub)
8621
- BUILTIN_VSDQ_I (BINOP, uqsub)
8622
+ BUILTIN_VSDQ_I (BINOP, sqadd, 0)
8623
+ BUILTIN_VSDQ_I (BINOP, uqadd, 0)
8624
+ BUILTIN_VSDQ_I (BINOP, sqsub, 0)
8625
+ BUILTIN_VSDQ_I (BINOP, uqsub, 0)
8626
/* Implemented by aarch64_<sur>qadd<mode>. */
8627
- BUILTIN_VSDQ_I (BINOP, suqadd)
8628
- BUILTIN_VSDQ_I (BINOP, usqadd)
8629
+ BUILTIN_VSDQ_I (BINOP, suqadd, 0)
8630
+ BUILTIN_VSDQ_I (BINOP, usqadd, 0)
8632
/* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */
8633
- BUILTIN_VDC (GETLANE, get_dregoi)
8634
- BUILTIN_VDC (GETLANE, get_dregci)
8635
- BUILTIN_VDC (GETLANE, get_dregxi)
8636
+ BUILTIN_VDC (GETLANE, get_dregoi, 0)
8637
+ BUILTIN_VDC (GETLANE, get_dregci, 0)
8638
+ BUILTIN_VDC (GETLANE, get_dregxi, 0)
8639
/* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
8640
- BUILTIN_VQ (GETLANE, get_qregoi)
8641
- BUILTIN_VQ (GETLANE, get_qregci)
8642
- BUILTIN_VQ (GETLANE, get_qregxi)
8643
+ BUILTIN_VQ (GETLANE, get_qregoi, 0)
8644
+ BUILTIN_VQ (GETLANE, get_qregci, 0)
8645
+ BUILTIN_VQ (GETLANE, get_qregxi, 0)
8646
/* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
8647
- BUILTIN_VQ (SETLANE, set_qregoi)
8648
- BUILTIN_VQ (SETLANE, set_qregci)
8649
- BUILTIN_VQ (SETLANE, set_qregxi)
8650
+ BUILTIN_VQ (SETLANE, set_qregoi, 0)
8651
+ BUILTIN_VQ (SETLANE, set_qregci, 0)
8652
+ BUILTIN_VQ (SETLANE, set_qregxi, 0)
8653
/* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
8654
- BUILTIN_VDC (LOADSTRUCT, ld2)
8655
- BUILTIN_VDC (LOADSTRUCT, ld3)
8656
- BUILTIN_VDC (LOADSTRUCT, ld4)
8657
+ BUILTIN_VDC (LOADSTRUCT, ld2, 0)
8658
+ BUILTIN_VDC (LOADSTRUCT, ld3, 0)
8659
+ BUILTIN_VDC (LOADSTRUCT, ld4, 0)
8660
/* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */
8661
- BUILTIN_VQ (LOADSTRUCT, ld2)
8662
- BUILTIN_VQ (LOADSTRUCT, ld3)
8663
- BUILTIN_VQ (LOADSTRUCT, ld4)
8664
+ BUILTIN_VQ (LOADSTRUCT, ld2, 0)
8665
+ BUILTIN_VQ (LOADSTRUCT, ld3, 0)
8666
+ BUILTIN_VQ (LOADSTRUCT, ld4, 0)
8667
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
8668
- BUILTIN_VDC (STORESTRUCT, st2)
8669
- BUILTIN_VDC (STORESTRUCT, st3)
8670
- BUILTIN_VDC (STORESTRUCT, st4)
8671
+ BUILTIN_VDC (STORESTRUCT, st2, 0)
8672
+ BUILTIN_VDC (STORESTRUCT, st3, 0)
8673
+ BUILTIN_VDC (STORESTRUCT, st4, 0)
8674
/* Implemented by aarch64_st<VSTRUCT:nregs><VQ:mode>. */
8675
- BUILTIN_VQ (STORESTRUCT, st2)
8676
- BUILTIN_VQ (STORESTRUCT, st3)
8677
- BUILTIN_VQ (STORESTRUCT, st4)
8678
+ BUILTIN_VQ (STORESTRUCT, st2, 0)
8679
+ BUILTIN_VQ (STORESTRUCT, st3, 0)
8680
+ BUILTIN_VQ (STORESTRUCT, st4, 0)
8682
- BUILTIN_VQW (BINOP, saddl2)
8683
- BUILTIN_VQW (BINOP, uaddl2)
8684
- BUILTIN_VQW (BINOP, ssubl2)
8685
- BUILTIN_VQW (BINOP, usubl2)
8686
- BUILTIN_VQW (BINOP, saddw2)
8687
- BUILTIN_VQW (BINOP, uaddw2)
8688
- BUILTIN_VQW (BINOP, ssubw2)
8689
- BUILTIN_VQW (BINOP, usubw2)
8690
+ BUILTIN_VQW (BINOP, saddl2, 0)
8691
+ BUILTIN_VQW (BINOP, uaddl2, 0)
8692
+ BUILTIN_VQW (BINOP, ssubl2, 0)
8693
+ BUILTIN_VQW (BINOP, usubl2, 0)
8694
+ BUILTIN_VQW (BINOP, saddw2, 0)
8695
+ BUILTIN_VQW (BINOP, uaddw2, 0)
8696
+ BUILTIN_VQW (BINOP, ssubw2, 0)
8697
+ BUILTIN_VQW (BINOP, usubw2, 0)
8698
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>. */
8699
- BUILTIN_VDW (BINOP, saddl)
8700
- BUILTIN_VDW (BINOP, uaddl)
8701
- BUILTIN_VDW (BINOP, ssubl)
8702
- BUILTIN_VDW (BINOP, usubl)
8703
+ BUILTIN_VDW (BINOP, saddl, 0)
8704
+ BUILTIN_VDW (BINOP, uaddl, 0)
8705
+ BUILTIN_VDW (BINOP, ssubl, 0)
8706
+ BUILTIN_VDW (BINOP, usubl, 0)
8707
/* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>. */
8708
- BUILTIN_VDW (BINOP, saddw)
8709
- BUILTIN_VDW (BINOP, uaddw)
8710
- BUILTIN_VDW (BINOP, ssubw)
8711
- BUILTIN_VDW (BINOP, usubw)
8712
+ BUILTIN_VDW (BINOP, saddw, 0)
8713
+ BUILTIN_VDW (BINOP, uaddw, 0)
8714
+ BUILTIN_VDW (BINOP, ssubw, 0)
8715
+ BUILTIN_VDW (BINOP, usubw, 0)
8716
/* Implemented by aarch64_<sur>h<addsub><mode>. */
8717
- BUILTIN_VQ_S (BINOP, shadd)
8718
- BUILTIN_VQ_S (BINOP, uhadd)
8719
- BUILTIN_VQ_S (BINOP, srhadd)
8720
- BUILTIN_VQ_S (BINOP, urhadd)
8721
+ BUILTIN_VQ_S (BINOP, shadd, 0)
8722
+ BUILTIN_VQ_S (BINOP, uhadd, 0)
8723
+ BUILTIN_VQ_S (BINOP, srhadd, 0)
8724
+ BUILTIN_VQ_S (BINOP, urhadd, 0)
8725
/* Implemented by aarch64_<sur><addsub>hn<mode>. */
8726
- BUILTIN_VQN (BINOP, addhn)
8727
- BUILTIN_VQN (BINOP, raddhn)
8728
+ BUILTIN_VQN (BINOP, addhn, 0)
8729
+ BUILTIN_VQN (BINOP, raddhn, 0)
8730
/* Implemented by aarch64_<sur><addsub>hn2<mode>. */
8731
- BUILTIN_VQN (TERNOP, addhn2)
8732
- BUILTIN_VQN (TERNOP, raddhn2)
8733
+ BUILTIN_VQN (TERNOP, addhn2, 0)
8734
+ BUILTIN_VQN (TERNOP, raddhn2, 0)
8736
- BUILTIN_VSQN_HSDI (UNOP, sqmovun)
8737
+ BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
8738
/* Implemented by aarch64_<sur>qmovn<mode>. */
8739
- BUILTIN_VSQN_HSDI (UNOP, sqmovn)
8740
- BUILTIN_VSQN_HSDI (UNOP, uqmovn)
8741
+ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
8742
+ BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
8743
/* Implemented by aarch64_s<optab><mode>. */
8744
- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs)
8745
- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg)
8746
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
8747
+ BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
8749
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane)
8750
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane)
8751
- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq)
8752
- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq)
8753
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2)
8754
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2)
8755
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane)
8756
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane)
8757
- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq)
8758
- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq)
8759
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n)
8760
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n)
8761
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
8762
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
8763
+ BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0)
8764
+ BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0)
8765
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0)
8766
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0)
8767
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0)
8768
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0)
8769
+ BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0)
8770
+ BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0)
8771
+ BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0)
8772
+ BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0)
8773
/* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */
8774
- BUILTIN_VSD_HSI (TERNOP, sqdmlal)
8775
- BUILTIN_VSD_HSI (TERNOP, sqdmlsl)
8776
+ BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0)
8777
+ BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0)
8778
/* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>. */
8779
- BUILTIN_VD_HSI (TERNOP, sqdmlal_n)
8780
- BUILTIN_VD_HSI (TERNOP, sqdmlsl_n)
8781
+ BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0)
8782
+ BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0)
8784
- BUILTIN_VSD_HSI (BINOP, sqdmull)
8785
- BUILTIN_VSD_HSI (TERNOP, sqdmull_lane)
8786
- BUILTIN_VD_HSI (TERNOP, sqdmull_laneq)
8787
- BUILTIN_VD_HSI (BINOP, sqdmull_n)
8788
- BUILTIN_VQ_HSI (BINOP, sqdmull2)
8789
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane)
8790
- BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq)
8791
- BUILTIN_VQ_HSI (BINOP, sqdmull2_n)
8792
+ BUILTIN_VSD_HSI (BINOP, sqdmull, 0)
8793
+ BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0)
8794
+ BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0)
8795
+ BUILTIN_VD_HSI (BINOP, sqdmull_n, 0)
8796
+ BUILTIN_VQ_HSI (BINOP, sqdmull2, 0)
8797
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0)
8798
+ BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0)
8799
+ BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0)
8800
/* Implemented by aarch64_sq<r>dmulh<mode>. */
8801
- BUILTIN_VSDQ_HSI (BINOP, sqdmulh)
8802
- BUILTIN_VSDQ_HSI (BINOP, sqrdmulh)
8803
+ BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0)
8804
+ BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0)
8805
/* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */
8806
- BUILTIN_VDQHS (TERNOP, sqdmulh_lane)
8807
- BUILTIN_VDQHS (TERNOP, sqdmulh_laneq)
8808
- BUILTIN_VDQHS (TERNOP, sqrdmulh_lane)
8809
- BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq)
8810
- BUILTIN_SD_HSI (TERNOP, sqdmulh_lane)
8811
- BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane)
8812
+ BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0)
8813
+ BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0)
8814
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0)
8815
+ BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0)
8816
+ BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0)
8817
+ BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0)
8819
- BUILTIN_VSDQ_I_DI (BINOP, sshl_n)
8820
- BUILTIN_VSDQ_I_DI (BINOP, ushl_n)
8821
+ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
8822
/* Implemented by aarch64_<sur>shl<mode>. */
8823
- BUILTIN_VSDQ_I_DI (BINOP, sshl)
8824
- BUILTIN_VSDQ_I_DI (BINOP, ushl)
8825
- BUILTIN_VSDQ_I_DI (BINOP, srshl)
8826
- BUILTIN_VSDQ_I_DI (BINOP, urshl)
8827
+ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
8828
+ BUILTIN_VSDQ_I_DI (BINOP, ushl, 0)
8829
+ BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
8830
+ BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
8832
- BUILTIN_VSDQ_I_DI (SHIFTIMM, sshr_n)
8833
- BUILTIN_VSDQ_I_DI (SHIFTIMM, ushr_n)
8834
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3)
8835
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
8836
/* Implemented by aarch64_<sur>shr_n<mode>. */
8837
- BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n)
8838
- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n)
8839
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
8840
+ BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
8841
/* Implemented by aarch64_<sur>sra_n<mode>. */
8842
- BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n)
8843
- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n)
8844
- BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n)
8845
- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n)
8846
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0)
8847
+ BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0)
8848
+ BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0)
8849
+ BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0)
8850
/* Implemented by aarch64_<sur>shll_n<mode>. */
8851
- BUILTIN_VDW (SHIFTIMM, sshll_n)
8852
- BUILTIN_VDW (SHIFTIMM, ushll_n)
8853
+ BUILTIN_VDW (SHIFTIMM, sshll_n, 0)
8854
+ BUILTIN_VDW (SHIFTIMM, ushll_n, 0)
8855
/* Implemented by aarch64_<sur>shll2_n<mode>. */
8856
- BUILTIN_VQW (SHIFTIMM, sshll2_n)
8857
- BUILTIN_VQW (SHIFTIMM, ushll2_n)
8858
+ BUILTIN_VQW (SHIFTIMM, sshll2_n, 0)
8859
+ BUILTIN_VQW (SHIFTIMM, ushll2_n, 0)
8860
/* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
8861
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n)
8862
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n)
8863
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n)
8864
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n)
8865
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n)
8866
- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n)
8867
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0)
8868
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0)
8869
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0)
8870
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0)
8871
+ BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0)
8872
+ BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0)
8873
/* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
8874
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n)
8875
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n)
8876
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n)
8877
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n)
8878
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
8879
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0)
8880
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
8881
+ BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0)
8882
/* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
8883
- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n)
8884
- BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n)
8885
- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n)
8886
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0)
8887
+ BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
8888
+ BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
8890
/* Implemented by aarch64_cm<cmp><mode>. */
8891
- BUILTIN_VSDQ_I_DI (BINOP, cmeq)
8892
- BUILTIN_VSDQ_I_DI (BINOP, cmge)
8893
- BUILTIN_VSDQ_I_DI (BINOP, cmgt)
8894
- BUILTIN_VSDQ_I_DI (BINOP, cmle)
8895
- BUILTIN_VSDQ_I_DI (BINOP, cmlt)
8896
+ BUILTIN_VSDQ_I_DI (BINOP, cmeq, 0)
8897
+ BUILTIN_VSDQ_I_DI (BINOP, cmge, 0)
8898
+ BUILTIN_VSDQ_I_DI (BINOP, cmgt, 0)
8899
+ BUILTIN_VSDQ_I_DI (BINOP, cmle, 0)
8900
+ BUILTIN_VSDQ_I_DI (BINOP, cmlt, 0)
8901
/* Implemented by aarch64_cm<cmp><mode>. */
8902
- BUILTIN_VSDQ_I_DI (BINOP, cmhs)
8903
- BUILTIN_VSDQ_I_DI (BINOP, cmhi)
8904
- BUILTIN_VSDQ_I_DI (BINOP, cmtst)
8905
+ BUILTIN_VSDQ_I_DI (BINOP, cmhs, 0)
8906
+ BUILTIN_VSDQ_I_DI (BINOP, cmhi, 0)
8907
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
8909
/* Implemented by aarch64_<fmaxmin><mode>. */
8910
- BUILTIN_VDQF (BINOP, fmax)
8911
- BUILTIN_VDQF (BINOP, fmin)
8912
- /* Implemented by aarch64_<maxmin><mode>. */
8913
- BUILTIN_VDQ_BHSI (BINOP, smax)
8914
- BUILTIN_VDQ_BHSI (BINOP, smin)
8915
- BUILTIN_VDQ_BHSI (BINOP, umax)
8916
- BUILTIN_VDQ_BHSI (BINOP, umin)
8917
+ BUILTIN_VDQF (BINOP, fmax, 0)
8918
+ BUILTIN_VDQF (BINOP, fmin, 0)
8920
- /* Implemented by aarch64_frint<frint_suffix><mode>. */
8921
- BUILTIN_VDQF (UNOP, frintz)
8922
- BUILTIN_VDQF (UNOP, frintp)
8923
- BUILTIN_VDQF (UNOP, frintm)
8924
- BUILTIN_VDQF (UNOP, frinti)
8925
- BUILTIN_VDQF (UNOP, frintx)
8926
- BUILTIN_VDQF (UNOP, frinta)
8927
+ /* Implemented by aarch64_addv<mode>. */
8928
+ BUILTIN_VDQF (UNOP, addv, 0)
8930
- /* Implemented by aarch64_fcvt<frint_suffix><su><mode>. */
8931
- BUILTIN_VDQF (UNOP, fcvtzs)
8932
- BUILTIN_VDQF (UNOP, fcvtzu)
8933
- BUILTIN_VDQF (UNOP, fcvtas)
8934
- BUILTIN_VDQF (UNOP, fcvtau)
8935
- BUILTIN_VDQF (UNOP, fcvtps)
8936
- BUILTIN_VDQF (UNOP, fcvtpu)
8937
- BUILTIN_VDQF (UNOP, fcvtms)
8938
- BUILTIN_VDQF (UNOP, fcvtmu)
8939
+ /* Implemented by <maxmin><mode>3. */
8940
+ BUILTIN_VDQ_BHSI (BINOP, smax, 3)
8941
+ BUILTIN_VDQ_BHSI (BINOP, smin, 3)
8942
+ BUILTIN_VDQ_BHSI (BINOP, umax, 3)
8943
+ BUILTIN_VDQ_BHSI (BINOP, umin, 3)
8945
+ /* Implemented by <frint_pattern><mode>2. */
8946
+ BUILTIN_VDQF (UNOP, btrunc, 2)
8947
+ BUILTIN_VDQF (UNOP, ceil, 2)
8948
+ BUILTIN_VDQF (UNOP, floor, 2)
8949
+ BUILTIN_VDQF (UNOP, nearbyint, 2)
8950
+ BUILTIN_VDQF (UNOP, rint, 2)
8951
+ BUILTIN_VDQF (UNOP, round, 2)
8952
+ BUILTIN_VDQF (UNOP, frintn, 2)
8954
+ /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
8955
+ VAR1 (UNOP, lbtruncv2sf, 2, v2si)
8956
+ VAR1 (UNOP, lbtruncv4sf, 2, v4si)
8957
+ VAR1 (UNOP, lbtruncv2df, 2, v2di)
8959
+ VAR1 (UNOP, lbtruncuv2sf, 2, v2si)
8960
+ VAR1 (UNOP, lbtruncuv4sf, 2, v4si)
8961
+ VAR1 (UNOP, lbtruncuv2df, 2, v2di)
8963
+ VAR1 (UNOP, lroundv2sf, 2, v2si)
8964
+ VAR1 (UNOP, lroundv4sf, 2, v4si)
8965
+ VAR1 (UNOP, lroundv2df, 2, v2di)
8966
+ /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
8967
+ VAR1 (UNOP, lroundsf, 2, si)
8968
+ VAR1 (UNOP, lrounddf, 2, di)
8970
+ VAR1 (UNOP, lrounduv2sf, 2, v2si)
8971
+ VAR1 (UNOP, lrounduv4sf, 2, v4si)
8972
+ VAR1 (UNOP, lrounduv2df, 2, v2di)
8973
+ VAR1 (UNOP, lroundusf, 2, si)
8974
+ VAR1 (UNOP, lroundudf, 2, di)
8976
+ VAR1 (UNOP, lceilv2sf, 2, v2si)
8977
+ VAR1 (UNOP, lceilv4sf, 2, v4si)
8978
+ VAR1 (UNOP, lceilv2df, 2, v2di)
8980
+ VAR1 (UNOP, lceiluv2sf, 2, v2si)
8981
+ VAR1 (UNOP, lceiluv4sf, 2, v4si)
8982
+ VAR1 (UNOP, lceiluv2df, 2, v2di)
8983
+ VAR1 (UNOP, lceilusf, 2, si)
8984
+ VAR1 (UNOP, lceiludf, 2, di)
8986
+ VAR1 (UNOP, lfloorv2sf, 2, v2si)
8987
+ VAR1 (UNOP, lfloorv4sf, 2, v4si)
8988
+ VAR1 (UNOP, lfloorv2df, 2, v2di)
8990
+ VAR1 (UNOP, lflooruv2sf, 2, v2si)
8991
+ VAR1 (UNOP, lflooruv4sf, 2, v4si)
8992
+ VAR1 (UNOP, lflooruv2df, 2, v2di)
8993
+ VAR1 (UNOP, lfloorusf, 2, si)
8994
+ VAR1 (UNOP, lfloorudf, 2, di)
8996
+ VAR1 (UNOP, lfrintnv2sf, 2, v2si)
8997
+ VAR1 (UNOP, lfrintnv4sf, 2, v4si)
8998
+ VAR1 (UNOP, lfrintnv2df, 2, v2di)
8999
+ VAR1 (UNOP, lfrintnsf, 2, si)
9000
+ VAR1 (UNOP, lfrintndf, 2, di)
9002
+ VAR1 (UNOP, lfrintnuv2sf, 2, v2si)
9003
+ VAR1 (UNOP, lfrintnuv4sf, 2, v4si)
9004
+ VAR1 (UNOP, lfrintnuv2df, 2, v2di)
9005
+ VAR1 (UNOP, lfrintnusf, 2, si)
9006
+ VAR1 (UNOP, lfrintnudf, 2, di)
9008
+ /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
9009
+ VAR1 (UNOP, floatv2si, 2, v2sf)
9010
+ VAR1 (UNOP, floatv4si, 2, v4sf)
9011
+ VAR1 (UNOP, floatv2di, 2, v2df)
9013
+ VAR1 (UNOP, floatunsv2si, 2, v2sf)
9014
+ VAR1 (UNOP, floatunsv4si, 2, v4sf)
9015
+ VAR1 (UNOP, floatunsv2di, 2, v2df)
9018
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
9019
- BUILTIN_VALL (BINOP, zip1)
9020
- BUILTIN_VALL (BINOP, zip2)
9021
- BUILTIN_VALL (BINOP, uzp1)
9022
- BUILTIN_VALL (BINOP, uzp2)
9023
- BUILTIN_VALL (BINOP, trn1)
9024
- BUILTIN_VALL (BINOP, trn2)
9025
+ BUILTIN_VALL (BINOP, zip1, 0)
9026
+ BUILTIN_VALL (BINOP, zip2, 0)
9027
+ BUILTIN_VALL (BINOP, uzp1, 0)
9028
+ BUILTIN_VALL (BINOP, uzp2, 0)
9029
+ BUILTIN_VALL (BINOP, trn1, 0)
9030
+ BUILTIN_VALL (BINOP, trn2, 0)
9033
+ aarch64_frecp<FRECP:frecp_suffix><mode>. */
9034
+ BUILTIN_GPF (UNOP, frecpe, 0)
9035
+ BUILTIN_GPF (BINOP, frecps, 0)
9036
+ BUILTIN_GPF (UNOP, frecpx, 0)
9038
+ BUILTIN_VDQF (UNOP, frecpe, 0)
9039
+ BUILTIN_VDQF (BINOP, frecps, 0)
9041
+ BUILTIN_VDQF (UNOP, abs, 2)
9043
+ VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
9044
+ VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
9046
+ VAR1 (UNOP, float_extend_lo_, 0, v2df)
9047
+ VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
9048
--- a/src/gcc/config/aarch64/constraints.md
9049
+++ b/src/gcc/config/aarch64/constraints.md
9050
@@ -152,6 +152,22 @@
9052
NULL, NULL) != 0")))
9054
+(define_constraint "Dh"
9056
+ A constraint that matches an immediate operand valid for\
9057
+ AdvSIMD scalar move in HImode."
9058
+ (and (match_code "const_int")
9059
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
9062
+(define_constraint "Dq"
9064
+ A constraint that matches an immediate operand valid for\
9065
+ AdvSIMD scalar move in QImode."
9066
+ (and (match_code "const_int")
9067
+ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
9070
(define_constraint "Dl"
9072
A constraint that matches vector of immediates for left shifts."
9073
--- a/src/gcc/config/aarch64/aarch64.c
9074
+++ b/src/gcc/config/aarch64/aarch64.c
9075
@@ -3349,7 +3349,7 @@
9076
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
9079
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
9080
+ asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
9084
@@ -3362,8 +3362,7 @@
9085
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
9088
- asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
9089
- REGNO (x) - V0_REGNUM + (code - 'S'));
9090
+ asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
9094
@@ -3373,20 +3372,19 @@
9096
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
9098
- asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
9099
+ asm_fprintf (f, "%czr", code);
9103
if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
9105
- asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
9106
- REGNO (x) - R0_REGNUM);
9107
+ asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
9111
if (REG_P (x) && REGNO (x) == SP_REGNUM)
9113
- asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
9114
+ asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
9118
@@ -6392,6 +6390,21 @@
9119
return gen_rtx_CONST_VECTOR (mode, v);
9122
+/* Check OP is a legal scalar immediate for the MOVI instruction. */
9125
+aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
9127
+ enum machine_mode vmode;
9129
+ gcc_assert (!VECTOR_MODE_P (mode));
9130
+ vmode = aarch64_preferred_simd_mode (mode);
9131
+ rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
9132
+ int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
9133
+ NULL, NULL, NULL, NULL);
9137
/* Construct and return a PARALLEL RTX vector. */
9139
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
9140
@@ -7825,6 +7838,9 @@
9141
#undef TARGET_EXPAND_BUILTIN_VA_START
9142
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9144
+#undef TARGET_FOLD_BUILTIN
9145
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9147
#undef TARGET_FUNCTION_ARG
9148
#define TARGET_FUNCTION_ARG aarch64_function_arg
9150
@@ -7846,6 +7862,9 @@
9151
#undef TARGET_FRAME_POINTER_REQUIRED
9152
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9154
+#undef TARGET_GIMPLE_FOLD_BUILTIN
9155
+#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9157
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
9158
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9160
--- a/src/gcc/config/aarch64/iterators.md
9161
+++ b/src/gcc/config/aarch64/iterators.md
9163
UNSPEC_CMTST ; Used in aarch64-simd.md.
9164
UNSPEC_FMAX ; Used in aarch64-simd.md.
9165
UNSPEC_FMIN ; Used in aarch64-simd.md.
9166
- UNSPEC_BSL ; Used in aarch64-simd.md.
9167
UNSPEC_TBL ; Used in vector permute patterns.
9168
UNSPEC_CONCAT ; Used in vector permute patterns.
9169
UNSPEC_ZIP1 ; Used in vector permute patterns.
9171
;; 32-bit version and "%x0" in the 64-bit version.
9172
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
9174
+;; For constraints used in scalar immediate vector moves
9175
+(define_mode_attr hq [(HI "h") (QI "q")])
9177
;; For scalar usage of vector/FP registers
9178
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
9179
(V8QI "") (V16QI "")
9181
;; Iterator for integer conversions
9182
(define_code_iterator FIXUORS [fix unsigned_fix])
9184
+;; Iterator for float conversions
9185
+(define_code_iterator FLOATUORS [float unsigned_float])
9187
;; Code iterator for variants of vector max and min.
9188
(define_code_iterator MAXMIN [smax smin umax umin])
9190
@@ -555,6 +560,10 @@
9191
(zero_extend "zero_extend")
9192
(sign_extract "extv")
9193
(zero_extract "extzv")
9195
+ (unsigned_fix "fixuns")
9197
+ (unsigned_float "floatuns")
9201
@@ -573,10 +582,14 @@
9205
+(define_code_attr fix_trunc_optab [(fix "fix_trunc")
9206
+ (unsigned_fix "fixuns_trunc")])
9208
;; Optab prefix for sign/zero-extending operations
9209
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
9211
(fix "") (unsigned_fix "u")
9212
+ (float "s") (unsigned_float "u")
9213
(ss_plus "s") (us_plus "u")
9214
(ss_minus "s") (us_minus "u")])
9216
@@ -690,11 +703,14 @@
9217
UNSPEC_UZP1 UNSPEC_UZP2])
9219
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
9220
- UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
9221
+ UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
9224
(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
9226
+ UNSPEC_FRINTA UNSPEC_FRINTN])
9228
+(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
9230
;; -------------------------------------------------------------------
9231
;; Int Iterators Attributes.
9232
;; -------------------------------------------------------------------
9233
@@ -783,15 +799,18 @@
9234
(UNSPEC_FRINTM "floor")
9235
(UNSPEC_FRINTI "nearbyint")
9236
(UNSPEC_FRINTX "rint")
9237
- (UNSPEC_FRINTA "round")])
9238
+ (UNSPEC_FRINTA "round")
9239
+ (UNSPEC_FRINTN "frintn")])
9241
;; frint suffix for floating-point rounding instructions.
9242
(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p")
9243
(UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i")
9244
- (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")])
9245
+ (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")
9246
+ (UNSPEC_FRINTN "n")])
9248
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
9249
- (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
9250
+ (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")
9251
+ (UNSPEC_FRINTN "frintn")])
9253
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
9254
(UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
9256
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
9257
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
9258
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
9260
+(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
9261
--- a/src/gcc/config/aarch64/aarch64.h
9262
+++ b/src/gcc/config/aarch64/aarch64.h
9265
#define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y)
9267
+#define REVERSIBLE_CC_MODE(MODE) 1
9269
#define REVERSE_CONDITION(CODE, MODE) \
9270
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
9271
? reverse_condition_maybe_unordered (CODE) \
9272
--- a/src/gcc/config/arm/arm-tables.opt
9273
+++ b/src/gcc/config/arm/arm-tables.opt
9275
Enum(processor_type) String(cortex-a15) Value(cortexa15)
9278
+Enum(processor_type) String(cortex-a53) Value(cortexa53)
9281
Enum(processor_type) String(cortex-r4) Value(cortexr4)
9285
Enum(processor_type) String(cortex-r5) Value(cortexr5)
9288
+Enum(processor_type) String(cortex-r7) Value(cortexr7)
9291
Enum(processor_type) String(cortex-m4) Value(cortexm4)
9294
--- a/src/gcc/config/arm/arm.c
9295
+++ b/src/gcc/config/arm/arm.c
9297
static tree arm_builtin_decl (unsigned, bool);
9298
static void emit_constant_insn (rtx cond, rtx pattern);
9299
static rtx emit_set_insn (rtx, rtx);
9300
+static rtx emit_multi_reg_push (unsigned long);
9301
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
9303
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
9304
@@ -620,6 +621,13 @@
9305
#undef TARGET_CLASS_LIKELY_SPILLED_P
9306
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
9308
+#undef TARGET_VECTORIZE_BUILTINS
9309
+#define TARGET_VECTORIZE_BUILTINS
9311
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9312
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9313
+ arm_builtin_vectorized_function
9315
#undef TARGET_VECTOR_ALIGNMENT
9316
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
9318
@@ -839,6 +847,10 @@
9319
int arm_arch_arm_hwdiv;
9320
int arm_arch_thumb_hwdiv;
9322
+/* Nonzero if we should use Neon to handle 64-bits operations rather
9323
+ than core registers. */
9324
+int prefer_neon_for_64bits = 0;
9326
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
9327
we must report the mode of the memory reference from
9328
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
9330
false, /* Prefer LDRD/STRD. */
9331
{true, true}, /* Prefer non short circuit. */
9332
&arm_default_vec_cost, /* Vectorizer costs. */
9333
+ false /* Prefer Neon for 64-bits bitops. */
9336
const struct tune_params arm_fastmul_tune =
9338
false, /* Prefer LDRD/STRD. */
9339
{true, true}, /* Prefer non short circuit. */
9340
&arm_default_vec_cost, /* Vectorizer costs. */
9341
+ false /* Prefer Neon for 64-bits bitops. */
9344
/* StrongARM has early execution of branches, so a sequence that is worth
9346
false, /* Prefer LDRD/STRD. */
9347
{true, true}, /* Prefer non short circuit. */
9348
&arm_default_vec_cost, /* Vectorizer costs. */
9349
+ false /* Prefer Neon for 64-bits bitops. */
9352
const struct tune_params arm_xscale_tune =
9354
false, /* Prefer LDRD/STRD. */
9355
{true, true}, /* Prefer non short circuit. */
9356
&arm_default_vec_cost, /* Vectorizer costs. */
9357
+ false /* Prefer Neon for 64-bits bitops. */
9360
const struct tune_params arm_9e_tune =
9361
@@ -995,6 +1011,7 @@
9362
false, /* Prefer LDRD/STRD. */
9363
{true, true}, /* Prefer non short circuit. */
9364
&arm_default_vec_cost, /* Vectorizer costs. */
9365
+ false /* Prefer Neon for 64-bits bitops. */
9368
const struct tune_params arm_v6t2_tune =
9369
@@ -1009,6 +1026,7 @@
9370
false, /* Prefer LDRD/STRD. */
9371
{true, true}, /* Prefer non short circuit. */
9372
&arm_default_vec_cost, /* Vectorizer costs. */
9373
+ false /* Prefer Neon for 64-bits bitops. */
9376
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
9377
@@ -1024,6 +1042,7 @@
9378
false, /* Prefer LDRD/STRD. */
9379
{true, true}, /* Prefer non short circuit. */
9380
&arm_default_vec_cost, /* Vectorizer costs. */
9381
+ false /* Prefer Neon for 64-bits bitops. */
9384
const struct tune_params arm_cortex_a15_tune =
9385
@@ -1038,6 +1057,7 @@
9386
true, /* Prefer LDRD/STRD. */
9387
{true, true}, /* Prefer non short circuit. */
9388
&arm_default_vec_cost, /* Vectorizer costs. */
9389
+ false /* Prefer Neon for 64-bits bitops. */
9392
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
9393
@@ -1055,6 +1075,7 @@
9394
false, /* Prefer LDRD/STRD. */
9395
{false, false}, /* Prefer non short circuit. */
9396
&arm_default_vec_cost, /* Vectorizer costs. */
9397
+ false /* Prefer Neon for 64-bits bitops. */
9400
const struct tune_params arm_cortex_a9_tune =
9401
@@ -1069,6 +1090,7 @@
9402
false, /* Prefer LDRD/STRD. */
9403
{true, true}, /* Prefer non short circuit. */
9404
&arm_default_vec_cost, /* Vectorizer costs. */
9405
+ false /* Prefer Neon for 64-bits bitops. */
9408
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
9409
@@ -1085,6 +1107,7 @@
9410
false, /* Prefer LDRD/STRD. */
9411
{false, false}, /* Prefer non short circuit. */
9412
&arm_default_vec_cost, /* Vectorizer costs. */
9413
+ false /* Prefer Neon for 64-bits bitops. */
9416
const struct tune_params arm_fa726te_tune =
9417
@@ -1099,6 +1122,7 @@
9418
false, /* Prefer LDRD/STRD. */
9419
{true, true}, /* Prefer non short circuit. */
9420
&arm_default_vec_cost, /* Vectorizer costs. */
9421
+ false /* Prefer Neon for 64-bits bitops. */
9425
@@ -2129,6 +2153,12 @@
9426
global_options.x_param_values,
9427
global_options_set.x_param_values);
9429
+ /* Use Neon to perform 64-bits operations rather than core
9431
+ prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
9432
+ if (use_neon_for_64bits == 1)
9433
+ prefer_neon_for_64bits = true;
9435
/* Use the alternative scheduling-pressure algorithm by default. */
9436
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
9437
global_options.x_param_values,
9438
@@ -2382,6 +2412,10 @@
9439
if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
9442
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
9443
+ && !optimize_function_for_size_p (cfun))
9446
offsets = arm_get_frame_offsets ();
9447
stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9449
@@ -2617,6 +2651,9 @@
9454
+ return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
9455
+ && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
9457
return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
9459
@@ -12596,6 +12633,277 @@
9463
+/* Helper for gen_operands_ldrd_strd. Returns true iff the memory
9464
+ operand ADDR is an immediate offset from the base register and is
9465
+ not volatile, in which case it sets BASE and OFFSET
9468
+mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
9470
+ /* TODO: Handle more general memory operand patterns, such as
9471
+ PRE_DEC and PRE_INC. */
9473
+ /* Convert a subreg of mem into mem itself. */
9474
+ if (GET_CODE (addr) == SUBREG)
9475
+ addr = alter_subreg (&addr, true);
9477
+ gcc_assert (MEM_P (addr));
9479
+ /* Don't modify volatile memory accesses. */
9480
+ if (MEM_VOLATILE_P (addr))
9483
+ *offset = const0_rtx;
9485
+ addr = XEXP (addr, 0);
9491
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
9493
+ *base = XEXP (addr, 0);
9494
+ *offset = XEXP (addr, 1);
9495
+ return (REG_P (*base) && CONST_INT_P (*offset));
9501
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
9503
+/* Called from a peephole2 to replace two word-size accesses with a
9504
+ single LDRD/STRD instruction. Returns true iff we can generate a
9505
+ new instruction sequence. That is, both accesses use the same base
9506
+ register and the gap between constant offsets is 4. This function
9507
+ may reorder its operands to match ldrd/strd RTL templates.
9508
+ OPERANDS are the operands found by the peephole matcher;
9509
+ OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
9510
+ corresponding memory operands. LOAD indicaates whether the access
9511
+ is load or store. CONST_STORE indicates a store of constant
9512
+ integer values held in OPERANDS[4,5] and assumes that the pattern
9513
+ is of length 4 insn, for the purpose of checking dead registers.
9514
+ COMMUTE indicates that register operands may be reordered. */
9516
+gen_operands_ldrd_strd (rtx *operands, bool load,
9517
+ bool const_store, bool commute)
9520
+ HOST_WIDE_INT offsets[2], offset;
9521
+ rtx base = NULL_RTX;
9522
+ rtx cur_base, cur_offset, tmp;
9524
+ HARD_REG_SET regset;
9526
+ gcc_assert (!const_store || !load);
9527
+ /* Check that the memory references are immediate offsets from the
9528
+ same base register. Extract the base register, the destination
9529
+ registers, and the corresponding memory offsets. */
9530
+ for (i = 0; i < nops; i++)
9532
+ if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
9537
+ else if (REGNO (base) != REGNO (cur_base))
9540
+ offsets[i] = INTVAL (cur_offset);
9541
+ if (GET_CODE (operands[i]) == SUBREG)
9543
+ tmp = SUBREG_REG (operands[i]);
9544
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
9545
+ operands[i] = tmp;
9549
+ /* Make sure there is no dependency between the individual loads. */
9550
+ if (load && REGNO (operands[0]) == REGNO (base))
9551
+ return false; /* RAW */
9553
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
9554
+ return false; /* WAW */
9556
+ /* If the same input register is used in both stores
9557
+ when storing different constants, try to find a free register.
9558
+ For example, the code
9563
+ can be transformed into
9566
+ in Thumb mode assuming that r1 is free. */
9568
+ && REGNO (operands[0]) == REGNO (operands[1])
9569
+ && INTVAL (operands[4]) != INTVAL (operands[5]))
9571
+ if (TARGET_THUMB2)
9573
+ CLEAR_HARD_REG_SET (regset);
9574
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
9575
+ if (tmp == NULL_RTX)
9578
+ /* Use the new register in the first load to ensure that
9579
+ if the original input register is not dead after peephole,
9580
+ then it will have the correct constant value. */
9581
+ operands[0] = tmp;
9583
+ else if (TARGET_ARM)
9586
+ int regno = REGNO (operands[0]);
9587
+ if (!peep2_reg_dead_p (4, operands[0]))
9589
+ /* When the input register is even and is not dead after the
9590
+ pattern, it has to hold the second constant but we cannot
9591
+ form a legal STRD in ARM mode with this register as the second
9593
+ if (regno % 2 == 0)
9596
+ /* Is regno-1 free? */
9597
+ SET_HARD_REG_SET (regset);
9598
+ CLEAR_HARD_REG_BIT(regset, regno - 1);
9599
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
9600
+ if (tmp == NULL_RTX)
9603
+ operands[0] = tmp;
9607
+ /* Find a DImode register. */
9608
+ CLEAR_HARD_REG_SET (regset);
9609
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
9610
+ if (tmp != NULL_RTX)
9612
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
9613
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
9617
+ /* Can we use the input register to form a DI register? */
9618
+ SET_HARD_REG_SET (regset);
9619
+ CLEAR_HARD_REG_BIT(regset,
9620
+ regno % 2 == 0 ? regno + 1 : regno - 1);
9621
+ tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
9622
+ if (tmp == NULL_RTX)
9624
+ operands[regno % 2 == 1 ? 0 : 1] = tmp;
9628
+ gcc_assert (operands[0] != NULL_RTX);
9629
+ gcc_assert (operands[1] != NULL_RTX);
9630
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
9631
+ gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
9635
+ /* Make sure the instructions are ordered with lower memory access first. */
9636
+ if (offsets[0] > offsets[1])
9638
+ gap = offsets[0] - offsets[1];
9639
+ offset = offsets[1];
9641
+ /* Swap the instructions such that lower memory is accessed first. */
9642
+ SWAP_RTX (operands[0], operands[1]);
9643
+ SWAP_RTX (operands[2], operands[3]);
9645
+ SWAP_RTX (operands[4], operands[5]);
9649
+ gap = offsets[1] - offsets[0];
9650
+ offset = offsets[0];
9653
+ /* Make sure accesses are to consecutive memory locations. */
9657
+ /* Make sure we generate legal instructions. */
9658
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
9662
+ /* In Thumb state, where registers are almost unconstrained, there
9663
+ is little hope to fix it. */
9664
+ if (TARGET_THUMB2)
9667
+ if (load && commute)
9669
+ /* Try reordering registers. */
9670
+ SWAP_RTX (operands[0], operands[1]);
9671
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
9678
+ /* If input registers are dead after this pattern, they can be
9679
+ reordered or replaced by other registers that are free in the
9680
+ current pattern. */
9681
+ if (!peep2_reg_dead_p (4, operands[0])
9682
+ || !peep2_reg_dead_p (4, operands[1]))
9685
+ /* Try to reorder the input registers. */
9686
+ /* For example, the code
9691
+ can be transformed into
9696
+ if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
9699
+ SWAP_RTX (operands[0], operands[1]);
9703
+ /* Try to find a free DI register. */
9704
+ CLEAR_HARD_REG_SET (regset);
9705
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[0]));
9706
+ add_to_hard_reg_set (®set, SImode, REGNO (operands[1]));
9709
+ tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
9710
+ if (tmp == NULL_RTX)
9713
+ /* DREG must be an even-numbered register in DImode.
9714
+ Split it into SI registers. */
9715
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
9716
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
9717
+ gcc_assert (operands[0] != NULL_RTX);
9718
+ gcc_assert (operands[1] != NULL_RTX);
9719
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
9720
+ gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
9722
+ return (operands_ok_ldrd_strd (operands[0], operands[1],
9735
/* Print a symbolic form of X to the debug file, F. */
9737
@@ -14787,7 +15095,8 @@
9739
/* Constraints should ensure this. */
9740
gcc_assert (code0 == MEM && code1 == REG);
9741
- gcc_assert (REGNO (operands[1]) != IP_REGNUM);
9742
+ gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
9743
+ || (TARGET_ARM && TARGET_LDRD));
9745
switch (GET_CODE (XEXP (operands[0], 0)))
9747
@@ -16380,6 +16689,148 @@
9751
+/* STRD in ARM mode requires consecutive registers. This function emits STRD
9752
+ whenever possible, otherwise it emits single-word stores. The first store
9753
+ also allocates stack space for all saved registers, using writeback with
9754
+ post-addressing mode. All other stores use offset addressing. If no STRD
9755
+ can be emitted, this function emits a sequence of single-word stores,
9756
+ and not an STM as before, because single-word stores provide more freedom
9757
+ scheduling and can be turned into an STM by peephole optimizations. */
9759
+arm_emit_strd_push (unsigned long saved_regs_mask)
9762
+ int i, j, dwarf_index = 0;
9764
+ rtx dwarf = NULL_RTX;
9765
+ rtx insn = NULL_RTX;
9768
+ /* TODO: A more efficient code can be emitted by changing the
9769
+ layout, e.g., first push all pairs that can use STRD to keep the
9770
+ stack aligned, and then push all other registers. */
9771
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
9772
+ if (saved_regs_mask & (1 << i))
9775
+ gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
9776
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
9777
+ gcc_assert (num_regs > 0);
9779
+ /* Create sequence for DWARF info. */
9780
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
9782
+ /* For dwarf info, we generate explicit stack update. */
9783
+ tmp = gen_rtx_SET (VOIDmode,
9784
+ stack_pointer_rtx,
9785
+ plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
9786
+ RTX_FRAME_RELATED_P (tmp) = 1;
9787
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
9789
+ /* Save registers. */
9790
+ offset = - 4 * num_regs;
9792
+ while (j <= LAST_ARM_REGNUM)
9793
+ if (saved_regs_mask & (1 << j))
9796
+ && (saved_regs_mask & (1 << (j + 1))))
9798
+ /* Current register and previous register form register pair for
9799
+ which STRD can be generated. */
9802
+ /* Allocate stack space for all saved registers. */
9803
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
9804
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
9805
+ mem = gen_frame_mem (DImode, tmp);
9808
+ else if (offset > 0)
9809
+ mem = gen_frame_mem (DImode,
9810
+ plus_constant (Pmode,
9811
+ stack_pointer_rtx,
9814
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
9816
+ tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
9817
+ RTX_FRAME_RELATED_P (tmp) = 1;
9818
+ tmp = emit_insn (tmp);
9820
+ /* Record the first store insn. */
9821
+ if (dwarf_index == 1)
9824
+ /* Generate dwarf info. */
9825
+ mem = gen_frame_mem (SImode,
9826
+ plus_constant (Pmode,
9827
+ stack_pointer_rtx,
9829
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
9830
+ RTX_FRAME_RELATED_P (tmp) = 1;
9831
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
9833
+ mem = gen_frame_mem (SImode,
9834
+ plus_constant (Pmode,
9835
+ stack_pointer_rtx,
9837
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
9838
+ RTX_FRAME_RELATED_P (tmp) = 1;
9839
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
9846
+ /* Emit a single word store. */
9849
+ /* Allocate stack space for all saved registers. */
9850
+ tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
9851
+ tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
9852
+ mem = gen_frame_mem (SImode, tmp);
9855
+ else if (offset > 0)
9856
+ mem = gen_frame_mem (SImode,
9857
+ plus_constant (Pmode,
9858
+ stack_pointer_rtx,
9861
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
9863
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
9864
+ RTX_FRAME_RELATED_P (tmp) = 1;
9865
+ tmp = emit_insn (tmp);
9867
+ /* Record the first store insn. */
9868
+ if (dwarf_index == 1)
9871
+ /* Generate dwarf info. */
9872
+ mem = gen_frame_mem (SImode,
9873
+ plus_constant(Pmode,
9874
+ stack_pointer_rtx,
9876
+ tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
9877
+ RTX_FRAME_RELATED_P (tmp) = 1;
9878
+ XVECEXP (dwarf, 0, dwarf_index++) = tmp;
9887
+ /* Attach dwarf info to the first insn we generate. */
9888
+ gcc_assert (insn != NULL_RTX);
9889
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
9890
+ RTX_FRAME_RELATED_P (insn) = 1;
9893
/* Generate and emit an insn that we will recognize as a push_multi.
9894
Unfortunately, since this insn does not reflect very well the actual
9895
semantics of the operation, we need to annotate the insn for the benefit
9896
@@ -16579,6 +17030,17 @@
9897
if (saved_regs_mask & (1 << i))
9899
reg = gen_rtx_REG (SImode, i);
9900
+ if ((num_regs == 1) && emit_update && !return_in_pc)
9902
+ /* Emit single load with writeback. */
9903
+ tmp = gen_frame_mem (SImode,
9904
+ gen_rtx_POST_INC (Pmode,
9905
+ stack_pointer_rtx));
9906
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
9907
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
9911
tmp = gen_rtx_SET (VOIDmode,
9914
@@ -16810,6 +17272,129 @@
9918
+/* LDRD in ARM mode needs consecutive registers as operands. This function
9919
+ emits LDRD whenever possible, otherwise it emits single-word loads. It uses
9920
+ offset addressing and then generates one separate stack udpate. This provides
9921
+ more scheduling freedom, compared to writeback on every load. However,
9922
+ if the function returns using load into PC directly
9923
+ (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
9924
+ before the last load. TODO: Add a peephole optimization to recognize
9925
+ the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
9926
+ peephole optimization to merge the load at stack-offset zero
9927
+ with the stack update instruction using load with writeback
9928
+ in post-index addressing mode. */
9930
+arm_emit_ldrd_pop (unsigned long saved_regs_mask)
9934
+ rtx par = NULL_RTX;
9935
+ rtx dwarf = NULL_RTX;
9938
+ /* Restore saved registers. */
9939
+ gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
9941
+ while (j <= LAST_ARM_REGNUM)
9942
+ if (saved_regs_mask & (1 << j))
9945
+ && (saved_regs_mask & (1 << (j + 1)))
9946
+ && (j + 1) != PC_REGNUM)
9948
+ /* Current register and next register form register pair for which
9949
+ LDRD can be generated. PC is always the last register popped, and
9950
+ we handle it separately. */
9952
+ mem = gen_frame_mem (DImode,
9953
+ plus_constant (Pmode,
9954
+ stack_pointer_rtx,
9957
+ mem = gen_frame_mem (DImode, stack_pointer_rtx);
9959
+ tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
9960
+ RTX_FRAME_RELATED_P (tmp) = 1;
9961
+ tmp = emit_insn (tmp);
9963
+ /* Generate dwarf info. */
9965
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
9966
+ gen_rtx_REG (SImode, j),
9968
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
9969
+ gen_rtx_REG (SImode, j + 1),
9972
+ REG_NOTES (tmp) = dwarf;
9977
+ else if (j != PC_REGNUM)
9979
+ /* Emit a single word load. */
9981
+ mem = gen_frame_mem (SImode,
9982
+ plus_constant (Pmode,
9983
+ stack_pointer_rtx,
9986
+ mem = gen_frame_mem (SImode, stack_pointer_rtx);
9988
+ tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
9989
+ RTX_FRAME_RELATED_P (tmp) = 1;
9990
+ tmp = emit_insn (tmp);
9992
+ /* Generate dwarf info. */
9993
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
9994
+ gen_rtx_REG (SImode, j),
10000
+ else /* j == PC_REGNUM */
10006
+ /* Update the stack. */
10009
+ tmp = gen_rtx_SET (Pmode,
10010
+ stack_pointer_rtx,
10011
+ plus_constant (Pmode,
10012
+ stack_pointer_rtx,
10014
+ RTX_FRAME_RELATED_P (tmp) = 1;
10019
+ if (saved_regs_mask & (1 << PC_REGNUM))
10021
+ /* Only PC is to be popped. */
10022
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
10023
+ XVECEXP (par, 0, 0) = ret_rtx;
10024
+ tmp = gen_rtx_SET (SImode,
10025
+ gen_rtx_REG (SImode, PC_REGNUM),
10026
+ gen_frame_mem (SImode,
10027
+ gen_rtx_POST_INC (SImode,
10028
+ stack_pointer_rtx)));
10029
+ RTX_FRAME_RELATED_P (tmp) = 1;
10030
+ XVECEXP (par, 0, 1) = tmp;
10031
+ par = emit_jump_insn (par);
10033
+ /* Generate dwarf info. */
10034
+ dwarf = alloc_reg_note (REG_CFA_RESTORE,
10035
+ gen_rtx_REG (SImode, PC_REGNUM),
10037
+ REG_NOTES (par) = dwarf;
10041
/* Calculate the size of the return value that is passed in registers. */
10043
arm_size_return_regs (void)
10044
@@ -17019,9 +17604,10 @@
10045
/* If it is safe to use r3, then do so. This sometimes
10046
generates better code on Thumb-2 by avoiding the need to
10047
use 32-bit push/pop instructions. */
10048
- if (! any_sibcall_uses_r3 ()
10049
+ if (! any_sibcall_uses_r3 ()
10050
&& arm_size_return_regs () <= 12
10051
- && (offsets->saved_regs_mask & (1 << 3)) == 0)
10052
+ && (offsets->saved_regs_mask & (1 << 3)) == 0
10053
+ && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
10057
@@ -17453,6 +18039,12 @@
10059
thumb2_emit_strd_push (live_regs_mask);
10061
+ else if (TARGET_ARM
10062
+ && !TARGET_APCS_FRAME
10063
+ && !IS_INTERRUPT (func_type))
10065
+ arm_emit_strd_push (live_regs_mask);
10069
insn = emit_multi_reg_push (live_regs_mask);
10070
@@ -19332,6 +19924,7 @@
10078
@@ -19349,14 +19942,15 @@
10079
#define TYPE_MODE_BIT(X) (1 << (X))
10081
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
10082
- | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
10083
- | TYPE_MODE_BIT (T_DI))
10084
+ | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
10085
+ | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
10086
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
10087
| TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
10088
| TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
10090
#define v8qi_UP T_V8QI
10091
#define v4hi_UP T_V4HI
10092
+#define v4hf_UP T_V4HF
10093
#define v2si_UP T_V2SI
10094
#define v2sf_UP T_V2SF
10096
@@ -19392,6 +19986,8 @@
10100
+ NEON_FLOAT_WIDEN,
10101
+ NEON_FLOAT_NARROW,
10105
@@ -19452,7 +20048,8 @@
10106
VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
10107
{#N, NEON_##T, UP (J), CF (N, J), 0}
10109
-/* The mode entries in the following table correspond to the "key" type of the
10110
+/* The NEON builtin data can be found in arm_neon_builtins.def.
10111
+ The mode entries in the following table correspond to the "key" type of the
10112
instruction variant, i.e. equivalent to that which would be specified after
10113
the assembler mnemonic, which usually refers to the last vector operand.
10114
(Signed/unsigned/polynomial types are not differentiated between though, and
10115
@@ -19462,196 +20059,7 @@
10117
static neon_builtin_datum neon_builtin_data[] =
10119
- VAR10 (BINOP, vadd,
10120
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10121
- VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
10122
- VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
10123
- VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10124
- VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10125
- VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
10126
- VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10127
- VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10128
- VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
10129
- VAR2 (TERNOP, vfma, v2sf, v4sf),
10130
- VAR2 (TERNOP, vfms, v2sf, v4sf),
10131
- VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10132
- VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
10133
- VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
10134
- VAR2 (TERNOP, vqdmlal, v4hi, v2si),
10135
- VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
10136
- VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
10137
- VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
10138
- VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
10139
- VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
10140
- VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
10141
- VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
10142
- VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
10143
- VAR2 (BINOP, vqdmull, v4hi, v2si),
10144
- VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10145
- VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10146
- VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10147
- VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
10148
- VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
10149
- VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
10150
- VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10151
- VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10152
- VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10153
- VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
10154
- VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10155
- VAR10 (BINOP, vsub,
10156
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10157
- VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
10158
- VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
10159
- VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10160
- VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10161
- VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
10162
- VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10163
- VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10164
- VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10165
- VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10166
- VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10167
- VAR2 (BINOP, vcage, v2sf, v4sf),
10168
- VAR2 (BINOP, vcagt, v2sf, v4sf),
10169
- VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10170
- VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10171
- VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
10172
- VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10173
- VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
10174
- VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10175
- VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10176
- VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
10177
- VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10178
- VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10179
- VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
10180
- VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
10181
- VAR2 (BINOP, vrecps, v2sf, v4sf),
10182
- VAR2 (BINOP, vrsqrts, v2sf, v4sf),
10183
- VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10184
- VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
10185
- VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10186
- VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10187
- VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10188
- VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10189
- VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10190
- VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10191
- VAR2 (UNOP, vcnt, v8qi, v16qi),
10192
- VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
10193
- VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
10194
- VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
10195
- /* FIXME: vget_lane supports more variants than this! */
10196
- VAR10 (GETLANE, vget_lane,
10197
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10198
- VAR10 (SETLANE, vset_lane,
10199
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10200
- VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
10201
- VAR10 (DUP, vdup_n,
10202
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10203
- VAR10 (DUPLANE, vdup_lane,
10204
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10205
- VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
10206
- VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
10207
- VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
10208
- VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
10209
- VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
10210
- VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
10211
- VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
10212
- VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10213
- VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10214
- VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
10215
- VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
10216
- VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10217
- VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
10218
- VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
10219
- VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10220
- VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10221
- VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
10222
- VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
10223
- VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10224
- VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
10225
- VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
10226
- VAR10 (BINOP, vext,
10227
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10228
- VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10229
- VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
10230
- VAR2 (UNOP, vrev16, v8qi, v16qi),
10231
- VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
10232
- VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
10233
- VAR10 (SELECT, vbsl,
10234
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10235
- VAR2 (RINT, vrintn, v2sf, v4sf),
10236
- VAR2 (RINT, vrinta, v2sf, v4sf),
10237
- VAR2 (RINT, vrintp, v2sf, v4sf),
10238
- VAR2 (RINT, vrintm, v2sf, v4sf),
10239
- VAR2 (RINT, vrintz, v2sf, v4sf),
10240
- VAR2 (RINT, vrintx, v2sf, v4sf),
10241
- VAR1 (VTBL, vtbl1, v8qi),
10242
- VAR1 (VTBL, vtbl2, v8qi),
10243
- VAR1 (VTBL, vtbl3, v8qi),
10244
- VAR1 (VTBL, vtbl4, v8qi),
10245
- VAR1 (VTBX, vtbx1, v8qi),
10246
- VAR1 (VTBX, vtbx2, v8qi),
10247
- VAR1 (VTBX, vtbx3, v8qi),
10248
- VAR1 (VTBX, vtbx4, v8qi),
10249
- VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10250
- VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10251
- VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
10252
- VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
10253
- VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
10254
- VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
10255
- VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
10256
- VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
10257
- VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
10258
- VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
10259
- VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
10260
- VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
10261
- VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
10262
- VAR10 (LOAD1, vld1,
10263
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10264
- VAR10 (LOAD1LANE, vld1_lane,
10265
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10266
- VAR10 (LOAD1, vld1_dup,
10267
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10268
- VAR10 (STORE1, vst1,
10269
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10270
- VAR10 (STORE1LANE, vst1_lane,
10271
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10272
- VAR9 (LOADSTRUCT,
10273
- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
10274
- VAR7 (LOADSTRUCTLANE, vld2_lane,
10275
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10276
- VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
10277
- VAR9 (STORESTRUCT, vst2,
10278
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
10279
- VAR7 (STORESTRUCTLANE, vst2_lane,
10280
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10281
- VAR9 (LOADSTRUCT,
10282
- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
10283
- VAR7 (LOADSTRUCTLANE, vld3_lane,
10284
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10285
- VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
10286
- VAR9 (STORESTRUCT, vst3,
10287
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
10288
- VAR7 (STORESTRUCTLANE, vst3_lane,
10289
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10290
- VAR9 (LOADSTRUCT, vld4,
10291
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
10292
- VAR7 (LOADSTRUCTLANE, vld4_lane,
10293
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10294
- VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
10295
- VAR9 (STORESTRUCT, vst4,
10296
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
10297
- VAR7 (STORESTRUCTLANE, vst4_lane,
10298
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
10299
- VAR10 (LOGICBINOP, vand,
10300
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10301
- VAR10 (LOGICBINOP, vorr,
10302
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10303
- VAR10 (BINOP, veor,
10304
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10305
- VAR10 (LOGICBINOP, vbic,
10306
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
10307
- VAR10 (LOGICBINOP, vorn,
10308
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
10309
+#include "arm_neon_builtins.def"
10313
@@ -19666,9 +20074,36 @@
10317
-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
10318
- symbolic names defined here (which would require too much duplication).
10320
+#define CF(N,X) ARM_BUILTIN_NEON_##N##X
10321
+#define VAR1(T, N, A) \
10323
+#define VAR2(T, N, A, B) \
10324
+ VAR1 (T, N, A), \
10326
+#define VAR3(T, N, A, B, C) \
10327
+ VAR2 (T, N, A, B), \
10329
+#define VAR4(T, N, A, B, C, D) \
10330
+ VAR3 (T, N, A, B, C), \
10332
+#define VAR5(T, N, A, B, C, D, E) \
10333
+ VAR4 (T, N, A, B, C, D), \
10335
+#define VAR6(T, N, A, B, C, D, E, F) \
10336
+ VAR5 (T, N, A, B, C, D, E), \
10338
+#define VAR7(T, N, A, B, C, D, E, F, G) \
10339
+ VAR6 (T, N, A, B, C, D, E, F), \
10341
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
10342
+ VAR7 (T, N, A, B, C, D, E, F, G), \
10344
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
10345
+ VAR8 (T, N, A, B, C, D, E, F, G, H), \
10347
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
10348
+ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
10352
ARM_BUILTIN_GETWCGR0,
10353
@@ -19917,11 +20352,25 @@
10355
ARM_BUILTIN_WMERGE,
10357
- ARM_BUILTIN_NEON_BASE,
10358
+#include "arm_neon_builtins.def"
10360
- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
10364
+#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
10378
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
10381
@@ -19932,6 +20381,7 @@
10383
tree neon_intQI_type_node;
10384
tree neon_intHI_type_node;
10385
+ tree neon_floatHF_type_node;
10386
tree neon_polyQI_type_node;
10387
tree neon_polyHI_type_node;
10388
tree neon_intSI_type_node;
10389
@@ -19958,6 +20408,7 @@
10391
tree V8QI_type_node;
10392
tree V4HI_type_node;
10393
+ tree V4HF_type_node;
10394
tree V2SI_type_node;
10395
tree V2SF_type_node;
10396
tree V16QI_type_node;
10397
@@ -20012,6 +20463,9 @@
10398
neon_float_type_node = make_node (REAL_TYPE);
10399
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
10400
layout_type (neon_float_type_node);
10401
+ neon_floatHF_type_node = make_node (REAL_TYPE);
10402
+ TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
10403
+ layout_type (neon_floatHF_type_node);
10405
/* Define typedefs which exactly correspond to the modes we are basing vector
10406
types on. If you change these names you'll need to change
10407
@@ -20020,6 +20474,8 @@
10408
"__builtin_neon_qi");
10409
(*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
10410
"__builtin_neon_hi");
10411
+ (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
10412
+ "__builtin_neon_hf");
10413
(*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
10414
"__builtin_neon_si");
10415
(*lang_hooks.types.register_builtin_type) (neon_float_type_node,
10416
@@ -20061,6 +20517,8 @@
10417
build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
10419
build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
10421
+ build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
10423
build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
10425
@@ -20183,7 +20641,7 @@
10426
neon_builtin_datum *d = &neon_builtin_data[i];
10428
const char* const modenames[] = {
10429
- "v8qi", "v4hi", "v2si", "v2sf", "di",
10430
+ "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
10431
"v16qi", "v8hi", "v4si", "v4sf", "v2di",
10434
@@ -20386,8 +20844,9 @@
10435
case NEON_REINTERP:
10437
/* We iterate over 5 doubleword types, then 5 quadword
10439
- int rhs = d->mode % 5;
10440
+ types. V4HF is not a type used in reinterpret, so we translate
10441
+ d->mode to the correct index in reinterp_ftype_dreg. */
10442
+ int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
10443
switch (insn_data[d->code].operand[0].mode)
10445
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
10446
@@ -20404,7 +20863,38 @@
10450
+ case NEON_FLOAT_WIDEN:
10452
+ tree eltype = NULL_TREE;
10453
+ tree return_type = NULL_TREE;
10455
+ switch (insn_data[d->code].operand[1].mode)
10458
+ eltype = V4HF_type_node;
10459
+ return_type = V4SF_type_node;
10461
+ default: gcc_unreachable ();
10463
+ ftype = build_function_type_list (return_type, eltype, NULL);
10466
+ case NEON_FLOAT_NARROW:
10468
+ tree eltype = NULL_TREE;
10469
+ tree return_type = NULL_TREE;
10471
+ switch (insn_data[d->code].operand[1].mode)
10474
+ eltype = V4SF_type_node;
10475
+ return_type = V4HF_type_node;
10477
+ default: gcc_unreachable ();
10479
+ ftype = build_function_type_list (return_type, eltype, NULL);
10483
gcc_unreachable ();
10485
@@ -21401,6 +21891,8 @@
10489
+ case NEON_FLOAT_WIDEN:
10490
+ case NEON_FLOAT_NARROW:
10491
case NEON_REINTERP:
10492
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
10493
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
10494
@@ -21598,7 +22090,7 @@
10498
- int fcode = DECL_FUNCTION_CODE (fndecl);
10499
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10501
enum machine_mode tmode;
10502
enum machine_mode mode0;
10503
@@ -23764,6 +24256,8 @@
10506
thumb2_emit_ldrd_pop (saved_regs_mask);
10507
+ else if (TARGET_ARM && !IS_INTERRUPT (func_type))
10508
+ arm_emit_ldrd_pop (saved_regs_mask);
10510
arm_emit_multi_reg_pop (saved_regs_mask);
10512
@@ -25032,7 +25526,7 @@
10514
/* Neon also supports V2SImode, etc. listed in the clause below. */
10515
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
10516
- || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
10517
+ || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
10520
if ((TARGET_NEON || TARGET_IWMMXT)
10521
@@ -25847,6 +26341,7 @@
10529
@@ -25875,6 +26370,7 @@
10530
{ V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
10531
{ V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
10532
{ V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
10533
+ { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
10534
{ V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
10535
{ V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
10536
{ V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
10537
@@ -25973,6 +26469,60 @@
10538
return !TARGET_THUMB1;
10542
+arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
10544
+ enum machine_mode in_mode, out_mode;
10547
+ if (TREE_CODE (type_out) != VECTOR_TYPE
10548
+ || TREE_CODE (type_in) != VECTOR_TYPE
10549
+ || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
10550
+ return NULL_TREE;
10552
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
10553
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
10554
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
10555
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
10557
+/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
10558
+ decl of the vectorized builtin for the appropriate vector mode.
10559
+ NULL_TREE is returned if no such builtin is available. */
10560
+#undef ARM_CHECK_BUILTIN_MODE
10561
+#define ARM_CHECK_BUILTIN_MODE(C) \
10562
+ (out_mode == SFmode && out_n == C \
10563
+ && in_mode == SFmode && in_n == C)
10565
+#undef ARM_FIND_VRINT_VARIANT
10566
+#define ARM_FIND_VRINT_VARIANT(N) \
10567
+ (ARM_CHECK_BUILTIN_MODE (2) \
10568
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
10569
+ : (ARM_CHECK_BUILTIN_MODE (4) \
10570
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
10573
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
10575
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
10578
+ case BUILT_IN_FLOORF:
10579
+ return ARM_FIND_VRINT_VARIANT (vrintm);
10580
+ case BUILT_IN_CEILF:
10581
+ return ARM_FIND_VRINT_VARIANT (vrintp);
10582
+ case BUILT_IN_TRUNCF:
10583
+ return ARM_FIND_VRINT_VARIANT (vrintz);
10584
+ case BUILT_IN_ROUNDF:
10585
+ return ARM_FIND_VRINT_VARIANT (vrinta);
10587
+ return NULL_TREE;
10590
+ return NULL_TREE;
10592
+#undef ARM_CHECK_BUILTIN_MODE
10593
+#undef ARM_FIND_VRINT_VARIANT
10595
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
10596
static HOST_WIDE_INT
10597
arm_vector_alignment (const_tree type)
10598
@@ -26203,40 +26753,72 @@
10599
emit_insn (gen_memory_barrier ());
10602
-/* Emit the load-exclusive and store-exclusive instructions. */
10603
+/* Emit the load-exclusive and store-exclusive instructions.
10604
+ Use acquire and release versions if necessary. */
10607
-arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
10608
+arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
10610
rtx (*gen) (rtx, rtx);
10615
- case QImode: gen = gen_arm_load_exclusiveqi; break;
10616
- case HImode: gen = gen_arm_load_exclusivehi; break;
10617
- case SImode: gen = gen_arm_load_exclusivesi; break;
10618
- case DImode: gen = gen_arm_load_exclusivedi; break;
10620
- gcc_unreachable ();
10623
+ case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
10624
+ case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
10625
+ case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
10626
+ case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
10628
+ gcc_unreachable ();
10635
+ case QImode: gen = gen_arm_load_exclusiveqi; break;
10636
+ case HImode: gen = gen_arm_load_exclusivehi; break;
10637
+ case SImode: gen = gen_arm_load_exclusivesi; break;
10638
+ case DImode: gen = gen_arm_load_exclusivedi; break;
10640
+ gcc_unreachable ();
10644
emit_insn (gen (rval, mem));
10648
-arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
10649
+arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
10650
+ rtx mem, bool rel)
10652
rtx (*gen) (rtx, rtx, rtx);
10657
- case QImode: gen = gen_arm_store_exclusiveqi; break;
10658
- case HImode: gen = gen_arm_store_exclusivehi; break;
10659
- case SImode: gen = gen_arm_store_exclusivesi; break;
10660
- case DImode: gen = gen_arm_store_exclusivedi; break;
10662
- gcc_unreachable ();
10665
+ case QImode: gen = gen_arm_store_release_exclusiveqi; break;
10666
+ case HImode: gen = gen_arm_store_release_exclusivehi; break;
10667
+ case SImode: gen = gen_arm_store_release_exclusivesi; break;
10668
+ case DImode: gen = gen_arm_store_release_exclusivedi; break;
10670
+ gcc_unreachable ();
10677
+ case QImode: gen = gen_arm_store_exclusiveqi; break;
10678
+ case HImode: gen = gen_arm_store_exclusivehi; break;
10679
+ case SImode: gen = gen_arm_store_exclusivesi; break;
10680
+ case DImode: gen = gen_arm_store_exclusivedi; break;
10682
+ gcc_unreachable ();
10686
emit_insn (gen (bval, rval, mem));
10688
@@ -26271,6 +26853,15 @@
10689
mod_f = operands[7];
10690
mode = GET_MODE (mem);
10692
+ /* Normally the succ memory model must be stronger than fail, but in the
10693
+ unlikely event of fail being ACQUIRE and succ being RELEASE we need to
10694
+ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
10696
+ if (TARGET_HAVE_LDACQ
10697
+ && INTVAL (mod_f) == MEMMODEL_ACQUIRE
10698
+ && INTVAL (mod_s) == MEMMODEL_RELEASE)
10699
+ mod_s = GEN_INT (MEMMODEL_ACQ_REL);
10704
@@ -26345,8 +26936,20 @@
10705
scratch = operands[7];
10706
mode = GET_MODE (mem);
10708
- arm_pre_atomic_barrier (mod_s);
10709
+ bool use_acquire = TARGET_HAVE_LDACQ
10710
+ && !(mod_s == MEMMODEL_RELAXED
10711
+ || mod_s == MEMMODEL_CONSUME
10712
+ || mod_s == MEMMODEL_RELEASE);
10714
+ bool use_release = TARGET_HAVE_LDACQ
10715
+ && !(mod_s == MEMMODEL_RELAXED
10716
+ || mod_s == MEMMODEL_CONSUME
10717
+ || mod_s == MEMMODEL_ACQUIRE);
10719
+ /* Checks whether a barrier is needed and emits one accordingly. */
10720
+ if (!(use_acquire || use_release))
10721
+ arm_pre_atomic_barrier (mod_s);
10726
@@ -26355,7 +26958,7 @@
10728
label2 = gen_label_rtx ();
10730
- arm_emit_load_exclusive (mode, rval, mem);
10731
+ arm_emit_load_exclusive (mode, rval, mem, use_acquire);
10733
cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
10734
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10735
@@ -26363,7 +26966,7 @@
10736
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
10737
emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
10739
- arm_emit_store_exclusive (mode, scratch, mem, newval);
10740
+ arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
10742
/* Weak or strong, we want EQ to be true for success, so that we
10743
match the flags that we got from the compare above. */
10744
@@ -26382,7 +26985,9 @@
10745
if (mod_f != MEMMODEL_RELAXED)
10746
emit_label (label2);
10748
- arm_post_atomic_barrier (mod_s);
10749
+ /* Checks whether a barrier is needed and emits one accordingly. */
10750
+ if (!(use_acquire || use_release))
10751
+ arm_post_atomic_barrier (mod_s);
10753
if (mod_f == MEMMODEL_RELAXED)
10754
emit_label (label2);
10755
@@ -26397,8 +27002,20 @@
10756
enum machine_mode wmode = (mode == DImode ? DImode : SImode);
10759
- arm_pre_atomic_barrier (model);
10760
+ bool use_acquire = TARGET_HAVE_LDACQ
10761
+ && !(model == MEMMODEL_RELAXED
10762
+ || model == MEMMODEL_CONSUME
10763
+ || model == MEMMODEL_RELEASE);
10765
+ bool use_release = TARGET_HAVE_LDACQ
10766
+ && !(model == MEMMODEL_RELAXED
10767
+ || model == MEMMODEL_CONSUME
10768
+ || model == MEMMODEL_ACQUIRE);
10770
+ /* Checks whether a barrier is needed and emits one accordingly. */
10771
+ if (!(use_acquire || use_release))
10772
+ arm_pre_atomic_barrier (model);
10774
label = gen_label_rtx ();
10775
emit_label (label);
10777
@@ -26410,7 +27027,7 @@
10779
value = simplify_gen_subreg (wmode, value, mode, 0);
10781
- arm_emit_load_exclusive (mode, old_out, mem);
10782
+ arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
10786
@@ -26458,12 +27075,15 @@
10790
- arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
10791
+ arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
10794
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10795
emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
10797
- arm_post_atomic_barrier (model);
10798
+ /* Checks whether a barrier is needed and emits one accordingly. */
10799
+ if (!(use_acquire || use_release))
10800
+ arm_post_atomic_barrier (model);
10803
#define MAX_VECT_LEN 16
10804
--- a/src/gcc/config/arm/arm.h
10805
+++ b/src/gcc/config/arm/arm.h
10806
@@ -350,10 +350,16 @@
10807
#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \
10810
+/* Nonzero if this chip supports load-acquire and store-release. */
10811
+#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8)
10813
/* Nonzero if integer division instructions supported. */
10814
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
10815
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
10817
+/* Should NEON be used for 64-bits bitops. */
10818
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
10820
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
10821
then TARGET_AAPCS_BASED must be true -- but the converse does not
10822
hold. TARGET_BPABI implies the use of the BPABI runtime library,
10823
@@ -539,6 +545,10 @@
10824
/* Nonzero if chip supports integer division instruction in Thumb mode. */
10825
extern int arm_arch_thumb_hwdiv;
10827
+/* Nonzero if we should use Neon to handle 64-bits operations rather
10828
+ than core registers. */
10829
+extern int prefer_neon_for_64bits;
10831
#ifndef TARGET_DEFAULT
10832
#define TARGET_DEFAULT (MASK_APCS_FRAME)
10834
@@ -1040,7 +1050,7 @@
10835
/* Modes valid for Neon D registers. */
10836
#define VALID_NEON_DREG_MODE(MODE) \
10837
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
10838
- || (MODE) == V2SFmode || (MODE) == DImode)
10839
+ || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
10841
/* Modes valid for Neon Q registers. */
10842
#define VALID_NEON_QREG_MODE(MODE) \
10843
--- a/src/gcc/config/arm/unspecs.md
10844
+++ b/src/gcc/config/arm/unspecs.md
10846
; FPSCR rounding mode and signal inexactness.
10847
UNSPEC_VRINTA ; Represent a float to integral float rounding
10848
; towards nearest, ties away from zero.
10849
+ UNSPEC_RRX ; Rotate Right with Extend shifts register right
10850
+ ; by one place, with Carry flag shifted into bit[31].
10853
(define_c_enum "unspec" [
10854
@@ -139,6 +141,10 @@
10855
VUNSPEC_ATOMIC_OP ; Represent an atomic operation.
10856
VUNSPEC_LL ; Represent a load-register-exclusive.
10857
VUNSPEC_SC ; Represent a store-register-exclusive.
10858
+ VUNSPEC_LAX ; Represent a load-register-acquire-exclusive.
10859
+ VUNSPEC_SLX ; Represent a store-register-release-exclusive.
10860
+ VUNSPEC_LDA ; Represent a store-register-acquire.
10861
+ VUNSPEC_STL ; Represent a store-register-release.
10864
;; Enumerators for NEON unspecs.
10865
--- a/src/gcc/config/arm/arm-cores.def
10866
+++ b/src/gcc/config/arm/arm-cores.def
10867
@@ -129,9 +129,11 @@
10868
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
10869
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
10870
ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
10871
+ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex_a5)
10872
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
10873
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
10874
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
10875
+ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
10876
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
10877
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
10878
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
10879
--- a/src/gcc/config/arm/arm-tune.md
10880
+++ b/src/gcc/config/arm/arm-tune.md
10882
;; -*- buffer-read-only: t -*-
10883
;; Generated automatically by gentune.sh from arm-cores.def
10884
(define_attr "tune"
10885
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
10886
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexr4,cortexr4f,cortexr5,cortexr7,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4"
10887
(const (symbol_ref "((enum attr_tune) arm_tune)")))
10888
--- a/src/gcc/config/arm/arm-protos.h
10889
+++ b/src/gcc/config/arm/arm-protos.h
10891
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
10892
rtx (*) (rtx, rtx, rtx));
10893
extern rtx neon_make_constant (rtx);
10894
+extern tree arm_builtin_vectorized_function (tree, tree, tree);
10895
extern void neon_expand_vector_init (rtx, rtx);
10896
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
10897
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
10898
@@ -117,6 +118,7 @@
10899
extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
10900
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
10901
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
10902
+extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
10903
extern int arm_gen_movmemqi (rtx *);
10904
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
10905
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
10906
@@ -269,6 +271,8 @@
10907
bool logical_op_non_short_circuit[2];
10908
/* Vectorizer costs. */
10909
const struct cpu_vec_costs* vec_costs;
10910
+ /* Prefer Neon for 64-bit bitops. */
10911
+ bool prefer_neon_for_64bits;
10914
extern const struct tune_params *current_tune;
10915
--- a/src/gcc/config/arm/vfp.md
10916
+++ b/src/gcc/config/arm/vfp.md
10917
@@ -132,8 +132,8 @@
10920
(define_insn "*movdi_vfp"
10921
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,r,w,w, Uv")
10922
- (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
10923
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
10924
+ (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
10925
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8
10926
&& ( register_operand (operands[0], DImode)
10927
|| register_operand (operands[1], DImode))
10928
--- a/src/gcc/config/arm/neon.md
10929
+++ b/src/gcc/config/arm/neon.md
10930
@@ -487,7 +487,7 @@
10931
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
10932
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
10933
(set_attr "length" "*,8,8,*,8,8,8")
10934
- (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
10935
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
10938
(define_insn "*sub<mode>3_neon"
10939
@@ -524,7 +524,7 @@
10940
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
10941
(set_attr "conds" "*,clob,clob,clob,*")
10942
(set_attr "length" "*,8,8,8,*")
10943
- (set_attr "arch" "nota8,*,*,*,onlya8")]
10944
+ (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
10947
(define_insn "*mul<mode>3_neon"
10948
@@ -699,7 +699,7 @@
10950
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
10951
(set_attr "length" "*,*,8,8,*,*")
10952
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
10953
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
10956
;; The concrete forms of the Neon immediate-logic instructions are vbic and
10957
@@ -724,29 +724,6 @@
10958
[(set_attr "neon_type" "neon_int_1")]
10961
-(define_insn "anddi3_neon"
10962
- [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
10963
- (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
10964
- (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r,w,DL")))]
10967
- switch (which_alternative)
10969
- case 0: /* fall through */
10970
- case 4: return "vand\t%P0, %P1, %P2";
10971
- case 1: /* fall through */
10972
- case 5: return neon_output_logic_immediate ("vand", &operands[2],
10973
- DImode, 1, VALID_NEON_QREG_MODE (DImode));
10974
- case 2: return "#";
10975
- case 3: return "#";
10976
- default: gcc_unreachable ();
10979
- [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
10980
- (set_attr "length" "*,*,8,8,*,*")
10981
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
10984
(define_insn "orn<mode>3_neon"
10985
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
10986
(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
10987
@@ -840,7 +817,7 @@
10988
veor\t%P0, %P1, %P2"
10989
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
10990
(set_attr "length" "*,8,8,*")
10991
- (set_attr "arch" "nota8,*,*,onlya8")]
10992
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
10995
(define_insn "one_cmpl<mode>2"
10996
@@ -1162,7 +1139,7 @@
11000
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
11001
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
11002
(set_attr "opt" "*,*,speed,speed,*,*")]
11005
@@ -1263,7 +1240,7 @@
11009
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
11010
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
11011
(set_attr "opt" "*,*,speed,speed,*,*")]
11014
@@ -3217,6 +3194,24 @@
11015
(const_string "neon_fp_vadd_qqq_vabs_qq")))]
11018
+(define_insn "neon_vcvtv4sfv4hf"
11019
+ [(set (match_operand:V4SF 0 "s_register_operand" "=w")
11020
+ (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
11022
+ "TARGET_NEON && TARGET_FP16"
11023
+ "vcvt.f32.f16\t%q0, %P1"
11024
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
11027
+(define_insn "neon_vcvtv4hfv4sf"
11028
+ [(set (match_operand:V4HF 0 "s_register_operand" "=w")
11029
+ (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
11031
+ "TARGET_NEON && TARGET_FP16"
11032
+ "vcvt.f16.f32\t%P0, %q1"
11033
+ [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
11036
(define_insn "neon_vcvt_n<mode>"
11037
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
11038
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
11039
@@ -5547,7 +5542,7 @@
11040
(match_operand:SI 3 "immediate_operand" "")]
11043
- emit_insn (gen_and<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
11044
+ emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
11048
--- a/src/gcc/config/arm/arm_neon_builtins.def
11049
+++ b/src/gcc/config/arm/arm_neon_builtins.def
11051
+/* NEON builtin definitions for ARM.
11052
+ Copyright (C) 2013
11053
+ Free Software Foundation, Inc.
11054
+ Contributed by ARM Ltd.
11056
+ This file is part of GCC.
11058
+ GCC is free software; you can redistribute it and/or modify it
11059
+ under the terms of the GNU General Public License as published
11060
+ by the Free Software Foundation; either version 3, or (at your
11061
+ option) any later version.
11063
+ GCC is distributed in the hope that it will be useful, but WITHOUT
11064
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11065
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
11066
+ License for more details.
11068
+ You should have received a copy of the GNU General Public License
11069
+ along with GCC; see the file COPYING3. If not see
11070
+ <http://www.gnu.org/licenses/>. */
11072
+VAR10 (BINOP, vadd,
11073
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11074
+VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
11075
+VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
11076
+VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11077
+VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11078
+VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
11079
+VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11080
+VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11081
+VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
11082
+VAR2 (TERNOP, vfma, v2sf, v4sf),
11083
+VAR2 (TERNOP, vfms, v2sf, v4sf),
11084
+VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11085
+VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
11086
+VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
11087
+VAR2 (TERNOP, vqdmlal, v4hi, v2si),
11088
+VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
11089
+VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
11090
+VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
11091
+VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
11092
+VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
11093
+VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
11094
+VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
11095
+VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
11096
+VAR2 (BINOP, vqdmull, v4hi, v2si),
11097
+VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11098
+VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11099
+VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11100
+VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
11101
+VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
11102
+VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
11103
+VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11104
+VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11105
+VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11106
+VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
11107
+VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11108
+VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11109
+VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
11110
+VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
11111
+VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11112
+VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11113
+VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
11114
+VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11115
+VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11116
+VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11117
+VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11118
+VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11119
+VAR2 (BINOP, vcage, v2sf, v4sf),
11120
+VAR2 (BINOP, vcagt, v2sf, v4sf),
11121
+VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11122
+VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11123
+VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
11124
+VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11125
+VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
11126
+VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11127
+VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11128
+VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
11129
+VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11130
+VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11131
+VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
11132
+VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
11133
+VAR2 (BINOP, vrecps, v2sf, v4sf),
11134
+VAR2 (BINOP, vrsqrts, v2sf, v4sf),
11135
+VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11136
+VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
11137
+VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11138
+VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11139
+VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11140
+VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11141
+VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11142
+VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11143
+VAR2 (UNOP, vcnt, v8qi, v16qi),
11144
+VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
11145
+VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
11146
+VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
11147
+ /* FIXME: vget_lane supports more variants than this! */
11148
+VAR10 (GETLANE, vget_lane,
11149
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11150
+VAR10 (SETLANE, vset_lane,
11151
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11152
+VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
11153
+VAR10 (DUP, vdup_n,
11154
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11155
+VAR10 (DUPLANE, vdup_lane,
11156
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11157
+VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
11158
+VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
11159
+VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
11160
+VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
11161
+VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
11162
+VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
11163
+VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
11164
+VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11165
+VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11166
+VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
11167
+VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
11168
+VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11169
+VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
11170
+VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
11171
+VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11172
+VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11173
+VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
11174
+VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
11175
+VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11176
+VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
11177
+VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
11178
+VAR10 (BINOP, vext,
11179
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11180
+VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11181
+VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
11182
+VAR2 (UNOP, vrev16, v8qi, v16qi),
11183
+VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
11184
+VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
11185
+VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
11186
+VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
11187
+VAR10 (SELECT, vbsl,
11188
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11189
+VAR2 (RINT, vrintn, v2sf, v4sf),
11190
+VAR2 (RINT, vrinta, v2sf, v4sf),
11191
+VAR2 (RINT, vrintp, v2sf, v4sf),
11192
+VAR2 (RINT, vrintm, v2sf, v4sf),
11193
+VAR2 (RINT, vrintz, v2sf, v4sf),
11194
+VAR2 (RINT, vrintx, v2sf, v4sf),
11195
+VAR1 (VTBL, vtbl1, v8qi),
11196
+VAR1 (VTBL, vtbl2, v8qi),
11197
+VAR1 (VTBL, vtbl3, v8qi),
11198
+VAR1 (VTBL, vtbl4, v8qi),
11199
+VAR1 (VTBX, vtbx1, v8qi),
11200
+VAR1 (VTBX, vtbx2, v8qi),
11201
+VAR1 (VTBX, vtbx3, v8qi),
11202
+VAR1 (VTBX, vtbx4, v8qi),
11203
+VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11204
+VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11205
+VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
11206
+VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
11207
+VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
11208
+VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
11209
+VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
11210
+VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
11211
+VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
11212
+VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
11213
+VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
11214
+VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
11215
+VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
11216
+VAR10 (LOAD1, vld1,
11217
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11218
+VAR10 (LOAD1LANE, vld1_lane,
11219
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11220
+VAR10 (LOAD1, vld1_dup,
11221
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11222
+VAR10 (STORE1, vst1,
11223
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11224
+VAR10 (STORE1LANE, vst1_lane,
11225
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11227
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
11228
+VAR7 (LOADSTRUCTLANE, vld2_lane,
11229
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11230
+VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
11231
+VAR9 (STORESTRUCT, vst2,
11232
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
11233
+VAR7 (STORESTRUCTLANE, vst2_lane,
11234
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11236
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
11237
+VAR7 (LOADSTRUCTLANE, vld3_lane,
11238
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11239
+VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
11240
+VAR9 (STORESTRUCT, vst3,
11241
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
11242
+VAR7 (STORESTRUCTLANE, vst3_lane,
11243
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11244
+VAR9 (LOADSTRUCT, vld4,
11245
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
11246
+VAR7 (LOADSTRUCTLANE, vld4_lane,
11247
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11248
+VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
11249
+VAR9 (STORESTRUCT, vst4,
11250
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
11251
+VAR7 (STORESTRUCTLANE, vst4_lane,
11252
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
11253
+VAR10 (LOGICBINOP, vand,
11254
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11255
+VAR10 (LOGICBINOP, vorr,
11256
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11257
+VAR10 (BINOP, veor,
11258
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11259
+VAR10 (LOGICBINOP, vbic,
11260
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
11261
+VAR10 (LOGICBINOP, vorn,
11262
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
11263
--- a/src/gcc/config/arm/neon.ml
11264
+++ b/src/gcc/config/arm/neon.ml
11266
<http://www.gnu.org/licenses/>. *)
11268
(* Shorthand types for vector elements. *)
11269
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
11270
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
11271
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
11272
| Cast of elts * elts | NoElts
11275
| T_uint16x4 | T_uint16x8
11276
| T_uint32x2 | T_uint32x4
11277
| T_uint64x1 | T_uint64x2
11279
| T_float32x2 | T_float32x4
11280
| T_poly8x8 | T_poly8x16
11281
| T_poly16x4 | T_poly16x8
11282
@@ -46,11 +47,13 @@
11283
| T_uint8 | T_uint16
11284
| T_uint32 | T_uint64
11285
| T_poly8 | T_poly16
11286
- | T_float32 | T_arrayof of int * vectype
11287
+ | T_float16 | T_float32
11288
+ | T_arrayof of int * vectype
11289
| T_ptrto of vectype | T_const of vectype
11291
| T_intHI | T_intSI
11292
- | T_intDI | T_floatSF
11293
+ | T_intDI | T_floatHF
11296
(* The meanings of the following are:
11297
TImode : "Tetra", two registers (four words).
11299
| Arity3 of vectype * vectype * vectype * vectype
11300
| Arity4 of vectype * vectype * vectype * vectype * vectype
11302
-type vecmode = V8QI | V4HI | V2SI | V2SF | DI
11303
+type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
11304
| V16QI | V8HI | V4SI | V4SF | V2DI
11305
| QI | HI | SI | SF
11307
@@ -284,18 +287,22 @@
11309
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
11310
| Requires_feature of string
11311
+ (* Mark that the intrinsic requires a particular architecture version. *)
11312
| Requires_arch of int
11313
+ (* Mark that the intrinsic requires a particular bit in __ARM_FP to
11315
+ | Requires_FP_bit of int
11317
exception MixedMode of elts * elts
11319
let rec elt_width = function
11320
S8 | U8 | P8 | I8 | B8 -> 8
11321
- | S16 | U16 | P16 | I16 | B16 -> 16
11322
+ | S16 | U16 | P16 | I16 | B16 | F16 -> 16
11323
| S32 | F32 | U32 | I32 | B32 -> 32
11324
| S64 | U64 | I64 | B64 -> 64
11326
let wa = elt_width a and wb = elt_width b in
11327
- if wa = wb then wa else failwith "element width?"
11328
+ if wa = wb then wa else raise (MixedMode (a, b))
11329
| Cast (a, b) -> raise (MixedMode (a, b))
11330
| NoElts -> failwith "No elts"
11332
@@ -303,7 +310,7 @@
11333
S8 | S16 | S32 | S64 -> Signed
11334
| U8 | U16 | U32 | U64 -> Unsigned
11337
+ | F16 | F32 -> Float
11338
| I8 | I16 | I32 | I64 -> Int
11339
| B8 | B16 | B32 | B64 -> Bits
11340
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
11341
@@ -315,6 +322,7 @@
11342
| Signed, 16 -> S16
11343
| Signed, 32 -> S32
11344
| Signed, 64 -> S64
11345
+ | Float, 16 -> F16
11347
| Unsigned, 8 -> U8
11348
| Unsigned, 16 -> U16
11349
@@ -384,7 +392,12 @@
11351
scan ((Array.length operands) - 1)
11353
-let rec mode_of_elt elt shape =
11354
+(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
11355
+ SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
11356
+ for the given argument position, else determine which argument to return a
11357
+ mode for automatically. *)
11359
+let rec mode_of_elt ?argpos elt shape =
11360
let flt = match elt_class elt with
11361
Float | ConvClass(_, Float) -> true | _ -> false in
11363
@@ -394,7 +407,10 @@
11364
in match shape with
11365
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
11366
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
11367
- [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
11369
+ [| V8QI; V4HF; V2SF; DI |].(idx)
11371
+ [| V8QI; V4HI; V2SI; DI |].(idx)
11372
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
11373
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
11374
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
11375
@@ -404,7 +420,11 @@
11377
[| V8QI; V4HI; V2SI; DI |].(idx)
11378
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
11379
- | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
11380
+ | Use_operands ops ->
11381
+ begin match argpos with
11382
+ None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
11383
+ | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
11385
| _ -> failwith "invalid shape"
11387
(* Modify an element type dependent on the shape of the instruction and the
11388
@@ -454,10 +474,11 @@
11389
| U16 -> T_uint16x4
11390
| U32 -> T_uint32x2
11391
| U64 -> T_uint64x1
11392
+ | F16 -> T_float16x4
11393
| F32 -> T_float32x2
11395
| P16 -> T_poly16x4
11396
- | _ -> failwith "Bad elt type"
11397
+ | _ -> failwith "Bad elt type for Dreg"
11400
begin match elt with
11401
@@ -472,7 +493,7 @@
11402
| F32 -> T_float32x4
11404
| P16 -> T_poly16x8
11405
- | _ -> failwith "Bad elt type"
11406
+ | _ -> failwith "Bad elt type for Qreg"
11409
begin match elt with
11410
@@ -487,7 +508,7 @@
11414
- | _ -> failwith "Bad elt type"
11415
+ | _ -> failwith "Bad elt type for Corereg"
11419
@@ -506,7 +527,7 @@
11420
let vectype_size = function
11421
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
11422
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
11423
- | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
11424
+ | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
11425
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
11426
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
11427
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
11428
@@ -1217,6 +1238,10 @@
11429
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
11430
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
11431
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
11432
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
11433
+ Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
11434
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
11435
+ Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
11436
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
11437
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
11438
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
11439
@@ -1782,7 +1807,7 @@
11440
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
11441
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
11442
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
11443
- | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
11444
+ | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
11445
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
11446
| NoElts -> failwith "No elts"
11448
@@ -1809,6 +1834,7 @@
11449
| T_uint32x4 -> affix "uint32x4"
11450
| T_uint64x1 -> affix "uint64x1"
11451
| T_uint64x2 -> affix "uint64x2"
11452
+ | T_float16x4 -> affix "float16x4"
11453
| T_float32x2 -> affix "float32x2"
11454
| T_float32x4 -> affix "float32x4"
11455
| T_poly8x8 -> affix "poly8x8"
11456
@@ -1825,6 +1851,7 @@
11457
| T_uint64 -> affix "uint64"
11458
| T_poly8 -> affix "poly8"
11459
| T_poly16 -> affix "poly16"
11460
+ | T_float16 -> affix "float16"
11461
| T_float32 -> affix "float32"
11462
| T_immediate _ -> "const int"
11464
@@ -1832,6 +1859,7 @@
11465
| T_intHI -> "__builtin_neon_hi"
11466
| T_intSI -> "__builtin_neon_si"
11467
| T_intDI -> "__builtin_neon_di"
11468
+ | T_floatHF -> "__builtin_neon_hf"
11469
| T_floatSF -> "__builtin_neon_sf"
11470
| T_arrayof (num, base) ->
11471
let basename = name (fun x -> x) base in
11472
@@ -1853,10 +1881,10 @@
11473
| B_XImode -> "__builtin_neon_xi"
11475
let string_of_mode = function
11476
- V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
11477
- | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
11478
- | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
11480
+ V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
11481
+ | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
11482
+ | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
11483
+ | HI -> "hi" | SI -> "si" | SF -> "sf"
11485
(* Use uppercase chars for letters which form part of the intrinsic name, but
11486
should be omitted from the builtin name (the info is passed in an extra
11487
--- a/src/gcc/config/arm/constraints.md
11488
+++ b/src/gcc/config/arm/constraints.md
11490
;; The following register constraints have been used:
11491
;; - in ARM/Thumb-2 state: t, w, x, y, z
11492
;; - in Thumb state: h, b
11493
-;; - in both states: l, c, k
11494
+;; - in both states: l, c, k, q
11495
;; In ARM state, 'l' is an alias for 'r'
11496
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
11499
(define_register_constraint "k" "STACK_REG"
11500
"@internal The stack register.")
11502
+(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS"
11503
+ "@internal In ARM state with LDRD support, core registers, otherwise general registers.")
11505
(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS"
11507
Thumb only. The union of the low registers and the stack register.")
11508
@@ -248,6 +251,12 @@
11509
(and (match_code "const_int")
11510
(match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
11512
+(define_constraint "De"
11514
+ In ARM/Thumb-2 state a const_int that can be used by insn anddi."
11515
+ (and (match_code "const_int")
11516
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
11518
(define_constraint "Di"
11520
In ARM/Thumb-2 state a const_int or const_double where both the high
11521
--- a/src/gcc/config/arm/arm-arches.def
11522
+++ b/src/gcc/config/arm/arm-arches.def
11524
ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R)
11525
ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M)
11526
ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM)
11527
-ARM_ARCH("armv8-a", cortexa15, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
11528
+ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
11529
ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)
11530
ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
11531
--- a/src/gcc/config/arm/t-arm
11532
+++ b/src/gcc/config/arm/t-arm
11534
$(srcdir)/config/arm/cortex-a8-neon.md \
11535
$(srcdir)/config/arm/cortex-a9.md \
11536
$(srcdir)/config/arm/cortex-a9-neon.md \
11537
+ $(srcdir)/config/arm/cortex-a53.md \
11538
$(srcdir)/config/arm/cortex-m4-fpu.md \
11539
$(srcdir)/config/arm/cortex-m4.md \
11540
$(srcdir)/config/arm/cortex-r4f.md \
11542
$(srcdir)/config/arm/iwmmxt.md \
11543
$(srcdir)/config/arm/iwmmxt2.md \
11544
$(srcdir)/config/arm/ldmstm.md \
11545
+ $(srcdir)/config/arm/ldrdstrd.md \
11546
$(srcdir)/config/arm/marvell-f-iwmmxt.md \
11547
$(srcdir)/config/arm/neon.md \
11548
$(srcdir)/config/arm/predicates.md \
11550
$(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
11551
$(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
11552
intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \
11553
- $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def
11554
+ $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \
11555
+ $(srcdir)/config/arm/arm_neon_builtins.def
11557
arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \
11558
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H)
11559
--- a/src/gcc/config/arm/arm.opt
11560
+++ b/src/gcc/config/arm/arm.opt
11561
@@ -247,3 +247,7 @@
11563
Target Report Var(unaligned_access) Init(2)
11564
Enable unaligned word and halfword accesses to packed data.
11567
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
11568
+Use Neon to perform 64-bits operations rather than core registers.
11569
--- a/src/gcc/config/arm/ldrdstrd.md
11570
+++ b/src/gcc/config/arm/ldrdstrd.md
11572
+;; ARM ldrd/strd peephole optimizations.
11574
+;; Copyright (C) 2013 Free Software Foundation, Inc.
11576
+;; Written by Greta Yorsh <greta.yorsh@arm.com>
11578
+;; This file is part of GCC.
11580
+;; GCC is free software; you can redistribute it and/or modify it
11581
+;; under the terms of the GNU General Public License as published by
11582
+;; the Free Software Foundation; either version 3, or (at your option)
11583
+;; any later version.
11585
+;; GCC is distributed in the hope that it will be useful, but
11586
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
11587
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11588
+;; General Public License for more details.
11590
+;; You should have received a copy of the GNU General Public License
11591
+;; along with GCC; see the file COPYING3. If not see
11592
+;; <http://www.gnu.org/licenses/>.
11594
+;; The following peephole optimizations identify consecutive memory
11595
+;; accesses, and try to rearrange the operands to enable generation of
11598
+(define_peephole2 ; ldrd
11599
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
11600
+ (match_operand:SI 2 "memory_operand" ""))
11601
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
11602
+ (match_operand:SI 3 "memory_operand" ""))]
11604
+ && current_tune->prefer_ldrd_strd
11605
+ && !optimize_function_for_size_p (cfun)"
11608
+ if (!gen_operands_ldrd_strd (operands, true, false, false))
11610
+ else if (TARGET_ARM)
11612
+ /* In ARM state, the destination registers of LDRD/STRD must be
11613
+ consecutive. We emit DImode access. */
11614
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
11615
+ operands[2] = adjust_address (operands[2], DImode, 0);
11616
+ /* Emit [(set (match_dup 0) (match_dup 2))] */
11617
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
11620
+ else if (TARGET_THUMB2)
11622
+ /* Emit the pattern:
11623
+ [(parallel [(set (match_dup 0) (match_dup 2))
11624
+ (set (match_dup 1) (match_dup 3))])] */
11625
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]);
11626
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]);
11627
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11632
+(define_peephole2 ; strd
11633
+ [(set (match_operand:SI 2 "memory_operand" "")
11634
+ (match_operand:SI 0 "arm_general_register_operand" ""))
11635
+ (set (match_operand:SI 3 "memory_operand" "")
11636
+ (match_operand:SI 1 "arm_general_register_operand" ""))]
11638
+ && current_tune->prefer_ldrd_strd
11639
+ && !optimize_function_for_size_p (cfun)"
11642
+ if (!gen_operands_ldrd_strd (operands, false, false, false))
11644
+ else if (TARGET_ARM)
11646
+ /* In ARM state, the destination registers of LDRD/STRD must be
11647
+ consecutive. We emit DImode access. */
11648
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
11649
+ operands[2] = adjust_address (operands[2], DImode, 0);
11650
+ /* Emit [(set (match_dup 2) (match_dup 0))] */
11651
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0]));
11654
+ else if (TARGET_THUMB2)
11656
+ /* Emit the pattern:
11657
+ [(parallel [(set (match_dup 2) (match_dup 0))
11658
+ (set (match_dup 3) (match_dup 1))])] */
11659
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
11660
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
11661
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11666
+;; The following peepholes reorder registers to enable LDRD/STRD.
11667
+(define_peephole2 ; strd of constants
11668
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
11669
+ (match_operand:SI 4 "const_int_operand" ""))
11670
+ (set (match_operand:SI 2 "memory_operand" "")
11672
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
11673
+ (match_operand:SI 5 "const_int_operand" ""))
11674
+ (set (match_operand:SI 3 "memory_operand" "")
11677
+ && current_tune->prefer_ldrd_strd
11678
+ && !optimize_function_for_size_p (cfun)"
11681
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
11683
+ else if (TARGET_ARM)
11685
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
11686
+ operands[2] = adjust_address (operands[2], DImode, 0);
11687
+ /* Emit the pattern:
11688
+ [(set (match_dup 0) (match_dup 4))
11689
+ (set (match_dup 1) (match_dup 5))
11690
+ (set (match_dup 2) tmp)] */
11691
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
11692
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
11693
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
11696
+ else if (TARGET_THUMB2)
11698
+ /* Emit the pattern:
11699
+ [(set (match_dup 0) (match_dup 4))
11700
+ (set (match_dup 1) (match_dup 5))
11701
+ (parallel [(set (match_dup 2) (match_dup 0))
11702
+ (set (match_dup 3) (match_dup 1))])] */
11703
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
11704
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
11705
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
11706
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
11707
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11712
+(define_peephole2 ; strd of constants
11713
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
11714
+ (match_operand:SI 4 "const_int_operand" ""))
11715
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
11716
+ (match_operand:SI 5 "const_int_operand" ""))
11717
+ (set (match_operand:SI 2 "memory_operand" "")
11719
+ (set (match_operand:SI 3 "memory_operand" "")
11722
+ && current_tune->prefer_ldrd_strd
11723
+ && !optimize_function_for_size_p (cfun)"
11726
+ if (!gen_operands_ldrd_strd (operands, false, true, false))
11728
+ else if (TARGET_ARM)
11730
+ rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
11731
+ operands[2] = adjust_address (operands[2], DImode, 0);
11732
+ /* Emit the pattern
11733
+ [(set (match_dup 0) (match_dup 4))
11734
+ (set (match_dup 1) (match_dup 5))
11735
+ (set (match_dup 2) tmp)] */
11736
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
11737
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
11738
+ emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
11741
+ else if (TARGET_THUMB2)
11743
+ /* Emit the pattern:
11744
+ [(set (match_dup 0) (match_dup 4))
11745
+ (set (match_dup 1) (match_dup 5))
11746
+ (parallel [(set (match_dup 2) (match_dup 0))
11747
+ (set (match_dup 3) (match_dup 1))])] */
11748
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
11749
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
11750
+ rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
11751
+ rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
11752
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11757
+;; The following two peephole optimizations are only relevant for ARM
11758
+;; mode where LDRD/STRD require consecutive registers.
11760
+(define_peephole2 ; swap the destination registers of two loads
11761
+ ; before a commutative operation.
11762
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
11763
+ (match_operand:SI 2 "memory_operand" ""))
11764
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
11765
+ (match_operand:SI 3 "memory_operand" ""))
11766
+ (set (match_operand:SI 4 "arm_general_register_operand" "")
11767
+ (match_operator:SI 5 "commutative_binary_operator"
11768
+ [(match_operand 6 "arm_general_register_operand" "")
11769
+ (match_operand 7 "arm_general_register_operand" "") ]))]
11770
+ "TARGET_LDRD && TARGET_ARM
11771
+ && current_tune->prefer_ldrd_strd
11772
+ && !optimize_function_for_size_p (cfun)
11773
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
11774
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
11775
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
11776
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
11777
+ [(set (match_dup 0) (match_dup 2))
11778
+ (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
11780
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
11786
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
11787
+ operands[2] = adjust_address (operands[2], DImode, 0);
11792
+(define_peephole2 ; swap the destination registers of two loads
11793
+ ; before a commutative operation that sets the flags.
11794
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
11795
+ (match_operand:SI 2 "memory_operand" ""))
11796
+ (set (match_operand:SI 1 "arm_general_register_operand" "")
11797
+ (match_operand:SI 3 "memory_operand" ""))
11799
+ [(set (match_operand:SI 4 "arm_general_register_operand" "")
11800
+ (match_operator:SI 5 "commutative_binary_operator"
11801
+ [(match_operand 6 "arm_general_register_operand" "")
11802
+ (match_operand 7 "arm_general_register_operand" "") ]))
11803
+ (clobber (reg:CC CC_REGNUM))])]
11804
+ "TARGET_LDRD && TARGET_ARM
11805
+ && current_tune->prefer_ldrd_strd
11806
+ && !optimize_function_for_size_p (cfun)
11807
+ && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
11808
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
11809
+ && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
11810
+ && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
11811
+ [(set (match_dup 0) (match_dup 2))
11813
+ [(set (match_dup 4)
11814
+ (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
11815
+ (clobber (reg:CC CC_REGNUM))])]
11817
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
11823
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
11824
+ operands[2] = adjust_address (operands[2], DImode, 0);
11829
+;; TODO: Handle LDRD/STRD with writeback:
11830
+;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
11831
+;; (b) Patterns may be followed by an update of the base address.
11832
--- a/src/gcc/config/arm/predicates.md
11833
+++ b/src/gcc/config/arm/predicates.md
11835
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
11838
+(define_predicate "imm_for_neon_inv_logic_operand"
11839
+ (match_code "const_vector")
11841
+ return (TARGET_NEON
11842
+ && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
11845
+(define_predicate "neon_inv_logic_op2"
11846
+ (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
11847
+ (match_operand 0 "s_register_operand")))
11849
;; Any hard register.
11850
(define_predicate "arm_hard_register_operand"
11852
@@ -145,6 +156,12 @@
11853
(ior (match_operand 0 "arm_rhs_operand")
11854
(match_operand 0 "arm_neg_immediate_operand")))
11856
+(define_predicate "arm_anddi_operand_neon"
11857
+ (ior (match_operand 0 "s_register_operand")
11858
+ (and (match_code "const_int")
11859
+ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
11860
+ (match_operand 0 "neon_inv_logic_op2")))
11862
(define_predicate "arm_adddi_operand"
11863
(ior (match_operand 0 "s_register_operand")
11864
(and (match_code "const_int")
11865
@@ -270,6 +287,18 @@
11866
(define_special_predicate "lt_ge_comparison_operator"
11867
(match_code "lt,ge"))
11869
+;; The vsel instruction only accepts the ARM condition codes listed below.
11870
+(define_special_predicate "arm_vsel_comparison_operator"
11871
+ (and (match_operand 0 "expandable_comparison_operator")
11872
+ (match_test "maybe_get_arm_condition_code (op) == ARM_GE
11873
+ || maybe_get_arm_condition_code (op) == ARM_GT
11874
+ || maybe_get_arm_condition_code (op) == ARM_EQ
11875
+ || maybe_get_arm_condition_code (op) == ARM_VS
11876
+ || maybe_get_arm_condition_code (op) == ARM_LT
11877
+ || maybe_get_arm_condition_code (op) == ARM_LE
11878
+ || maybe_get_arm_condition_code (op) == ARM_NE
11879
+ || maybe_get_arm_condition_code (op) == ARM_VC")))
11881
(define_special_predicate "noov_comparison_operator"
11882
(match_code "lt,ge,eq,ne"))
11884
@@ -513,21 +542,10 @@
11885
&& neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
11888
-(define_predicate "imm_for_neon_inv_logic_operand"
11889
- (match_code "const_vector")
11891
- return (TARGET_NEON
11892
- && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
11895
(define_predicate "neon_logic_op2"
11896
(ior (match_operand 0 "imm_for_neon_logic_operand")
11897
(match_operand 0 "s_register_operand")))
11899
-(define_predicate "neon_inv_logic_op2"
11900
- (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
11901
- (match_operand 0 "s_register_operand")))
11903
;; Predicates for named expanders that overlap multiple ISAs.
11905
(define_predicate "cmpdi_operand"
11906
--- a/src/gcc/config/arm/arm_neon.h
11907
+++ b/src/gcc/config/arm/arm_neon.h
11909
typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
11910
typedef __builtin_neon_di int64x1_t;
11911
typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
11912
+typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8)));
11913
typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
11914
typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
11915
typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
11916
@@ -6016,6 +6017,22 @@
11917
return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
11920
+#if ((__ARM_FP & 0x2) != 0)
11921
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
11922
+vcvt_f16_f32 (float32x4_t __a)
11924
+ return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
11928
+#if ((__ARM_FP & 0x2) != 0)
11929
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11930
+vcvt_f32_f16 (float16x4_t __a)
11932
+ return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
11936
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11937
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
11939
--- a/src/gcc/config/arm/cortex-a53.md
11940
+++ b/src/gcc/config/arm/cortex-a53.md
11942
+;; ARM Cortex-A53 pipeline description
11943
+;; Copyright (C) 2013 Free Software Foundation, Inc.
11945
+;; Contributed by ARM Ltd.
11947
+;; This file is part of GCC.
11949
+;; GCC is free software; you can redistribute it and/or modify it
11950
+;; under the terms of the GNU General Public License as published by
11951
+;; the Free Software Foundation; either version 3, or (at your option)
11952
+;; any later version.
11954
+;; GCC is distributed in the hope that it will be useful, but
11955
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
11956
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11957
+;; General Public License for more details.
11959
+;; You should have received a copy of the GNU General Public License
11960
+;; along with GCC; see the file COPYING3. If not see
11961
+;; <http://www.gnu.org/licenses/>.
11963
+(define_automaton "cortex_a53")
11965
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11966
+;; Functional units.
11967
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11969
+;; There are two main integer execution pipelines, described as
11970
+;; slot 0 and issue slot 1.
11972
+(define_cpu_unit "cortex_a53_slot0" "cortex_a53")
11973
+(define_cpu_unit "cortex_a53_slot1" "cortex_a53")
11975
+(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1")
11976
+(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1")
11978
+;; The load/store pipeline. Load/store instructions can dual-issue from
11979
+;; either pipeline, but two load/stores cannot simultaneously issue.
11981
+(define_cpu_unit "cortex_a53_ls" "cortex_a53")
11983
+;; The store pipeline. Shared between both execution pipelines.
11985
+(define_cpu_unit "cortex_a53_store" "cortex_a53")
11987
+;; The branch pipeline. Branches can dual-issue with other instructions
11988
+;; (except when those instructions take multiple cycles to issue).
11990
+(define_cpu_unit "cortex_a53_branch" "cortex_a53")
11992
+;; The integer divider.
11994
+(define_cpu_unit "cortex_a53_idiv" "cortex_a53")
11996
+;; The floating-point add pipeline used to model the usage
11997
+;; of the add pipeline by fmac instructions.
11999
+(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53")
12001
+;; Floating-point div/sqrt (long latency, out-of-order completion).
12003
+(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
12005
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12006
+;; ALU instructions.
12007
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12009
+(define_insn_reservation "cortex_a53_alu" 2
12010
+ (and (eq_attr "tune" "cortexa53")
12011
+ (eq_attr "type" "alu_reg,simple_alu_imm"))
12012
+ "cortex_a53_slot_any")
12014
+(define_insn_reservation "cortex_a53_alu_shift" 2
12015
+ (and (eq_attr "tune" "cortexa53")
12016
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
12017
+ "cortex_a53_slot_any")
12019
+;; Forwarding path for unshifted operands.
12021
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
12022
+ "cortex_a53_alu")
12024
+(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
12025
+ "cortex_a53_alu_shift"
12026
+ "arm_no_early_alu_shift_dep")
12028
+;; The multiplier pipeline can forward results so there's no need to specify
12029
+;; bypasses. Multiplies can only single-issue currently.
12031
+(define_insn_reservation "cortex_a53_mul" 3
12032
+ (and (eq_attr "tune" "cortexa53")
12033
+ (eq_attr "type" "mult"))
12034
+ "cortex_a53_single_issue")
12036
+;; A multiply with a single-register result or an MLA, followed by an
12037
+;; MLA with an accumulator dependency, has its result forwarded so two
12038
+;; such instructions can issue back-to-back.
12040
+(define_bypass 1 "cortex_a53_mul"
12042
+ "arm_mac_accumulator_is_mul_result")
12044
+;; Punt with a high enough latency for divides.
12045
+(define_insn_reservation "cortex_a53_udiv" 8
12046
+ (and (eq_attr "tune" "cortexa53")
12047
+ (eq_attr "insn" "udiv"))
12048
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")
12050
+(define_insn_reservation "cortex_a53_sdiv" 9
12051
+ (and (eq_attr "tune" "cortexa53")
12052
+ (eq_attr "insn" "sdiv"))
12053
+ "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
12056
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
12057
+ "cortex_a53_alu")
12058
+(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
12059
+ "cortex_a53_alu_shift"
12060
+ "arm_no_early_alu_shift_dep")
12062
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12063
+;; Load/store instructions.
12064
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12066
+;; Address-generation happens in the issue stage.
12068
+(define_insn_reservation "cortex_a53_load1" 3
12069
+ (and (eq_attr "tune" "cortexa53")
12070
+ (eq_attr "type" "load_byte,load1"))
12071
+ "cortex_a53_slot_any+cortex_a53_ls")
12073
+(define_insn_reservation "cortex_a53_store1" 2
12074
+ (and (eq_attr "tune" "cortexa53")
12075
+ (eq_attr "type" "store1"))
12076
+ "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store")
12078
+(define_insn_reservation "cortex_a53_load2" 3
12079
+ (and (eq_attr "tune" "cortexa53")
12080
+ (eq_attr "type" "load2"))
12081
+ "cortex_a53_single_issue+cortex_a53_ls")
12083
+(define_insn_reservation "cortex_a53_store2" 2
12084
+ (and (eq_attr "tune" "cortexa53")
12085
+ (eq_attr "type" "store2"))
12086
+ "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store")
12088
+(define_insn_reservation "cortex_a53_load3plus" 4
12089
+ (and (eq_attr "tune" "cortexa53")
12090
+ (eq_attr "type" "load3,load4"))
12091
+ "(cortex_a53_single_issue+cortex_a53_ls)*2")
12093
+(define_insn_reservation "cortex_a53_store3plus" 3
12094
+ (and (eq_attr "tune" "cortexa53")
12095
+ (eq_attr "type" "store3,store4"))
12096
+ "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2")
12098
+;; Load/store addresses are required early in Issue.
12099
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
12100
+ "cortex_a53_load*"
12101
+ "arm_early_load_addr_dep")
12102
+(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
12103
+ "cortex_a53_store*"
12104
+ "arm_early_store_addr_dep")
12106
+;; Load data can forward in the ALU pipeline
12107
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
12108
+ "cortex_a53_alu")
12109
+(define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
12110
+ "cortex_a53_alu_shift"
12111
+ "arm_no_early_alu_shift_dep")
12113
+;; ALU ops can forward to stores.
12114
+(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift"
12115
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
12116
+ "arm_no_early_store_addr_dep")
12118
+(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus"
12119
+ "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
12120
+ "arm_no_early_store_addr_dep")
12122
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12124
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12126
+;; Currently models all branches as dual-issuable from either execution
12127
+;; slot, which isn't true for all cases. We still need to model indirect
12130
+(define_insn_reservation "cortex_a53_branch" 0
12131
+ (and (eq_attr "tune" "cortexa53")
12132
+ (eq_attr "type" "branch,call"))
12133
+ "cortex_a53_slot_any+cortex_a53_branch")
12135
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12136
+;; Floating-point arithmetic.
12137
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12139
+(define_insn_reservation "cortex_a53_fpalu" 4
12140
+ (and (eq_attr "tune" "cortexa53")
12141
+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
12143
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
12145
+(define_insn_reservation "cortex_a53_fconst" 2
12146
+ (and (eq_attr "tune" "cortexa53")
12147
+ (eq_attr "type" "fconsts,fconstd"))
12148
+ "cortex_a53_slot0+cortex_a53_fpadd_pipe")
12150
+(define_insn_reservation "cortex_a53_fpmul" 4
12151
+ (and (eq_attr "tune" "cortexa53")
12152
+ (eq_attr "type" "fmuls,fmuld"))
12153
+ "cortex_a53_slot0")
12155
+;; For single-precision multiply-accumulate, the add (accumulate) is issued after
12156
+;; the multiply completes. Model that accordingly.
12158
+(define_insn_reservation "cortex_a53_fpmac" 8
12159
+ (and (eq_attr "tune" "cortexa53")
12160
+ (eq_attr "type" "fmacs,fmacd,ffmas,ffmad"))
12161
+ "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe")
12163
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12164
+;; Floating-point divide/square root instructions.
12165
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12166
+;; fsqrt really takes one cycle less, but that is not modelled.
12168
+(define_insn_reservation "cortex_a53_fdivs" 14
12169
+ (and (eq_attr "tune" "cortexa53")
12170
+ (eq_attr "type" "fdivs"))
12171
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13")
12173
+(define_insn_reservation "cortex_a53_fdivd" 29
12174
+ (and (eq_attr "tune" "cortexa53")
12175
+ (eq_attr "type" "fdivd"))
12176
+ "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
12178
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12179
+;; VFP to/from core transfers.
12180
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12182
+(define_insn_reservation "cortex_a53_r2f" 4
12183
+ (and (eq_attr "tune" "cortexa53")
12184
+ (eq_attr "type" "r_2_f"))
12185
+ "cortex_a53_slot0")
12187
+(define_insn_reservation "cortex_a53_f2r" 2
12188
+ (and (eq_attr "tune" "cortexa53")
12189
+ (eq_attr "type" "f_2_r"))
12190
+ "cortex_a53_slot0")
12192
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12193
+;; VFP flag transfer.
12194
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12196
+(define_insn_reservation "cortex_a53_f_flags" 4
12197
+ (and (eq_attr "tune" "cortexa53")
12198
+ (eq_attr "type" "f_flag"))
12199
+ "cortex_a53_slot0")
12201
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12202
+;; VFP load/store.
12203
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12205
+(define_insn_reservation "cortex_a53_f_loads" 4
12206
+ (and (eq_attr "tune" "cortexa53")
12207
+ (eq_attr "type" "f_loads"))
12208
+ "cortex_a53_slot0")
12210
+(define_insn_reservation "cortex_a53_f_loadd" 5
12211
+ (and (eq_attr "tune" "cortexa53")
12212
+ (eq_attr "type" "f_loadd"))
12213
+ "cortex_a53_slot0")
12215
+(define_insn_reservation "cortex_a53_f_stores" 0
12216
+ (and (eq_attr "tune" "cortexa53")
12217
+ (eq_attr "type" "f_stores"))
12218
+ "cortex_a53_slot0")
12220
+(define_insn_reservation "cortex_a53_f_stored" 0
12221
+ (and (eq_attr "tune" "cortexa53")
12222
+ (eq_attr "type" "f_stored"))
12223
+ "cortex_a53_slot0")
12225
+;; Load-to-use for floating-point values has a penalty of one cycle,
12226
+;; i.e. a latency of two.
12228
+(define_bypass 2 "cortex_a53_f_loads"
12229
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
12230
+ cortex_a53_fdivs, cortex_a53_fdivd,\
12233
+(define_bypass 2 "cortex_a53_f_loadd"
12234
+ "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
12235
+ cortex_a53_fdivs, cortex_a53_fdivd,\
12238
--- a/src/gcc/config/arm/bpabi.h
12239
+++ b/src/gcc/config/arm/bpabi.h
12242
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
12243
|mcpu=marvell-pj4 \
12244
+ |mcpu=cortex-a53 \
12245
|mcpu=generic-armv7-a \
12246
|march=armv7-m|mcpu=cortex-m3 \
12247
|march=armv7e-m|mcpu=cortex-m4 \
12249
" %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \
12251
|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \
12252
+ |mcpu=cortex-a53 \
12253
|mcpu=marvell-pj4 \
12254
|mcpu=generic-armv7-a \
12255
|march=armv7-m|mcpu=cortex-m3 \
12256
--- a/src/gcc/config/arm/sync.md
12257
+++ b/src/gcc/config/arm/sync.md
12259
(set_attr "conds" "unconditional")
12260
(set_attr "predicable" "no")])
12262
+(define_insn "atomic_load<mode>"
12263
+ [(set (match_operand:QHSI 0 "register_operand" "=r")
12264
+ (unspec_volatile:QHSI
12265
+ [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q")
12266
+ (match_operand:SI 2 "const_int_operand")] ;; model
12268
+ "TARGET_HAVE_LDACQ"
12270
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
12271
+ if (model == MEMMODEL_RELAXED
12272
+ || model == MEMMODEL_CONSUME
12273
+ || model == MEMMODEL_RELEASE)
12274
+ return \"ldr<sync_sfx>\\t%0, %1\";
12276
+ return \"lda<sync_sfx>\\t%0, %1\";
12280
+(define_insn "atomic_store<mode>"
12281
+ [(set (match_operand:QHSI 0 "memory_operand" "=Q")
12282
+ (unspec_volatile:QHSI
12283
+ [(match_operand:QHSI 1 "general_operand" "r")
12284
+ (match_operand:SI 2 "const_int_operand")] ;; model
12286
+ "TARGET_HAVE_LDACQ"
12288
+ enum memmodel model = (enum memmodel) INTVAL (operands[2]);
12289
+ if (model == MEMMODEL_RELAXED
12290
+ || model == MEMMODEL_CONSUME
12291
+ || model == MEMMODEL_ACQUIRE)
12292
+ return \"str<sync_sfx>\t%1, %0\";
12294
+ return \"stl<sync_sfx>\t%1, %0\";
12298
;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic,
12299
;; even for a 64-bit aligned address. Instead we use a ldrexd unparied
12301
@@ -327,6 +363,16 @@
12302
"ldrex<sync_sfx>%?\t%0, %C1"
12303
[(set_attr "predicable" "yes")])
12305
+(define_insn "arm_load_acquire_exclusive<mode>"
12306
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
12308
+ (unspec_volatile:NARROW
12309
+ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
12311
+ "TARGET_HAVE_LDACQ"
12312
+ "ldaex<sync_sfx>%?\\t%0, %C1"
12313
+ [(set_attr "predicable" "yes")])
12315
(define_insn "arm_load_exclusivesi"
12316
[(set (match_operand:SI 0 "s_register_operand" "=r")
12317
(unspec_volatile:SI
12318
@@ -336,6 +382,15 @@
12320
[(set_attr "predicable" "yes")])
12322
+(define_insn "arm_load_acquire_exclusivesi"
12323
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
12324
+ (unspec_volatile:SI
12325
+ [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
12327
+ "TARGET_HAVE_LDACQ"
12328
+ "ldaex%?\t%0, %C1"
12329
+ [(set_attr "predicable" "yes")])
12331
(define_insn "arm_load_exclusivedi"
12332
[(set (match_operand:DI 0 "s_register_operand" "=r")
12333
(unspec_volatile:DI
12334
@@ -345,6 +400,15 @@
12335
"ldrexd%?\t%0, %H0, %C1"
12336
[(set_attr "predicable" "yes")])
12338
+(define_insn "arm_load_acquire_exclusivedi"
12339
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
12340
+ (unspec_volatile:DI
12341
+ [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
12343
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
12344
+ "ldaexd%?\t%0, %H0, %C1"
12345
+ [(set_attr "predicable" "yes")])
12347
(define_insn "arm_store_exclusive<mode>"
12348
[(set (match_operand:SI 0 "s_register_operand" "=&r")
12349
(unspec_volatile:SI [(const_int 0)] VUNSPEC_SC))
12350
@@ -368,3 +432,31 @@
12351
return "strex<sync_sfx>%?\t%0, %2, %C1";
12353
[(set_attr "predicable" "yes")])
12355
+(define_insn "arm_store_release_exclusivedi"
12356
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
12357
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
12358
+ (set (match_operand:DI 1 "mem_noofs_operand" "=Ua")
12359
+ (unspec_volatile:DI
12360
+ [(match_operand:DI 2 "s_register_operand" "r")]
12362
+ "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
12364
+ rtx value = operands[2];
12365
+ /* See comment in arm_store_exclusive<mode> above. */
12366
+ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
12367
+ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
12368
+ return "stlexd%?\t%0, %2, %3, %C1";
12370
+ [(set_attr "predicable" "yes")])
12372
+(define_insn "arm_store_release_exclusive<mode>"
12373
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
12374
+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX))
12375
+ (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua")
12376
+ (unspec_volatile:QHSI
12377
+ [(match_operand:QHSI 2 "s_register_operand" "r")]
12379
+ "TARGET_HAVE_LDACQ"
12380
+ "stlex<sync_sfx>%?\t%0, %2, %C1"
12381
+ [(set_attr "predicable" "yes")])
12382
--- a/src/gcc/config/arm/neon-testgen.ml
12383
+++ b/src/gcc/config/arm/neon-testgen.ml
12384
@@ -163,10 +163,12 @@
12385
match List.find (fun feature ->
12386
match feature with Requires_feature _ -> true
12387
| Requires_arch _ -> true
12388
+ | Requires_FP_bit 1 -> true
12391
Requires_feature "FMA" -> "arm_neonv2"
12392
| Requires_arch 8 -> "arm_v8_neon"
12393
+ | Requires_FP_bit 1 -> "arm_neon_fp16"
12394
| _ -> assert false
12395
with Not_found -> "arm_neon"
12397
--- a/src/gcc/config/arm/arm.md
12398
+++ b/src/gcc/config/arm/arm.md
12400
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
12401
; arm_arch6. This attribute is used to compute attribute "enabled",
12402
; use type "any" to enable an alternative in all cases.
12403
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
12404
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
12405
(const_string "any"))
12407
(define_attr "arch_enabled" "no,yes"
12408
@@ -129,24 +129,16 @@
12409
(match_test "TARGET_32BIT && !arm_arch6"))
12410
(const_string "yes")
12412
- (and (eq_attr "arch" "onlya8")
12413
- (eq_attr "tune" "cortexa8"))
12414
+ (and (eq_attr "arch" "avoid_neon_for_64bits")
12415
+ (match_test "TARGET_NEON")
12416
+ (not (match_test "TARGET_PREFER_NEON_64BITS")))
12417
(const_string "yes")
12419
- (and (eq_attr "arch" "neon_onlya8")
12420
- (eq_attr "tune" "cortexa8")
12421
- (match_test "TARGET_NEON"))
12422
+ (and (eq_attr "arch" "neon_for_64bits")
12423
+ (match_test "TARGET_NEON")
12424
+ (match_test "TARGET_PREFER_NEON_64BITS"))
12425
(const_string "yes")
12427
- (and (eq_attr "arch" "nota8")
12428
- (not (eq_attr "tune" "cortexa8")))
12429
- (const_string "yes")
12431
- (and (eq_attr "arch" "neon_nota8")
12432
- (not (eq_attr "tune" "cortexa8"))
12433
- (match_test "TARGET_NEON"))
12434
- (const_string "yes")
12436
(and (eq_attr "arch" "iwmmxt2")
12437
(match_test "TARGET_REALLY_IWMMXT2"))
12438
(const_string "yes")]
12439
@@ -296,6 +288,8 @@
12448
@@ -502,7 +496,7 @@
12450
(define_attr "generic_sched" "yes,no"
12451
(const (if_then_else
12452
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4,marvell_pj4")
12453
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexm4,marvell_pj4")
12454
(eq_attr "tune_cortexr4" "yes"))
12455
(const_string "no")
12456
(const_string "yes"))))
12457
@@ -510,7 +504,7 @@
12458
(define_attr "generic_vfp" "yes,no"
12459
(const (if_then_else
12460
(and (eq_attr "fpu" "vfp")
12461
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4,marvell_pj4")
12462
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4")
12463
(eq_attr "tune_cortexr4" "no"))
12464
(const_string "yes")
12465
(const_string "no"))))
12466
@@ -531,6 +525,7 @@
12467
(include "cortex-a8.md")
12468
(include "cortex-a9.md")
12469
(include "cortex-a15.md")
12470
+(include "cortex-a53.md")
12471
(include "cortex-r4.md")
12472
(include "cortex-r4f.md")
12473
(include "cortex-m4.md")
12474
@@ -844,7 +839,7 @@
12476
;; This is the canonicalization of addsi3_compare0_for_combiner when the
12477
;; addend is a constant.
12478
-(define_insn "*cmpsi2_addneg"
12479
+(define_insn "cmpsi2_addneg"
12480
[(set (reg:CC CC_REGNUM)
12482
(match_operand:SI 1 "s_register_operand" "r,r")
12483
@@ -975,7 +970,8 @@
12486
sbc%?\\t%0, %1, #%B2"
12487
- [(set_attr "conds" "use")]
12488
+ [(set_attr "conds" "use")
12489
+ (set_attr "predicable" "yes")]
12492
(define_insn "*addsi3_carryin_alt2_<optab>"
12493
@@ -987,7 +983,8 @@
12496
sbc%?\\t%0, %1, #%B2"
12497
- [(set_attr "conds" "use")]
12498
+ [(set_attr "conds" "use")
12499
+ (set_attr "predicable" "yes")]
12502
(define_insn "*addsi3_carryin_shift_<optab>"
12503
@@ -1001,6 +998,7 @@
12505
"adc%?\\t%0, %1, %3%S2"
12506
[(set_attr "conds" "use")
12507
+ (set_attr "predicable" "yes")
12508
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
12509
(const_string "alu_shift")
12510
(const_string "alu_shift_reg")))]
12511
@@ -1017,26 +1015,88 @@
12512
[(set_attr "conds" "set")]
12515
-(define_expand "incscc"
12516
+(define_insn "*subsi3_carryin"
12517
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
12518
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
12519
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
12520
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
12521
+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I")
12522
+ (match_operand:SI 2 "s_register_operand" "r,r"))
12523
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12527
+ sbc%?\\t%0, %1, %2
12528
+ rsc%?\\t%0, %2, %1"
12529
+ [(set_attr "conds" "use")
12530
+ (set_attr "arch" "*,a")
12531
+ (set_attr "predicable" "yes")]
12534
-(define_insn "*arm_incscc"
12535
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
12536
- (plus:SI (match_operator:SI 2 "arm_comparison_operator"
12537
- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
12538
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
12539
+(define_insn "*subsi3_carryin_const"
12540
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
12541
+ (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r")
12542
+ (match_operand:SI 2 "arm_not_operand" "K"))
12543
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12545
+ "sbc\\t%0, %1, #%B2"
12546
+ [(set_attr "conds" "use")]
12549
+(define_insn "*subsi3_carryin_compare"
12550
+ [(set (reg:CC CC_REGNUM)
12551
+ (compare:CC (match_operand:SI 1 "s_register_operand" "r")
12552
+ (match_operand:SI 2 "s_register_operand" "r")))
12553
+ (set (match_operand:SI 0 "s_register_operand" "=r")
12554
+ (minus:SI (minus:SI (match_dup 1)
12556
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12558
+ "sbcs\\t%0, %1, %2"
12559
+ [(set_attr "conds" "set")]
12562
+(define_insn "*subsi3_carryin_compare_const"
12563
+ [(set (reg:CC CC_REGNUM)
12564
+ (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r")
12565
+ (match_operand:SI 2 "arm_not_operand" "K")))
12566
+ (set (match_operand:SI 0 "s_register_operand" "=r")
12567
+ (minus:SI (plus:SI (match_dup 1)
12569
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12571
+ "sbcs\\t%0, %1, #%B2"
12572
+ [(set_attr "conds" "set")]
12575
+(define_insn "*subsi3_carryin_shift"
12576
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
12577
+ (minus:SI (minus:SI
12578
+ (match_operand:SI 1 "s_register_operand" "r")
12579
+ (match_operator:SI 2 "shift_operator"
12580
+ [(match_operand:SI 3 "s_register_operand" "r")
12581
+ (match_operand:SI 4 "reg_or_int_operand" "rM")]))
12582
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12584
+ "sbc%?\\t%0, %1, %3%S2"
12585
+ [(set_attr "conds" "use")
12586
+ (set_attr "predicable" "yes")
12587
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
12588
+ (const_string "alu_shift")
12589
+ (const_string "alu_shift_reg")))]
12592
+(define_insn "*rsbsi3_carryin_shift"
12593
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
12594
+ (minus:SI (minus:SI
12595
+ (match_operator:SI 2 "shift_operator"
12596
+ [(match_operand:SI 3 "s_register_operand" "r")
12597
+ (match_operand:SI 4 "reg_or_int_operand" "rM")])
12598
+ (match_operand:SI 1 "s_register_operand" "r"))
12599
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12602
- add%d2\\t%0, %1, #1
12603
- mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1"
12604
+ "rsc%?\\t%0, %1, %3%S2"
12605
[(set_attr "conds" "use")
12606
- (set_attr "length" "4,8")]
12607
+ (set_attr "predicable" "yes")
12608
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
12609
+ (const_string "alu_shift")
12610
+ (const_string "alu_shift_reg")))]
12613
; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant.
12614
@@ -1087,13 +1147,27 @@
12618
-(define_insn "*arm_subdi3"
12619
+(define_insn_and_split "*arm_subdi3"
12620
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r")
12621
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
12622
(match_operand:DI 2 "s_register_operand" "r,0,0")))
12623
(clobber (reg:CC CC_REGNUM))]
12624
"TARGET_32BIT && !TARGET_NEON"
12625
- "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
12626
+ "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
12627
+ "&& reload_completed"
12628
+ [(parallel [(set (reg:CC CC_REGNUM)
12629
+ (compare:CC (match_dup 1) (match_dup 2)))
12630
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
12631
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
12632
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12634
+ operands[3] = gen_highpart (SImode, operands[0]);
12635
+ operands[0] = gen_lowpart (SImode, operands[0]);
12636
+ operands[4] = gen_highpart (SImode, operands[1]);
12637
+ operands[1] = gen_lowpart (SImode, operands[1]);
12638
+ operands[5] = gen_highpart (SImode, operands[2]);
12639
+ operands[2] = gen_lowpart (SImode, operands[2]);
12641
[(set_attr "conds" "clob")
12642
(set_attr "length" "8")]
12644
@@ -1108,55 +1182,113 @@
12645
[(set_attr "length" "4")]
12648
-(define_insn "*subdi_di_zesidi"
12649
+(define_insn_and_split "*subdi_di_zesidi"
12650
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
12651
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
12653
(match_operand:SI 2 "s_register_operand" "r,r"))))
12654
(clobber (reg:CC CC_REGNUM))]
12656
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
12657
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
12658
+ "&& reload_completed"
12659
+ [(parallel [(set (reg:CC CC_REGNUM)
12660
+ (compare:CC (match_dup 1) (match_dup 2)))
12661
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
12662
+ (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5))
12663
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12665
+ operands[3] = gen_highpart (SImode, operands[0]);
12666
+ operands[0] = gen_lowpart (SImode, operands[0]);
12667
+ operands[4] = gen_highpart (SImode, operands[1]);
12668
+ operands[1] = gen_lowpart (SImode, operands[1]);
12669
+ operands[5] = GEN_INT (~0);
12671
[(set_attr "conds" "clob")
12672
(set_attr "length" "8")]
12675
-(define_insn "*subdi_di_sesidi"
12676
+(define_insn_and_split "*subdi_di_sesidi"
12677
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
12678
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
12680
(match_operand:SI 2 "s_register_operand" "r,r"))))
12681
(clobber (reg:CC CC_REGNUM))]
12683
- "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
12684
+ "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
12685
+ "&& reload_completed"
12686
+ [(parallel [(set (reg:CC CC_REGNUM)
12687
+ (compare:CC (match_dup 1) (match_dup 2)))
12688
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
12689
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4)
12690
+ (ashiftrt:SI (match_dup 2)
12692
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12694
+ operands[3] = gen_highpart (SImode, operands[0]);
12695
+ operands[0] = gen_lowpart (SImode, operands[0]);
12696
+ operands[4] = gen_highpart (SImode, operands[1]);
12697
+ operands[1] = gen_lowpart (SImode, operands[1]);
12699
[(set_attr "conds" "clob")
12700
(set_attr "length" "8")]
12703
-(define_insn "*subdi_zesidi_di"
12704
+(define_insn_and_split "*subdi_zesidi_di"
12705
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
12706
(minus:DI (zero_extend:DI
12707
(match_operand:SI 2 "s_register_operand" "r,r"))
12708
(match_operand:DI 1 "s_register_operand" "0,r")))
12709
(clobber (reg:CC CC_REGNUM))]
12711
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
12712
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
12713
+ ; is equivalent to:
12714
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0"
12715
+ "&& reload_completed"
12716
+ [(parallel [(set (reg:CC CC_REGNUM)
12717
+ (compare:CC (match_dup 2) (match_dup 1)))
12718
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
12719
+ (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4))
12720
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12722
+ operands[3] = gen_highpart (SImode, operands[0]);
12723
+ operands[0] = gen_lowpart (SImode, operands[0]);
12724
+ operands[4] = gen_highpart (SImode, operands[1]);
12725
+ operands[1] = gen_lowpart (SImode, operands[1]);
12727
[(set_attr "conds" "clob")
12728
(set_attr "length" "8")]
12731
-(define_insn "*subdi_sesidi_di"
12732
+(define_insn_and_split "*subdi_sesidi_di"
12733
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
12734
(minus:DI (sign_extend:DI
12735
(match_operand:SI 2 "s_register_operand" "r,r"))
12736
(match_operand:DI 1 "s_register_operand" "0,r")))
12737
(clobber (reg:CC CC_REGNUM))]
12739
- "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
12740
+ "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
12741
+ ; is equivalent to:
12742
+ ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31"
12743
+ "&& reload_completed"
12744
+ [(parallel [(set (reg:CC CC_REGNUM)
12745
+ (compare:CC (match_dup 2) (match_dup 1)))
12746
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))])
12747
+ (set (match_dup 3) (minus:SI (minus:SI
12748
+ (ashiftrt:SI (match_dup 2)
12751
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12753
+ operands[3] = gen_highpart (SImode, operands[0]);
12754
+ operands[0] = gen_lowpart (SImode, operands[0]);
12755
+ operands[4] = gen_highpart (SImode, operands[1]);
12756
+ operands[1] = gen_lowpart (SImode, operands[1]);
12758
[(set_attr "conds" "clob")
12759
(set_attr "length" "8")]
12762
-(define_insn "*subdi_zesidi_zesidi"
12763
+(define_insn_and_split "*subdi_zesidi_zesidi"
12764
[(set (match_operand:DI 0 "s_register_operand" "=r")
12765
(minus:DI (zero_extend:DI
12766
(match_operand:SI 1 "s_register_operand" "r"))
12767
@@ -1164,7 +1296,17 @@
12768
(match_operand:SI 2 "s_register_operand" "r"))))
12769
(clobber (reg:CC CC_REGNUM))]
12771
- "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
12772
+ "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
12773
+ "&& reload_completed"
12774
+ [(parallel [(set (reg:CC CC_REGNUM)
12775
+ (compare:CC (match_dup 1) (match_dup 2)))
12776
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
12777
+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1))
12778
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
12780
+ operands[3] = gen_highpart (SImode, operands[0]);
12781
+ operands[0] = gen_lowpart (SImode, operands[0]);
12783
[(set_attr "conds" "clob")
12784
(set_attr "length" "8")]
12786
@@ -1254,7 +1396,7 @@
12787
(set_attr "type" "simple_alu_imm,*,*")]
12790
-(define_insn "*subsi3_compare"
12791
+(define_insn "subsi3_compare"
12792
[(set (reg:CC CC_REGNUM)
12793
(compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
12794
(match_operand:SI 2 "arm_rhs_operand" "I,r,r")))
12795
@@ -1269,29 +1411,6 @@
12796
(set_attr "type" "simple_alu_imm,*,*")]
12799
-(define_expand "decscc"
12800
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
12801
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
12802
- (match_operator:SI 2 "arm_comparison_operator"
12803
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
12808
-(define_insn "*arm_decscc"
12809
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
12810
- (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
12811
- (match_operator:SI 2 "arm_comparison_operator"
12812
- [(match_operand 3 "cc_register" "") (const_int 0)])))]
12815
- sub%d2\\t%0, %1, #1
12816
- mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1"
12817
- [(set_attr "conds" "use")
12818
- (set_attr "length" "*,8")
12819
- (set_attr "type" "simple_alu_imm,*")]
12822
(define_expand "subsf3"
12823
[(set (match_operand:SF 0 "s_register_operand" "")
12824
(minus:SF (match_operand:SF 1 "s_register_operand" "")
12825
@@ -2024,13 +2143,58 @@
12829
-(define_insn "*anddi3_insn"
12830
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
12831
- (and:DI (match_operand:DI 1 "s_register_operand" "%0,r")
12832
- (match_operand:DI 2 "s_register_operand" "r,r")))]
12833
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
12835
- [(set_attr "length" "8")]
12836
+(define_insn_and_split "*anddi3_insn"
12837
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w")
12838
+ (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0")
12839
+ (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))]
12840
+ "TARGET_32BIT && !TARGET_IWMMXT"
12842
+ switch (which_alternative)
12847
+ case 3: /* fall through */
12849
+ case 4: /* fall through */
12850
+ case 8: return "vand\t%P0, %P1, %P2";
12851
+ case 5: /* fall through */
12852
+ case 9: return neon_output_logic_immediate ("vand", &operands[2],
12853
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
12854
+ case 6: return "#";
12855
+ case 7: return "#";
12856
+ default: gcc_unreachable ();
12859
+ "TARGET_32BIT && !TARGET_IWMMXT"
12860
+ [(set (match_dup 3) (match_dup 4))
12861
+ (set (match_dup 5) (match_dup 6))]
12864
+ operands[3] = gen_lowpart (SImode, operands[0]);
12865
+ operands[5] = gen_highpart (SImode, operands[0]);
12867
+ operands[4] = simplify_gen_binary (AND, SImode,
12868
+ gen_lowpart (SImode, operands[1]),
12869
+ gen_lowpart (SImode, operands[2]));
12870
+ operands[6] = simplify_gen_binary (AND, SImode,
12871
+ gen_highpart (SImode, operands[1]),
12872
+ gen_highpart_mode (SImode, DImode, operands[2]));
12875
+ [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
12876
+ (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*,
12877
+ avoid_neon_for_64bits,avoid_neon_for_64bits")
12878
+ (set_attr "length" "8,8,8,8,*,*,8,8,*,*")
12879
+ (set (attr "insn_enabled") (if_then_else
12880
+ (lt (symbol_ref "which_alternative")
12882
+ (if_then_else (match_test "!TARGET_NEON")
12883
+ (const_string "yes")
12884
+ (const_string "no"))
12885
+ (if_then_else (match_test "TARGET_NEON")
12886
+ (const_string "yes")
12887
+ (const_string "no"))))]
12890
(define_insn_and_split "*anddi_zesidi_di"
12891
@@ -3096,13 +3260,17 @@
12895
-(define_insn "*andsi_iorsi3_notsi"
12896
+(define_insn_and_split "*andsi_iorsi3_notsi"
12897
[(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
12898
(and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
12899
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))
12900
(not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))]
12902
- "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
12903
+ "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
12904
+ "&& reload_completed"
12905
+ [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2)))
12906
+ (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))]
12908
[(set_attr "length" "8")
12909
(set_attr "ce_count" "2")
12910
(set_attr "predicable" "yes")]
12911
@@ -3253,15 +3421,23 @@
12912
[(set_attr "predicable" "yes")]
12915
-(define_insn "*arm_smax_insn"
12916
+(define_insn_and_split "*arm_smax_insn"
12917
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
12918
(smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
12919
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
12920
(clobber (reg:CC CC_REGNUM))]
12923
- cmp\\t%1, %2\;movlt\\t%0, %2
12924
- cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
12926
+ ; cmp\\t%1, %2\;movlt\\t%0, %2
12927
+ ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
12929
+ [(set (reg:CC CC_REGNUM)
12930
+ (compare:CC (match_dup 1) (match_dup 2)))
12931
+ (set (match_dup 0)
12932
+ (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0))
12936
[(set_attr "conds" "clob")
12937
(set_attr "length" "8,12")]
12939
@@ -3293,15 +3469,23 @@
12940
[(set_attr "predicable" "yes")]
12943
-(define_insn "*arm_smin_insn"
12944
+(define_insn_and_split "*arm_smin_insn"
12945
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
12946
(smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
12947
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
12948
(clobber (reg:CC CC_REGNUM))]
12951
- cmp\\t%1, %2\;movge\\t%0, %2
12952
- cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
12954
+ ; cmp\\t%1, %2\;movge\\t%0, %2
12955
+ ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
12957
+ [(set (reg:CC CC_REGNUM)
12958
+ (compare:CC (match_dup 1) (match_dup 2)))
12959
+ (set (match_dup 0)
12960
+ (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0))
12964
[(set_attr "conds" "clob")
12965
(set_attr "length" "8,12")]
12967
@@ -3316,16 +3500,24 @@
12971
-(define_insn "*arm_umaxsi3"
12972
+(define_insn_and_split "*arm_umaxsi3"
12973
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
12974
(umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
12975
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
12976
(clobber (reg:CC CC_REGNUM))]
12979
- cmp\\t%1, %2\;movcc\\t%0, %2
12980
- cmp\\t%1, %2\;movcs\\t%0, %1
12981
- cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
12983
+ ; cmp\\t%1, %2\;movcc\\t%0, %2
12984
+ ; cmp\\t%1, %2\;movcs\\t%0, %1
12985
+ ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
12987
+ [(set (reg:CC CC_REGNUM)
12988
+ (compare:CC (match_dup 1) (match_dup 2)))
12989
+ (set (match_dup 0)
12990
+ (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
12994
[(set_attr "conds" "clob")
12995
(set_attr "length" "8,8,12")]
12997
@@ -3340,16 +3532,24 @@
13001
-(define_insn "*arm_uminsi3"
13002
+(define_insn_and_split "*arm_uminsi3"
13003
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
13004
(umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
13005
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
13006
(clobber (reg:CC CC_REGNUM))]
13009
- cmp\\t%1, %2\;movcs\\t%0, %2
13010
- cmp\\t%1, %2\;movcc\\t%0, %1
13011
- cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
13013
+ ; cmp\\t%1, %2\;movcs\\t%0, %2
13014
+ ; cmp\\t%1, %2\;movcc\\t%0, %1
13015
+ ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
13017
+ [(set (reg:CC CC_REGNUM)
13018
+ (compare:CC (match_dup 1) (match_dup 2)))
13019
+ (set (match_dup 0)
13020
+ (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))
13024
[(set_attr "conds" "clob")
13025
(set_attr "length" "8,8,12")]
13027
@@ -3360,7 +3560,7 @@
13028
[(match_operand:SI 1 "s_register_operand" "r")
13029
(match_operand:SI 2 "s_register_operand" "r")]))
13030
(clobber (reg:CC CC_REGNUM))]
13032
+ "TARGET_32BIT && optimize_insn_for_size_p()"
13034
operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
13035
operands[1], operands[2]);
13036
@@ -3423,6 +3623,50 @@
13040
+; Reject the frame pointer in operand[1], since reloading this after
13041
+; it has been eliminated can cause carnage.
13042
+(define_insn_and_split "*minmax_arithsi_non_canon"
13043
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
13045
+ (match_operand:SI 1 "s_register_operand" "0,?r")
13046
+ (match_operator:SI 4 "minmax_operator"
13047
+ [(match_operand:SI 2 "s_register_operand" "r,r")
13048
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
13049
+ (clobber (reg:CC CC_REGNUM))]
13050
+ "TARGET_32BIT && !arm_eliminable_register (operands[1])"
13052
+ "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed"
13053
+ [(set (reg:CC CC_REGNUM)
13054
+ (compare:CC (match_dup 2) (match_dup 3)))
13056
+ (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)])
13057
+ (set (match_dup 0)
13058
+ (minus:SI (match_dup 1)
13060
+ (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)])
13061
+ (set (match_dup 0)
13062
+ (minus:SI (match_dup 1)
13063
+ (match_dup 3))))]
13065
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
13066
+ operands[2], operands[3]);
13067
+ enum rtx_code rc = minmax_code (operands[4]);
13068
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode,
13069
+ operands[2], operands[3]);
13071
+ if (mode == CCFPmode || mode == CCFPEmode)
13072
+ rc = reverse_condition_maybe_unordered (rc);
13074
+ rc = reverse_condition (rc);
13075
+ operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]);
13077
+ [(set_attr "conds" "clob")
13078
+ (set (attr "length")
13079
+ (if_then_else (eq_attr "is_thumb" "yes")
13081
+ (const_int 12)))]
13084
(define_code_iterator SAT [smin smax])
13085
(define_code_iterator SATrev [smin smax])
13086
(define_code_attr SATlo [(smin "1") (smax "2")])
13087
@@ -3533,13 +3777,26 @@
13091
-(define_insn "arm_ashldi3_1bit"
13092
+(define_insn_and_split "arm_ashldi3_1bit"
13093
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
13094
(ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
13096
(clobber (reg:CC CC_REGNUM))]
13098
- "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
13099
+ "#" ; "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
13100
+ "&& reload_completed"
13101
+ [(parallel [(set (reg:CC CC_REGNUM)
13102
+ (compare:CC (ashift:SI (match_dup 1) (const_int 1))
13104
+ (set (match_dup 0) (ashift:SI (match_dup 1) (const_int 1)))])
13105
+ (set (match_dup 2) (plus:SI (plus:SI (match_dup 3) (match_dup 3))
13106
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
13108
+ operands[2] = gen_highpart (SImode, operands[0]);
13109
+ operands[0] = gen_lowpart (SImode, operands[0]);
13110
+ operands[3] = gen_highpart (SImode, operands[1]);
13111
+ operands[1] = gen_lowpart (SImode, operands[1]);
13113
[(set_attr "conds" "clob")
13114
(set_attr "length" "8")]
13116
@@ -3615,18 +3872,43 @@
13120
-(define_insn "arm_ashrdi3_1bit"
13121
+(define_insn_and_split "arm_ashrdi3_1bit"
13122
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
13123
(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
13125
(clobber (reg:CC CC_REGNUM))]
13127
- "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
13128
+ "#" ; "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
13129
+ "&& reload_completed"
13130
+ [(parallel [(set (reg:CC CC_REGNUM)
13131
+ (compare:CC (ashiftrt:SI (match_dup 3) (const_int 1))
13133
+ (set (match_dup 2) (ashiftrt:SI (match_dup 3) (const_int 1)))])
13134
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
13135
+ (reg:CC_C CC_REGNUM)]
13138
+ operands[2] = gen_highpart (SImode, operands[0]);
13139
+ operands[0] = gen_lowpart (SImode, operands[0]);
13140
+ operands[3] = gen_highpart (SImode, operands[1]);
13141
+ operands[1] = gen_lowpart (SImode, operands[1]);
13143
[(set_attr "conds" "clob")
13144
- (set_attr "insn" "mov")
13145
(set_attr "length" "8")]
13148
+(define_insn "*rrx"
13149
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
13150
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")
13151
+ (reg:CC_C CC_REGNUM)]
13154
+ "mov\\t%0, %1, rrx"
13155
+ [(set_attr "conds" "use")
13156
+ (set_attr "insn" "mov")
13157
+ (set_attr "type" "alu_shift")]
13160
(define_expand "ashrsi3"
13161
[(set (match_operand:SI 0 "s_register_operand" "")
13162
(ashiftrt:SI (match_operand:SI 1 "s_register_operand" "")
13163
@@ -3695,15 +3977,28 @@
13167
-(define_insn "arm_lshrdi3_1bit"
13168
+(define_insn_and_split "arm_lshrdi3_1bit"
13169
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
13170
(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
13172
(clobber (reg:CC CC_REGNUM))]
13174
- "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
13175
+ "#" ; "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
13176
+ "&& reload_completed"
13177
+ [(parallel [(set (reg:CC CC_REGNUM)
13178
+ (compare:CC (lshiftrt:SI (match_dup 3) (const_int 1))
13180
+ (set (match_dup 2) (lshiftrt:SI (match_dup 3) (const_int 1)))])
13181
+ (set (match_dup 0) (unspec:SI [(match_dup 1)
13182
+ (reg:CC_C CC_REGNUM)]
13185
+ operands[2] = gen_highpart (SImode, operands[0]);
13186
+ operands[0] = gen_lowpart (SImode, operands[0]);
13187
+ operands[3] = gen_highpart (SImode, operands[1]);
13188
+ operands[1] = gen_lowpart (SImode, operands[1]);
13190
[(set_attr "conds" "clob")
13191
- (set_attr "insn" "mov")
13192
(set_attr "length" "8")]
13195
@@ -3791,6 +4086,23 @@
13196
(const_string "alu_shift_reg")))]
13199
+(define_insn "*shiftsi3_compare"
13200
+ [(set (reg:CC CC_REGNUM)
13201
+ (compare:CC (match_operator:SI 3 "shift_operator"
13202
+ [(match_operand:SI 1 "s_register_operand" "r")
13203
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
13205
+ (set (match_operand:SI 0 "s_register_operand" "=r")
13206
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
13208
+ "* return arm_output_shift(operands, 1);"
13209
+ [(set_attr "conds" "set")
13210
+ (set_attr "shift" "1")
13211
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
13212
+ (const_string "alu_shift")
13213
+ (const_string "alu_shift_reg")))]
13216
(define_insn "*shiftsi3_compare0"
13217
[(set (reg:CC_NOOV CC_REGNUM)
13218
(compare:CC_NOOV (match_operator:SI 3 "shift_operator"
13219
@@ -4154,12 +4466,24 @@
13221
;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1).
13222
;; The first alternative allows the common case of a *full* overlap.
13223
-(define_insn "*arm_negdi2"
13224
+(define_insn_and_split "*arm_negdi2"
13225
[(set (match_operand:DI 0 "s_register_operand" "=r,&r")
13226
(neg:DI (match_operand:DI 1 "s_register_operand" "0,r")))
13227
(clobber (reg:CC CC_REGNUM))]
13229
- "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
13230
+ "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
13231
+ "&& reload_completed"
13232
+ [(parallel [(set (reg:CC CC_REGNUM)
13233
+ (compare:CC (const_int 0) (match_dup 1)))
13234
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
13235
+ (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3))
13236
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
13238
+ operands[2] = gen_highpart (SImode, operands[0]);
13239
+ operands[0] = gen_lowpart (SImode, operands[0]);
13240
+ operands[3] = gen_highpart (SImode, operands[1]);
13241
+ operands[1] = gen_lowpart (SImode, operands[1]);
13243
[(set_attr "conds" "clob")
13244
(set_attr "length" "8")]
13246
@@ -4209,6 +4533,73 @@
13247
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
13250
+;; Negate an extended 32-bit value.
13251
+(define_insn_and_split "*negdi_extendsidi"
13252
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r,l,&l")
13253
+ (neg:DI (sign_extend:DI (match_operand:SI 1 "s_register_operand" "0,r,0,l"))))
13254
+ (clobber (reg:CC CC_REGNUM))]
13256
+ "#" ; rsb\\t%Q0, %1, #0\;asr\\t%R0, %Q0, #31
13257
+ "&& reload_completed"
13260
+ operands[2] = gen_highpart (SImode, operands[0]);
13261
+ operands[0] = gen_lowpart (SImode, operands[0]);
13262
+ rtx tmp = gen_rtx_SET (VOIDmode,
13264
+ gen_rtx_MINUS (SImode,
13273
+ /* Set the flags, to emit the short encoding in Thumb2. */
13274
+ rtx flags = gen_rtx_SET (VOIDmode,
13275
+ gen_rtx_REG (CCmode, CC_REGNUM),
13276
+ gen_rtx_COMPARE (CCmode,
13279
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
13284
+ emit_insn (gen_rtx_SET (VOIDmode,
13286
+ gen_rtx_ASHIFTRT (SImode,
13291
+ [(set_attr "length" "8,8,4,4")
13292
+ (set_attr "arch" "a,a,t2,t2")]
13295
+(define_insn_and_split "*negdi_zero_extendsidi"
13296
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
13297
+ (neg:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r"))))
13298
+ (clobber (reg:CC CC_REGNUM))]
13300
+ "#" ; "rsbs\\t%Q0, %1, #0\;sbc\\t%R0,%R0,%R0"
13301
+ ;; Don't care what register is input to sbc,
13302
+ ;; since we just just need to propagate the carry.
13303
+ "&& reload_completed"
13304
+ [(parallel [(set (reg:CC CC_REGNUM)
13305
+ (compare:CC (const_int 0) (match_dup 1)))
13306
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
13307
+ (set (match_dup 2) (minus:SI (minus:SI (match_dup 2) (match_dup 2))
13308
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
13310
+ operands[2] = gen_highpart (SImode, operands[0]);
13311
+ operands[0] = gen_lowpart (SImode, operands[0]);
13313
+ [(set_attr "conds" "clob")
13314
+ (set_attr "length" "8")] ;; length in thumb is 4
13317
;; abssi2 doesn't really clobber the condition codes if a different register
13318
;; is being set. To keep things simple, assume during rtl manipulations that
13319
;; it does, but tell the final scan operator the truth. Similarly for
13320
@@ -4227,14 +4618,67 @@
13321
operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
13324
-(define_insn "*arm_abssi2"
13325
+(define_insn_and_split "*arm_abssi2"
13326
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
13327
(abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
13328
(clobber (reg:CC CC_REGNUM))]
13331
- cmp\\t%0, #0\;rsblt\\t%0, %0, #0
13332
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
13334
+ "&& reload_completed"
13337
+ /* if (which_alternative == 0) */
13338
+ if (REGNO(operands[0]) == REGNO(operands[1]))
13340
+ /* Emit the pattern:
13341
+ cmp\\t%0, #0\;rsblt\\t%0, %0, #0
13342
+ [(set (reg:CC CC_REGNUM)
13343
+ (compare:CC (match_dup 0) (const_int 0)))
13344
+ (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0))
13345
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))]
13347
+ emit_insn (gen_rtx_SET (VOIDmode,
13348
+ gen_rtx_REG (CCmode, CC_REGNUM),
13349
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
13350
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13351
+ (gen_rtx_LT (SImode,
13352
+ gen_rtx_REG (CCmode, CC_REGNUM),
13354
+ (gen_rtx_SET (VOIDmode,
13356
+ (gen_rtx_MINUS (SImode,
13358
+ operands[1]))))));
13363
+ /* Emit the pattern:
13364
+ alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31
13365
+ [(set (match_dup 0)
13366
+ (xor:SI (match_dup 1)
13367
+ (ashiftrt:SI (match_dup 1) (const_int 31))))
13368
+ (set (match_dup 0)
13369
+ (minus:SI (match_dup 0)
13370
+ (ashiftrt:SI (match_dup 1) (const_int 31))))]
13372
+ emit_insn (gen_rtx_SET (VOIDmode,
13374
+ gen_rtx_XOR (SImode,
13375
+ gen_rtx_ASHIFTRT (SImode,
13379
+ emit_insn (gen_rtx_SET (VOIDmode,
13381
+ gen_rtx_MINUS (SImode,
13383
+ gen_rtx_ASHIFTRT (SImode,
13385
+ GEN_INT (31)))));
13389
[(set_attr "conds" "clob,*")
13390
(set_attr "shift" "1")
13391
(set_attr "predicable" "no, yes")
13392
@@ -4255,14 +4699,56 @@
13393
[(set_attr "length" "6")]
13396
-(define_insn "*arm_neg_abssi2"
13397
+(define_insn_and_split "*arm_neg_abssi2"
13398
[(set (match_operand:SI 0 "s_register_operand" "=r,&r")
13399
(neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
13400
(clobber (reg:CC CC_REGNUM))]
13403
- cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
13404
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
13406
+ "&& reload_completed"
13409
+ /* if (which_alternative == 0) */
13410
+ if (REGNO (operands[0]) == REGNO (operands[1]))
13412
+ /* Emit the pattern:
13413
+ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
13415
+ emit_insn (gen_rtx_SET (VOIDmode,
13416
+ gen_rtx_REG (CCmode, CC_REGNUM),
13417
+ gen_rtx_COMPARE (CCmode, operands[0], const0_rtx)));
13418
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13419
+ gen_rtx_GT (SImode,
13420
+ gen_rtx_REG (CCmode, CC_REGNUM),
13422
+ gen_rtx_SET (VOIDmode,
13424
+ (gen_rtx_MINUS (SImode,
13426
+ operands[1])))));
13430
+ /* Emit the pattern:
13431
+ eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31
13433
+ emit_insn (gen_rtx_SET (VOIDmode,
13435
+ gen_rtx_XOR (SImode,
13436
+ gen_rtx_ASHIFTRT (SImode,
13440
+ emit_insn (gen_rtx_SET (VOIDmode,
13442
+ gen_rtx_MINUS (SImode,
13443
+ gen_rtx_ASHIFTRT (SImode,
13450
[(set_attr "conds" "clob,*")
13451
(set_attr "shift" "1")
13452
(set_attr "predicable" "no, yes")
13453
@@ -4330,7 +4816,7 @@
13454
[(set_attr "length" "*,8,8,*")
13455
(set_attr "predicable" "no,yes,yes,no")
13456
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
13457
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
13458
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
13461
(define_expand "one_cmplsi2"
13462
@@ -4498,7 +4984,7 @@
13463
"TARGET_32BIT <qhs_zextenddi_cond>"
13465
[(set_attr "length" "8,4,8,8")
13466
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")
13467
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
13468
(set_attr "ce_count" "2")
13469
(set_attr "predicable" "yes")]
13471
@@ -4513,7 +4999,7 @@
13472
(set_attr "ce_count" "2")
13473
(set_attr "shift" "1")
13474
(set_attr "predicable" "yes")
13475
- (set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")]
13476
+ (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")]
13479
;; Splits for all extensions to DImode
13480
@@ -5313,8 +5799,8 @@
13483
(define_insn "*arm_movdi"
13484
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
13485
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))]
13486
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
13487
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))]
13489
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
13491
@@ -6738,8 +7224,8 @@
13494
(define_insn "*movdf_soft_insn"
13495
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
13496
- (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
13497
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
13498
+ (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
13499
"TARGET_32BIT && TARGET_SOFT_FLOAT
13500
&& ( register_operand (operands[0], DFmode)
13501
|| register_operand (operands[1], DFmode))"
13502
@@ -7617,23 +8103,64 @@
13503
;; if-conversion can not reduce to a conditional compare, so we do
13506
-(define_insn "*arm_cmpdi_insn"
13507
+(define_insn_and_split "*arm_cmpdi_insn"
13508
[(set (reg:CC_NCV CC_REGNUM)
13509
(compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r")
13510
(match_operand:DI 1 "arm_di_operand" "rDi")))
13511
(clobber (match_scratch:SI 2 "=r"))]
13513
- "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
13514
+ "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
13515
+ "&& reload_completed"
13516
+ [(set (reg:CC CC_REGNUM)
13517
+ (compare:CC (match_dup 0) (match_dup 1)))
13518
+ (parallel [(set (reg:CC CC_REGNUM)
13519
+ (compare:CC (match_dup 3) (match_dup 4)))
13520
+ (set (match_dup 2)
13521
+ (minus:SI (match_dup 5)
13522
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])]
13524
+ operands[3] = gen_highpart (SImode, operands[0]);
13525
+ operands[0] = gen_lowpart (SImode, operands[0]);
13526
+ if (CONST_INT_P (operands[1]))
13528
+ operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode,
13531
+ operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]);
13535
+ operands[4] = gen_highpart (SImode, operands[1]);
13536
+ operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]);
13538
+ operands[1] = gen_lowpart (SImode, operands[1]);
13539
+ operands[2] = gen_lowpart (SImode, operands[2]);
13541
[(set_attr "conds" "set")
13542
(set_attr "length" "8")]
13545
-(define_insn "*arm_cmpdi_unsigned"
13546
+(define_insn_and_split "*arm_cmpdi_unsigned"
13547
[(set (reg:CC_CZ CC_REGNUM)
13548
(compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
13549
(match_operand:DI 1 "arm_di_operand" "rDi")))]
13551
- "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
13552
+ "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
13553
+ "&& reload_completed"
13554
+ [(set (reg:CC CC_REGNUM)
13555
+ (compare:CC (match_dup 2) (match_dup 3)))
13556
+ (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0))
13557
+ (set (reg:CC CC_REGNUM)
13558
+ (compare:CC (match_dup 0) (match_dup 1))))]
13560
+ operands[2] = gen_highpart (SImode, operands[0]);
13561
+ operands[0] = gen_lowpart (SImode, operands[0]);
13562
+ if (CONST_INT_P (operands[1]))
13563
+ operands[3] = gen_highpart_mode (SImode, DImode, operands[1]);
13565
+ operands[3] = gen_highpart (SImode, operands[1]);
13566
+ operands[1] = gen_lowpart (SImode, operands[1]);
13568
[(set_attr "conds" "set")
13569
(set_attr "length" "8")]
13571
@@ -7758,36 +8285,56 @@
13572
operands[3] = const0_rtx;"
13575
-(define_insn "*mov_scc"
13576
+(define_insn_and_split "*mov_scc"
13577
[(set (match_operand:SI 0 "s_register_operand" "=r")
13578
(match_operator:SI 1 "arm_comparison_operator"
13579
[(match_operand 2 "cc_register" "") (const_int 0)]))]
13581
- "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
13582
+ "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
13584
+ [(set (match_dup 0)
13585
+ (if_then_else:SI (match_dup 1)
13589
[(set_attr "conds" "use")
13590
- (set_attr "insn" "mov")
13591
(set_attr "length" "8")]
13594
-(define_insn "*mov_negscc"
13595
+(define_insn_and_split "*mov_negscc"
13596
[(set (match_operand:SI 0 "s_register_operand" "=r")
13597
(neg:SI (match_operator:SI 1 "arm_comparison_operator"
13598
[(match_operand 2 "cc_register" "") (const_int 0)])))]
13600
- "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
13601
+ "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
13603
+ [(set (match_dup 0)
13604
+ (if_then_else:SI (match_dup 1)
13608
+ operands[3] = GEN_INT (~0);
13610
[(set_attr "conds" "use")
13611
- (set_attr "insn" "mov")
13612
(set_attr "length" "8")]
13615
-(define_insn "*mov_notscc"
13616
+(define_insn_and_split "*mov_notscc"
13617
[(set (match_operand:SI 0 "s_register_operand" "=r")
13618
(not:SI (match_operator:SI 1 "arm_comparison_operator"
13619
[(match_operand 2 "cc_register" "") (const_int 0)])))]
13621
- "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
13622
+ "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
13624
+ [(set (match_dup 0)
13625
+ (if_then_else:SI (match_dup 1)
13629
+ operands[3] = GEN_INT (~1);
13630
+ operands[4] = GEN_INT (~0);
13632
[(set_attr "conds" "use")
13633
- (set_attr "insn" "mov")
13634
(set_attr "length" "8")]
13637
@@ -8110,7 +8657,40 @@
13641
-(define_insn "*movsicc_insn"
13642
+(define_insn "*cmov<mode>"
13643
+ [(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>")
13644
+ (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator"
13645
+ [(match_operand 2 "cc_register" "") (const_int 0)])
13646
+ (match_operand:SDF 3 "s_register_operand"
13647
+ "<F_constraint>")
13648
+ (match_operand:SDF 4 "s_register_operand"
13649
+ "<F_constraint>")))]
13650
+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
13653
+ enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]);
13660
+ return \"vsel%d1.<V_if_elem>\\t%<V_reg>0, %<V_reg>3, %<V_reg>4\";
13665
+ return \"vsel%D1.<V_if_elem>\\t%<V_reg>0, %<V_reg>4, %<V_reg>3\";
13667
+ gcc_unreachable ();
13671
+ [(set_attr "conds" "use")
13672
+ (set_attr "type" "f_sel<vfp_type>")]
13675
+(define_insn_and_split "*movsicc_insn"
13676
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
13678
(match_operator 3 "arm_comparison_operator"
13679
@@ -8123,10 +8703,45 @@
13683
- mov%d3\\t%0, %1\;mov%D3\\t%0, %2
13684
- mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
13685
- mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
13686
- mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
13691
+ ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2
13692
+ ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
13693
+ ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
13694
+ ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
13695
+ "&& reload_completed"
13698
+ enum rtx_code rev_code;
13699
+ enum machine_mode mode;
13702
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13704
+ gen_rtx_SET (VOIDmode,
13708
+ rev_code = GET_CODE (operands[3]);
13709
+ mode = GET_MODE (operands[4]);
13710
+ if (mode == CCFPmode || mode == CCFPEmode)
13711
+ rev_code = reverse_condition_maybe_unordered (rev_code);
13713
+ rev_code = reverse_condition (rev_code);
13715
+ rev_cond = gen_rtx_fmt_ee (rev_code,
13719
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13721
+ gen_rtx_SET (VOIDmode,
13726
[(set_attr "length" "4,4,4,4,8,8,8,8")
13727
(set_attr "conds" "use")
13728
(set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")
13729
@@ -9095,27 +9710,64 @@
13730
(set_attr "type" "alu_shift,alu_shift_reg")])
13733
-(define_insn "*and_scc"
13734
+(define_insn_and_split "*and_scc"
13735
[(set (match_operand:SI 0 "s_register_operand" "=r")
13736
(and:SI (match_operator:SI 1 "arm_comparison_operator"
13737
- [(match_operand 3 "cc_register" "") (const_int 0)])
13738
- (match_operand:SI 2 "s_register_operand" "r")))]
13739
+ [(match_operand 2 "cc_register" "") (const_int 0)])
13740
+ (match_operand:SI 3 "s_register_operand" "r")))]
13742
- "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
13743
+ "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1"
13744
+ "&& reload_completed"
13745
+ [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0)))
13746
+ (cond_exec (match_dup 4) (set (match_dup 0)
13747
+ (and:SI (match_dup 3) (const_int 1))))]
13749
+ enum machine_mode mode = GET_MODE (operands[2]);
13750
+ enum rtx_code rc = GET_CODE (operands[1]);
13752
+ /* Note that operands[4] is the same as operands[1],
13753
+ but with VOIDmode as the result. */
13754
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
13755
+ if (mode == CCFPmode || mode == CCFPEmode)
13756
+ rc = reverse_condition_maybe_unordered (rc);
13758
+ rc = reverse_condition (rc);
13759
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
13761
[(set_attr "conds" "use")
13762
(set_attr "insn" "mov")
13763
(set_attr "length" "8")]
13766
-(define_insn "*ior_scc"
13767
+(define_insn_and_split "*ior_scc"
13768
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
13769
- (ior:SI (match_operator:SI 2 "arm_comparison_operator"
13770
- [(match_operand 3 "cc_register" "") (const_int 0)])
13771
- (match_operand:SI 1 "s_register_operand" "0,?r")))]
13772
+ (ior:SI (match_operator:SI 1 "arm_comparison_operator"
13773
+ [(match_operand 2 "cc_register" "") (const_int 0)])
13774
+ (match_operand:SI 3 "s_register_operand" "0,?r")))]
13777
- orr%d2\\t%0, %1, #1
13778
- mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
13779
+ orr%d1\\t%0, %3, #1
13781
+ "&& reload_completed
13782
+ && REGNO (operands [0]) != REGNO (operands[3])"
13783
+ ;; && which_alternative == 1
13784
+ ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1
13785
+ [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3)))
13786
+ (cond_exec (match_dup 4) (set (match_dup 0)
13787
+ (ior:SI (match_dup 3) (const_int 1))))]
13789
+ enum machine_mode mode = GET_MODE (operands[2]);
13790
+ enum rtx_code rc = GET_CODE (operands[1]);
13792
+ /* Note that operands[4] is the same as operands[1],
13793
+ but with VOIDmode as the result. */
13794
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
13795
+ if (mode == CCFPmode || mode == CCFPEmode)
13796
+ rc = reverse_condition_maybe_unordered (rc);
13798
+ rc = reverse_condition (rc);
13799
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
13801
[(set_attr "conds" "use")
13802
(set_attr "length" "4,8")]
13804
@@ -9822,24 +10474,75 @@
13806
;; ??? The conditional patterns above need checking for Thumb-2 usefulness
13808
-(define_insn "*negscc"
13809
+(define_insn_and_split "*negscc"
13810
[(set (match_operand:SI 0 "s_register_operand" "=r")
13811
(neg:SI (match_operator 3 "arm_comparison_operator"
13812
[(match_operand:SI 1 "s_register_operand" "r")
13813
(match_operand:SI 2 "arm_rhs_operand" "rI")])))
13814
(clobber (reg:CC CC_REGNUM))]
13817
- if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
13818
- return \"mov\\t%0, %1, asr #31\";
13820
+ "&& reload_completed"
13823
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
13825
- if (GET_CODE (operands[3]) == NE)
13826
- return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\";
13827
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
13829
+ /* Emit mov\\t%0, %1, asr #31 */
13830
+ emit_insn (gen_rtx_SET (VOIDmode,
13832
+ gen_rtx_ASHIFTRT (SImode,
13837
+ else if (GET_CODE (operands[3]) == NE)
13839
+ /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */
13840
+ if (CONST_INT_P (operands[2]))
13841
+ emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2],
13842
+ GEN_INT (- INTVAL (operands[2]))));
13844
+ emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2]));
13846
- output_asm_insn (\"cmp\\t%1, %2\", operands);
13847
- output_asm_insn (\"mov%D3\\t%0, #0\", operands);
13848
- return \"mvn%d3\\t%0, #0\";
13850
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13851
+ gen_rtx_NE (SImode,
13854
+ gen_rtx_SET (SImode,
13861
+ /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */
13862
+ emit_insn (gen_rtx_SET (VOIDmode,
13864
+ gen_rtx_COMPARE (CCmode, operands[1], operands[2])));
13865
+ enum rtx_code rc = GET_CODE (operands[3]);
13867
+ rc = reverse_condition (rc);
13868
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13869
+ gen_rtx_fmt_ee (rc,
13873
+ gen_rtx_SET (VOIDmode, operands[0], const0_rtx)));
13874
+ rc = GET_CODE (operands[3]);
13875
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
13876
+ gen_rtx_fmt_ee (rc,
13880
+ gen_rtx_SET (VOIDmode,
13887
[(set_attr "conds" "clob")
13888
(set_attr "length" "12")]
13890
@@ -11626,6 +12329,9 @@
13891
(set_attr "predicable" "yes")])
13894
+;; Load the load/store double peephole optimizations.
13895
+(include "ldrdstrd.md")
13897
;; Load the load/store multiple patterns
13898
(include "ldmstm.md")
13900
--- a/src/gcc/config/arm/neon-gen.ml
13901
+++ b/src/gcc/config/arm/neon-gen.ml
13902
@@ -121,6 +121,7 @@
13903
| T_uint16 | T_int16 -> T_intHI
13904
| T_uint32 | T_int32 -> T_intSI
13905
| T_uint64 | T_int64 -> T_intDI
13906
+ | T_float16 -> T_floatHF
13907
| T_float32 -> T_floatSF
13908
| T_poly8 -> T_intQI
13909
| T_poly16 -> T_intHI
13910
@@ -275,8 +276,8 @@
13911
let mode = mode_of_elt elttype shape in
13912
string_of_mode mode
13913
with MixedMode (dst, src) ->
13914
- let dstmode = mode_of_elt dst shape
13915
- and srcmode = mode_of_elt src shape in
13916
+ let dstmode = mode_of_elt ~argpos:0 dst shape
13917
+ and srcmode = mode_of_elt ~argpos:1 src shape in
13918
string_of_mode dstmode ^ string_of_mode srcmode
13920
let get_shuffle features =
13921
@@ -291,19 +292,24 @@
13922
match List.find (fun feature ->
13923
match feature with Requires_feature _ -> true
13924
| Requires_arch _ -> true
13925
+ | Requires_FP_bit _ -> true
13928
- Requires_feature feature ->
13929
+ Requires_feature feature ->
13930
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
13931
| Requires_arch arch ->
13932
Format.printf "#if __ARM_ARCH >= %d@\n" arch
13933
+ | Requires_FP_bit bit ->
13934
+ Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
13936
| _ -> assert false
13937
with Not_found -> assert true
13939
let print_feature_test_end features =
13941
- List.exists (function Requires_feature x -> true
13942
- | Requires_arch x -> true
13943
+ List.exists (function Requires_feature _ -> true
13944
+ | Requires_arch _ -> true
13945
+ | Requires_FP_bit _ -> true
13946
| _ -> false) features in
13947
if feature then Format.printf "#endif@\n"
13949
@@ -365,6 +371,7 @@
13950
"__builtin_neon_hi", "int", 16, 4;
13951
"__builtin_neon_si", "int", 32, 2;
13952
"__builtin_neon_di", "int", 64, 1;
13953
+ "__builtin_neon_hf", "float", 16, 4;
13954
"__builtin_neon_sf", "float", 32, 2;
13955
"__builtin_neon_poly8", "poly", 8, 8;
13956
"__builtin_neon_poly16", "poly", 16, 4;
13957
--- a/src/libobjc/ChangeLog.linaro
13958
+++ b/src/libobjc/ChangeLog.linaro
13960
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13962
+ * GCC Linaro 4.8-2013.04 released.
13963
--- a/src/libgfortran/ChangeLog.linaro
13964
+++ b/src/libgfortran/ChangeLog.linaro
13966
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13968
+ * GCC Linaro 4.8-2013.04 released.
13969
--- a/src/libada/ChangeLog.linaro
13970
+++ b/src/libada/ChangeLog.linaro
13972
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13974
+ * GCC Linaro 4.8-2013.04 released.
13975
--- a/src/libffi/ChangeLog.linaro
13976
+++ b/src/libffi/ChangeLog.linaro
13978
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13980
+ * GCC Linaro 4.8-2013.04 released.
13981
--- a/src/libssp/ChangeLog.linaro
13982
+++ b/src/libssp/ChangeLog.linaro
13984
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13986
+ * GCC Linaro 4.8-2013.04 released.
13987
--- a/src/libcpp/ChangeLog.linaro
13988
+++ b/src/libcpp/ChangeLog.linaro
13990
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13992
+ * GCC Linaro 4.8-2013.04 released.
13993
--- a/src/libcpp/po/ChangeLog.linaro
13994
+++ b/src/libcpp/po/ChangeLog.linaro
13996
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
13998
+ * GCC Linaro 4.8-2013.04 released.
13999
--- a/src/fixincludes/ChangeLog.linaro
14000
+++ b/src/fixincludes/ChangeLog.linaro
14002
+2013-04-09 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
14004
+ * GCC Linaro 4.8-2013.04 released.