1
# DP: Changes for the Linaro 5-2015.09 release.
3
LANG=C git diff 2006973fa839ccbe189a1e7408400dc96ed880b4..ac19ac6481a3f326d9f41403f5dadab548b2c8a6 \
4
| egrep -v '^(diff|index) ' \
5
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/fixincludes/mkfixinc.sh
8
+++ b/src/fixincludes/mkfixinc.sh
9
@@ -19,7 +19,8 @@ case $machine in
12
powerpcle-*-eabisim* | \
14
+ powerpcle-*-eabi* | \
16
# IF there is no include fixing,
17
# THEN create a no-op fixer and exit
18
(echo "#! /bin/sh" ; echo "exit 0" ) > ${target}
20
+++ b/src/gcc/LINARO-VERSION
23
--- a/src/gcc/Makefile.in
24
+++ b/src/gcc/Makefile.in
25
@@ -527,10 +527,6 @@ xm_include_list=@xm_include_list@
26
xm_defines=@xm_defines@
28
lang_checks_parallelized=
29
-# Upper limit to which it is useful to parallelize this lang target.
30
-# It doesn't make sense to try e.g. 128 goals for small testsuites
32
-check_gcc_parallelize=10000
33
lang_opt_files=@lang_opt_files@ $(srcdir)/c-family/c.opt $(srcdir)/common.opt
34
lang_specs_files=@lang_specs_files@
35
lang_tree_files=@lang_tree_files@
36
@@ -814,10 +810,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
37
DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
38
DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty
39
REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX]
40
+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev]
42
BASEVER_c := $(shell cat $(BASEVER))
43
DEVPHASE_c := $(shell cat $(DEVPHASE))
44
DATESTAMP_c := $(shell cat $(DATESTAMP))
45
+LINAROVER_c := $(shell cat $(LINAROVER))
47
ifeq (,$(wildcard $(REVISION)))
49
@@ -844,6 +842,7 @@ DATESTAMP_s := \
50
"\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
51
PKGVERSION_s:= "\"@PKGVERSION@\""
52
BUGURL_s := "\"@REPORT_BUGS_TO@\""
53
+LINAROVER_s := "\"$(LINAROVER_c)\""
55
PKGVERSION := @PKGVERSION@
56
BUGURL_TEXI := @REPORT_BUGS_TEXI@
57
@@ -2623,8 +2622,9 @@ PREPROCESSOR_DEFINES = \
58
-DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
59
@TARGET_SYSTEM_ROOT_DEFINE@
61
-CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
62
-cppbuiltin.o: $(BASEVER)
63
+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \
64
+ -DLINAROVER=$(LINAROVER_s)
65
+cppbuiltin.o: $(BASEVER) $(LINAROVER)
67
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
69
@@ -3736,7 +3736,9 @@ check_p_subdirs=$(wordlist 1,$(check_p_count),$(wordlist 1, \
71
# To parallelize some language check, add the corresponding check-$lang
72
# to lang_checks_parallelized variable and define check_$lang_parallelize
73
-# variable (see above check_gcc_parallelize description).
74
+# variable. This is the upper limit to which it is useful to parallelize the
75
+# check-$lang target. It doesn't make sense to try e.g. 128 goals for small
76
+# testsuites like objc or go.
77
$(lang_checks_parallelized): check-% : site.exp
78
-rm -rf $(TESTSUITEDIR)/$*-parallel
79
@if [ "$(filter -j, $(MFLAGS))" = "-j" ]; then \
80
--- a/src/gcc/ada/gcc-interface/Make-lang.in
81
+++ b/src/gcc/ada/gcc-interface/Make-lang.in
82
@@ -811,6 +811,7 @@ ada.mostlyclean:
83
-$(RM) ada/*$(coverageexts)
84
-$(RM) ada/sdefault.adb ada/stamp-sdefault ada/stamp-snames
86
+ -$(RM) gnatbind$(exeext) gnat1$(exeext)
90
--- a/src/gcc/c/Make-lang.in
91
+++ b/src/gcc/c/Make-lang.in
92
@@ -95,6 +95,8 @@ c.srcman:
93
# List of targets that can use the generic check- rule and its // variant.
94
lang_checks += check-gcc
95
lang_checks_parallelized += check-gcc
96
+# For description see the check_$lang_parallelize comment in gcc/Makefile.in.
97
+check_gcc_parallelize=10000
99
# 'make check' in gcc/ looks for check-c. Redirect it to check-gcc.
101
--- a/src/gcc/combine.c
102
+++ b/src/gcc/combine.c
103
@@ -1650,6 +1650,73 @@ setup_incoming_promotions (rtx_insn *first)
107
+#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
108
+/* If MODE has a precision lower than PREC and SRC is a non-negative constant
109
+ that would appear negative in MODE, sign-extend SRC for use in nonzero_bits
110
+ because some machines (maybe most) will actually do the sign-extension and
111
+ this is the conservative approach.
113
+ ??? For 2.5, try to tighten up the MD files in this regard instead of this
117
+sign_extend_short_imm (rtx src, machine_mode mode, unsigned int prec)
119
+ if (GET_MODE_PRECISION (mode) < prec
120
+ && CONST_INT_P (src)
121
+ && INTVAL (src) > 0
122
+ && val_signbit_known_set_p (mode, INTVAL (src)))
123
+ src = GEN_INT (INTVAL (src) | ~GET_MODE_MASK (mode));
129
+/* Update RSP for pseudo-register X from INSN's REG_EQUAL note (if one exists)
133
+update_rsp_from_reg_equal (reg_stat_type *rsp, rtx_insn *insn, const_rtx set,
136
+ rtx reg_equal_note = insn ? find_reg_equal_equiv_note (insn) : NULL_RTX;
137
+ unsigned HOST_WIDE_INT bits = 0;
138
+ rtx reg_equal = NULL, src = SET_SRC (set);
139
+ unsigned int num = 0;
141
+ if (reg_equal_note)
142
+ reg_equal = XEXP (reg_equal_note, 0);
144
+#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
145
+ src = sign_extend_short_imm (src, GET_MODE (x), BITS_PER_WORD);
147
+ reg_equal = sign_extend_short_imm (reg_equal, GET_MODE (x), BITS_PER_WORD);
150
+ /* Don't call nonzero_bits if it cannot change anything. */
151
+ if (rsp->nonzero_bits != ~(unsigned HOST_WIDE_INT) 0)
153
+ bits = nonzero_bits (src, nonzero_bits_mode);
154
+ if (reg_equal && bits)
155
+ bits &= nonzero_bits (reg_equal, nonzero_bits_mode);
156
+ rsp->nonzero_bits |= bits;
159
+ /* Don't call num_sign_bit_copies if it cannot change anything. */
160
+ if (rsp->sign_bit_copies != 1)
162
+ num = num_sign_bit_copies (SET_SRC (set), GET_MODE (x));
163
+ if (reg_equal && num != GET_MODE_PRECISION (GET_MODE (x)))
165
+ unsigned int numeq = num_sign_bit_copies (reg_equal, GET_MODE (x));
166
+ if (num == 0 || numeq > num)
169
+ if (rsp->sign_bit_copies == 0 || num < rsp->sign_bit_copies)
170
+ rsp->sign_bit_copies = num;
174
/* Called via note_stores. If X is a pseudo that is narrower than
175
HOST_BITS_PER_WIDE_INT and is being set, record what bits are known zero.
177
@@ -1665,7 +1732,6 @@ static void
178
set_nonzero_bits_and_sign_copies (rtx x, const_rtx set, void *data)
180
rtx_insn *insn = (rtx_insn *) data;
184
&& REGNO (x) >= FIRST_PSEUDO_REGISTER
185
@@ -1725,34 +1791,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx set, void *data)
186
if (SET_DEST (set) == x
187
|| (paradoxical_subreg_p (SET_DEST (set))
188
&& SUBREG_REG (SET_DEST (set)) == x))
190
- rtx src = SET_SRC (set);
192
-#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
193
- /* If X is narrower than a word and SRC is a non-negative
194
- constant that would appear negative in the mode of X,
195
- sign-extend it for use in reg_stat[].nonzero_bits because some
196
- machines (maybe most) will actually do the sign-extension
197
- and this is the conservative approach.
199
- ??? For 2.5, try to tighten up the MD files in this regard
200
- instead of this kludge. */
202
- if (GET_MODE_PRECISION (GET_MODE (x)) < BITS_PER_WORD
203
- && CONST_INT_P (src)
204
- && INTVAL (src) > 0
205
- && val_signbit_known_set_p (GET_MODE (x), INTVAL (src)))
206
- src = GEN_INT (INTVAL (src) | ~GET_MODE_MASK (GET_MODE (x)));
209
- /* Don't call nonzero_bits if it cannot change anything. */
210
- if (rsp->nonzero_bits != ~(unsigned HOST_WIDE_INT) 0)
211
- rsp->nonzero_bits |= nonzero_bits (src, nonzero_bits_mode);
212
- num = num_sign_bit_copies (SET_SRC (set), GET_MODE (x));
213
- if (rsp->sign_bit_copies == 0
214
- || rsp->sign_bit_copies > num)
215
- rsp->sign_bit_copies = num;
217
+ update_rsp_from_reg_equal (rsp, insn, set, x);
220
rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x));
221
@@ -1914,6 +1953,15 @@ can_combine_p (rtx_insn *insn, rtx_insn *i3, rtx_insn *pred ATTRIBUTE_UNUSED,
222
set = expand_field_assignment (set);
223
src = SET_SRC (set), dest = SET_DEST (set);
225
+ /* Do not eliminate user-specified register if it is in an
226
+ asm input because we may break the register asm usage defined
227
+ in GCC manual if allow to do so.
228
+ Be aware that this may cover more cases than we expect but this
229
+ should be harmless. */
230
+ if (REG_P (dest) && REG_USERVAR_P (dest) && HARD_REGISTER_P (dest)
231
+ && extract_asm_operands (PATTERN (i3)))
234
/* Don't eliminate a store in the stack pointer. */
235
if (dest == stack_pointer_rtx
236
/* Don't combine with an insn that sets a register to itself if it has
237
@@ -7723,9 +7771,8 @@ extract_left_shift (rtx x, int count)
238
We try, as much as possible, to re-use rtl expressions to save memory.
240
IN_CODE says what kind of expression we are processing. Normally, it is
241
- SET. In a memory address (inside a MEM, PLUS or minus, the latter two
242
- being kludges), it is MEM. When processing the arguments of a comparison
243
- or a COMPARE against zero, it is COMPARE. */
244
+ SET. In a memory address it is MEM. When processing the arguments of
245
+ a comparison or a COMPARE against zero, it is COMPARE. */
248
make_compound_operation (rtx x, enum rtx_code in_code)
249
@@ -7745,8 +7792,6 @@ make_compound_operation (rtx x, enum rtx_code in_code)
250
but once inside, go back to our default of SET. */
252
next_code = (code == MEM ? MEM
253
- : ((code == PLUS || code == MINUS)
254
- && SCALAR_INT_MODE_P (mode)) ? MEM
255
: ((code == COMPARE || COMPARISON_P (x))
256
&& XEXP (x, 1) == const0_rtx) ? COMPARE
257
: in_code == COMPARE ? SET : in_code);
258
@@ -9797,20 +9842,8 @@ reg_nonzero_bits_for_combine (const_rtx x, machine_mode mode,
261
#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
262
- /* If X is narrower than MODE and TEM is a non-negative
263
- constant that would appear negative in the mode of X,
264
- sign-extend it for use in reg_nonzero_bits because some
265
- machines (maybe most) will actually do the sign-extension
266
- and this is the conservative approach.
268
- ??? For 2.5, try to tighten up the MD files in this regard
269
- instead of this kludge. */
271
- if (GET_MODE_PRECISION (GET_MODE (x)) < GET_MODE_PRECISION (mode)
272
- && CONST_INT_P (tem)
273
- && INTVAL (tem) > 0
274
- && val_signbit_known_set_p (GET_MODE (x), INTVAL (tem)))
275
- tem = GEN_INT (INTVAL (tem) | ~GET_MODE_MASK (GET_MODE (x)));
276
+ tem = sign_extend_short_imm (tem, GET_MODE (x),
277
+ GET_MODE_PRECISION (mode));
281
--- a/src/gcc/config.gcc
282
+++ b/src/gcc/config.gcc
283
@@ -575,7 +575,7 @@ case ${target} in
286
# Common C libraries.
287
-tm_defines="$tm_defines LIBC_GLIBC=1 LIBC_UCLIBC=2 LIBC_BIONIC=3"
288
+tm_defines="$tm_defines LIBC_GLIBC=1 LIBC_UCLIBC=2 LIBC_BIONIC=3 LIBC_MUSL=4"
290
# 32-bit x86 processors supported by --with-arch=. Each processor
291
# MUST be separated by exactly one space.
292
@@ -720,6 +720,9 @@ case ${target} in
294
tm_defines="$tm_defines DEFAULT_LIBC=LIBC_UCLIBC"
297
+ tm_defines="$tm_defines DEFAULT_LIBC=LIBC_MUSL"
300
tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC"
302
--- a/src/gcc/config.host
303
+++ b/src/gcc/config.host
304
@@ -99,6 +99,14 @@ case ${host} in
311
+ host_extra_gcc_objs="driver-aarch64.o"
312
+ host_xmake_file="${host_xmake_file} aarch64/x-aarch64"
316
arm*-*-freebsd* | arm*-*-linux*)
319
--- a/src/gcc/config/aarch64/aarch64-cores.def
320
+++ b/src/gcc/config/aarch64/aarch64-cores.def
323
Before using #include to read this file, define a macro:
325
- AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS)
326
+ AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS, IMP, PART)
328
The CORE_NAME is the name of the core, represented as a string constant.
329
The CORE_IDENT is the name of the core, represented as an identifier.
331
ARCH is the architecture revision implemented by the chip.
332
FLAGS are the bitwise-or of the traits that apply to that core.
333
This need not include flags implied by the architecture.
334
- COSTS is the name of the rtx_costs routine to use. */
335
+ COSTS is the name of the rtx_costs routine to use.
336
+ IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can
337
+ be found in /proc/cpuinfo.
338
+ PART is the part number of the CPU. On a GNU/Linux system it can be found
339
+ in /proc/cpuinfo. For big.LITTLE systems this should have the form at of
340
+ "<big core part number>.<LITTLE core part number>". */
342
/* V8 Architecture Processors. */
344
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53)
345
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
346
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
347
-AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
348
-AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
349
-AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1)
350
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
351
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
352
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08")
353
+AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x53", "0x001")
354
+AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
355
+AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
357
/* V8 big.LITTLE implementations. */
359
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
360
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
361
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
362
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03")
363
--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
364
+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
365
@@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs =
366
0, /* N/A: Stm_regs_per_insn_subsequent. */
369
- COSTS_N_INSNS (1) /* Store_unaligned. */
370
+ COSTS_N_INSNS (1), /* Store_unaligned. */
371
+ COSTS_N_INSNS (1), /* Loadv. */
372
+ COSTS_N_INSNS (1) /* Storev. */
376
--- a/src/gcc/config/aarch64/aarch64-elf.h
377
+++ b/src/gcc/config/aarch64/aarch64-elf.h
379
#undef DRIVER_SELF_SPECS
380
#define DRIVER_SELF_SPECS \
381
" %{!mbig-endian:%{!mlittle-endian:" ENDIAN_SPEC "}}" \
382
- " %{!mabi=*:" ABI_SPEC "}"
383
+ " %{!mabi=*:" ABI_SPEC "}" \
384
+ MCPU_MTUNE_NATIVE_SPECS
386
#ifdef HAVE_AS_MABI_OPTION
387
#define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}"
388
--- a/src/gcc/config/aarch64/aarch64-linux.h
389
+++ b/src/gcc/config/aarch64/aarch64-linux.h
392
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1"
394
+#undef MUSL_DYNAMIC_LINKER
395
+#define MUSL_DYNAMIC_LINKER "/lib/ld-musl-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1"
398
#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}"
400
--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
401
+++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
404
Before using #include to read this file, define a macro:
406
- AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF)
407
+ AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING)
409
EXT_NAME is the name of the extension, represented as a string constant.
410
FLAGS_ON are the bitwise-or of the features that the extension adds.
411
- FLAGS_OFF are the bitwise-or of the features that the extension removes. */
412
+ FLAGS_OFF are the bitwise-or of the features that the extension removes.
413
+ FEAT_STRING is a string containing the entries in the 'Features' field of
414
+ /proc/cpuinfo on a GNU/Linux system that correspond to this architecture
415
+ extension being available. Sometimes multiple entries are needed to enable
416
+ the extension (for example, the 'crypto' extension depends on four
417
+ entries: aes, pmull, sha1, sha2 being present). In that case this field
418
+ should contain a whitespace-separated list of the strings in 'Features'
419
+ that are required. Their order is not important. */
421
/* V8 Architecture Extensions.
422
This list currently contains example extensions for CPUs that implement
423
AArch64, and therefore serves as a template for adding more CPUs in the
426
-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO)
427
-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO)
428
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO)
429
-AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC)
430
+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp")
431
+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd")
432
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
433
+AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32")
434
--- a/src/gcc/config/aarch64/aarch64-opts.h
435
+++ b/src/gcc/config/aarch64/aarch64-opts.h
437
/* The various cores that implement AArch64. */
438
enum aarch64_processor
440
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \
441
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
443
#include "aarch64-cores.def"
445
--- a/src/gcc/config/aarch64/aarch64-protos.h
446
+++ b/src/gcc/config/aarch64/aarch64-protos.h
447
@@ -162,12 +162,20 @@ struct cpu_vector_cost
448
const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
452
+struct cpu_branch_cost
454
+ const int predictable; /* Predictable branch or optimizing for size. */
455
+ const int unpredictable; /* Unpredictable branch or optimizing for speed. */
460
const struct cpu_cost_table *const insn_extra_cost;
461
const struct cpu_addrcost_table *const addr_cost;
462
const struct cpu_regmove_cost *const regmove_cost;
463
const struct cpu_vector_cost *const vec_costs;
464
+ const struct cpu_branch_cost *const branch_costs;
465
const int memmov_cost;
466
const int issue_rate;
467
const unsigned int fuseable_ops;
468
@@ -177,11 +185,14 @@ struct tune_params
469
const int int_reassoc_width;
470
const int fp_reassoc_width;
471
const int vec_reassoc_width;
472
+ const int min_div_recip_mul_sf;
473
+ const int min_div_recip_mul_df;
476
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
477
int aarch64_get_condition_code (rtx);
478
bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
479
+int aarch64_branch_cost (bool, bool);
480
enum aarch64_symbol_type
481
aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
482
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
483
@@ -264,12 +275,6 @@ void init_aarch64_simd_builtins (void);
485
void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int);
487
-/* Emit code to place a AdvSIMD pair result in memory locations (with equal
489
-void aarch64_simd_emit_pair_result_insn (machine_mode,
490
- rtx (*intfn) (rtx, rtx, rtx), rtx,
493
/* Expand builtins for SIMD intrinsics. */
494
rtx aarch64_simd_expand_builtin (int, tree, rtx);
496
--- a/src/gcc/config/aarch64/aarch64-simd.md
497
+++ b/src/gcc/config/aarch64/aarch64-simd.md
498
@@ -2057,13 +2057,13 @@
501
(define_expand "aarch64_vcond_internal<mode><mode>"
502
- [(set (match_operand:VDQ_I 0 "register_operand")
503
- (if_then_else:VDQ_I
504
+ [(set (match_operand:VSDQ_I_DI 0 "register_operand")
505
+ (if_then_else:VSDQ_I_DI
506
(match_operator 3 "comparison_operator"
507
- [(match_operand:VDQ_I 4 "register_operand")
508
- (match_operand:VDQ_I 5 "nonmemory_operand")])
509
- (match_operand:VDQ_I 1 "nonmemory_operand")
510
- (match_operand:VDQ_I 2 "nonmemory_operand")))]
511
+ [(match_operand:VSDQ_I_DI 4 "register_operand")
512
+ (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
513
+ (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
514
+ (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
517
rtx op1 = operands[1];
518
@@ -2365,13 +2365,13 @@
521
(define_expand "vcond<mode><mode>"
522
- [(set (match_operand:VALL 0 "register_operand")
524
+ [(set (match_operand:VALLDI 0 "register_operand")
525
+ (if_then_else:VALLDI
526
(match_operator 3 "comparison_operator"
527
- [(match_operand:VALL 4 "register_operand")
528
- (match_operand:VALL 5 "nonmemory_operand")])
529
- (match_operand:VALL 1 "nonmemory_operand")
530
- (match_operand:VALL 2 "nonmemory_operand")))]
531
+ [(match_operand:VALLDI 4 "register_operand")
532
+ (match_operand:VALLDI 5 "nonmemory_operand")])
533
+ (match_operand:VALLDI 1 "nonmemory_operand")
534
+ (match_operand:VALLDI 2 "nonmemory_operand")))]
537
emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
538
@@ -2398,13 +2398,13 @@
541
(define_expand "vcondu<mode><mode>"
542
- [(set (match_operand:VDQ_I 0 "register_operand")
543
- (if_then_else:VDQ_I
544
+ [(set (match_operand:VSDQ_I_DI 0 "register_operand")
545
+ (if_then_else:VSDQ_I_DI
546
(match_operator 3 "comparison_operator"
547
- [(match_operand:VDQ_I 4 "register_operand")
548
- (match_operand:VDQ_I 5 "nonmemory_operand")])
549
- (match_operand:VDQ_I 1 "nonmemory_operand")
550
- (match_operand:VDQ_I 2 "nonmemory_operand")))]
551
+ [(match_operand:VSDQ_I_DI 4 "register_operand")
552
+ (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
553
+ (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
554
+ (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
557
emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
558
@@ -3955,6 +3955,7 @@
559
[(set_attr "type" "neon_store2_2reg<q>")]
562
+;; RTL uses GCC vector extension indices, so flip only for assembly.
563
(define_insn "vec_store_lanesoi_lane<mode>"
564
[(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
565
(unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
566
@@ -3962,7 +3963,10 @@
567
(match_operand:SI 2 "immediate_operand" "i")]
570
- "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
572
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
573
+ return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
575
[(set_attr "type" "neon_store3_one_lane<q>")]
578
@@ -4046,6 +4050,7 @@
579
[(set_attr "type" "neon_store3_3reg<q>")]
582
+;; RTL uses GCC vector extension indices, so flip only for assembly.
583
(define_insn "vec_store_lanesci_lane<mode>"
584
[(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
585
(unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
586
@@ -4053,7 +4058,10 @@
587
(match_operand:SI 2 "immediate_operand" "i")]
590
- "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
592
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
593
+ return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
595
[(set_attr "type" "neon_store3_one_lane<q>")]
598
@@ -4137,6 +4145,7 @@
599
[(set_attr "type" "neon_store4_4reg<q>")]
602
+;; RTL uses GCC vector extension indices, so flip only for assembly.
603
(define_insn "vec_store_lanesxi_lane<mode>"
604
[(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
605
(unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
606
@@ -4144,7 +4153,10 @@
607
(match_operand:SI 2 "immediate_operand" "i")]
610
- "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
612
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
613
+ return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
615
[(set_attr "type" "neon_store4_one_lane<q>")]
618
--- a/src/gcc/config/aarch64/aarch64.c
619
+++ b/src/gcc/config/aarch64/aarch64.c
620
@@ -339,12 +339,20 @@ static const struct cpu_vector_cost xgene1_vector_cost =
621
#define AARCH64_FUSE_ADRP_LDR (1 << 3)
622
#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
624
+/* Generic costs for branch instructions. */
625
+static const struct cpu_branch_cost generic_branch_cost =
627
+ 2, /* Predictable. */
628
+ 2 /* Unpredictable. */
631
static const struct tune_params generic_tunings =
633
&cortexa57_extra_costs,
634
&generic_addrcost_table,
635
&generic_regmove_cost,
636
&generic_vector_cost,
637
+ &generic_branch_cost,
640
AARCH64_FUSE_NOTHING, /* fuseable_ops */
641
@@ -353,7 +361,9 @@ static const struct tune_params generic_tunings =
643
2, /* int_reassoc_width. */
644
4, /* fp_reassoc_width. */
645
- 1 /* vec_reassoc_width. */
646
+ 1, /* vec_reassoc_width. */
647
+ 2, /* min_div_recip_mul_sf. */
648
+ 2 /* min_div_recip_mul_df. */
651
static const struct tune_params cortexa53_tunings =
652
@@ -362,6 +372,7 @@ static const struct tune_params cortexa53_tunings =
653
&generic_addrcost_table,
654
&cortexa53_regmove_cost,
655
&generic_vector_cost,
656
+ &generic_branch_cost,
659
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
660
@@ -371,7 +382,9 @@ static const struct tune_params cortexa53_tunings =
662
2, /* int_reassoc_width. */
663
4, /* fp_reassoc_width. */
664
- 1 /* vec_reassoc_width. */
665
+ 1, /* vec_reassoc_width. */
666
+ 2, /* min_div_recip_mul_sf. */
667
+ 2 /* min_div_recip_mul_df. */
670
static const struct tune_params cortexa57_tunings =
671
@@ -380,6 +393,7 @@ static const struct tune_params cortexa57_tunings =
672
&cortexa57_addrcost_table,
673
&cortexa57_regmove_cost,
674
&cortexa57_vector_cost,
675
+ &generic_branch_cost,
678
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
679
@@ -389,7 +403,9 @@ static const struct tune_params cortexa57_tunings =
681
2, /* int_reassoc_width. */
682
4, /* fp_reassoc_width. */
683
- 1 /* vec_reassoc_width. */
684
+ 1, /* vec_reassoc_width. */
685
+ 2, /* min_div_recip_mul_sf. */
686
+ 2 /* min_div_recip_mul_df. */
689
static const struct tune_params thunderx_tunings =
690
@@ -398,6 +414,7 @@ static const struct tune_params thunderx_tunings =
691
&generic_addrcost_table,
692
&thunderx_regmove_cost,
693
&generic_vector_cost,
694
+ &generic_branch_cost,
697
AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */
698
@@ -406,7 +423,9 @@ static const struct tune_params thunderx_tunings =
700
2, /* int_reassoc_width. */
701
4, /* fp_reassoc_width. */
702
- 1 /* vec_reassoc_width. */
703
+ 1, /* vec_reassoc_width. */
704
+ 2, /* min_div_recip_mul_sf. */
705
+ 2 /* min_div_recip_mul_df. */
708
static const struct tune_params xgene1_tunings =
709
@@ -415,6 +434,7 @@ static const struct tune_params xgene1_tunings =
710
&xgene1_addrcost_table,
711
&xgene1_regmove_cost,
713
+ &generic_branch_cost,
716
AARCH64_FUSE_NOTHING, /* fuseable_ops */
717
@@ -423,7 +443,9 @@ static const struct tune_params xgene1_tunings =
718
16, /* loop_align. */
719
2, /* int_reassoc_width. */
720
4, /* fp_reassoc_width. */
721
- 1 /* vec_reassoc_width. */
722
+ 1, /* vec_reassoc_width. */
723
+ 2, /* min_div_recip_mul_sf. */
724
+ 2 /* min_div_recip_mul_df. */
727
/* A processor implementing AArch64. */
728
@@ -440,7 +462,7 @@ struct processor
729
/* Processor cores implementing AArch64. */
730
static const struct processor all_cores[] =
732
-#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
733
+#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
734
{NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
735
#include "aarch64-cores.def"
737
@@ -477,7 +499,7 @@ struct aarch64_option_extension
738
/* ISA extensions in AArch64. */
739
static const struct aarch64_option_extension all_extensions[] =
741
-#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
742
+#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
743
{NAME, FLAGS_ON, FLAGS_OFF},
744
#include "aarch64-option-extensions.def"
745
#undef AARCH64_OPT_EXTENSION
746
@@ -512,9 +534,11 @@ static const char * const aarch64_condition_codes[] =
750
-aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
751
+aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
754
+ if (GET_MODE_UNIT_SIZE (mode) == 4)
755
+ return aarch64_tune_params->min_div_recip_mul_sf;
756
+ return aarch64_tune_params->min_div_recip_mul_df;
760
@@ -4901,8 +4925,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
764
- aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
765
- (GET_MODE_SIZE (mode) + 7) / 8;
766
+ aarch64_vector_mode_p (mode)
767
+ ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
768
+ : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
772
@@ -5157,9 +5182,18 @@ aarch64_strip_extend (rtx x)
776
+/* Return true iff CODE is a shift supported in combination
777
+ with arithmetic instructions. */
780
+aarch64_shift_p (enum rtx_code code)
782
+ return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
785
/* Helper function for rtx cost calculation. Calculate the cost of
786
- a MULT, which may be part of a multiply-accumulate rtx. Return
787
- the calculated cost of the expression, recursing manually in to
788
+ a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
789
+ Return the calculated cost of the expression, recursing manually in to
790
operands where needed. */
793
@@ -5169,7 +5203,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
794
const struct cpu_cost_table *extra_cost
795
= aarch64_tune_params->insn_extra_cost;
797
- bool maybe_fma = (outer == PLUS || outer == MINUS);
798
+ bool compound_p = (outer == PLUS || outer == MINUS);
799
machine_mode mode = GET_MODE (x);
801
gcc_checking_assert (code == MULT);
802
@@ -5184,24 +5218,50 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
803
if (GET_MODE_CLASS (mode) == MODE_INT)
805
/* The multiply will be canonicalized as a shift, cost it as such. */
806
- if (CONST_INT_P (op1)
807
- && exact_log2 (INTVAL (op1)) > 0)
808
+ if (aarch64_shift_p (GET_CODE (x))
809
+ || (CONST_INT_P (op1)
810
+ && exact_log2 (INTVAL (op1)) > 0))
812
+ bool is_extend = GET_CODE (op0) == ZERO_EXTEND
813
+ || GET_CODE (op0) == SIGN_EXTEND;
817
- /* ADD (shifted register). */
818
- cost += extra_cost->alu.arith_shift;
822
+ /* ARITH + shift-by-register. */
823
+ cost += extra_cost->alu.arith_shift_reg;
824
+ else if (is_extend)
825
+ /* ARITH + extended register. We don't have a cost field
826
+ for ARITH+EXTEND+SHIFT, so use extend_arith here. */
827
+ cost += extra_cost->alu.extend_arith;
829
+ /* ARITH + shift-by-immediate. */
830
+ cost += extra_cost->alu.arith_shift;
833
/* LSL (immediate). */
834
- cost += extra_cost->alu.shift;
835
+ cost += extra_cost->alu.shift;
838
+ /* Strip extends as we will have costed them in the case above. */
840
+ op0 = aarch64_strip_extend (op0);
842
cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
847
+ /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
848
+ compound and let the below cases handle it. After all, MNEG is a
849
+ special-case alias of MSUB. */
850
+ if (GET_CODE (op0) == NEG)
852
+ op0 = XEXP (op0, 0);
856
/* Integer multiplies or FMAs have zero/sign extending variants. */
857
if ((GET_CODE (op0) == ZERO_EXTEND
858
&& GET_CODE (op1) == ZERO_EXTEND)
859
@@ -5213,8 +5273,8 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
864
- /* MADD/SMADDL/UMADDL. */
866
+ /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
867
cost += extra_cost->mult[0].extend_add;
869
/* MUL/SMULL/UMULL. */
870
@@ -5224,15 +5284,15 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
874
- /* This is either an integer multiply or an FMA. In both cases
875
+ /* This is either an integer multiply or a MADD. In both cases
876
we want to recurse and cost the operands. */
877
cost += rtx_cost (op0, MULT, 0, speed)
878
+ rtx_cost (op1, MULT, 1, speed);
886
cost += extra_cost->mult[mode == DImode].add;
889
@@ -5250,7 +5310,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
890
which case FNMUL is different than FMUL with operand negation. */
891
bool neg0 = GET_CODE (op0) == NEG;
892
bool neg1 = GET_CODE (op1) == NEG;
893
- if (maybe_fma || !flag_rounding_math || (neg0 && neg1))
894
+ if (compound_p || !flag_rounding_math || (neg0 && neg1))
898
@@ -5258,7 +5318,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
904
/* FMADD/FNMADD/FNMSUB/FMSUB. */
905
cost += extra_cost->fp[mode == DFmode].fma;
907
@@ -5367,6 +5427,23 @@ aarch64_address_cost (rtx x,
911
+/* Return the cost of a branch. If SPEED_P is true then the compiler is
912
+ optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
916
+aarch64_branch_cost (bool speed_p, bool predictable_p)
918
+ /* When optimizing for speed, use the cost of unpredictable branches. */
919
+ const struct cpu_branch_cost *branch_costs =
920
+ aarch64_tune_params->branch_costs;
922
+ if (!speed_p || predictable_p)
923
+ return branch_costs->predictable;
925
+ return branch_costs->unpredictable;
928
/* Return true if the RTX X in mode MODE is a zero or sign extract
929
usable in an ADD or SUB (extended register) instruction. */
931
@@ -5415,6 +5492,51 @@ aarch64_frint_unspec_p (unsigned int u)
935
+/* Return true iff X is an rtx that will match an extr instruction
936
+ i.e. as described in the *extr<mode>5_insn family of patterns.
937
+ OP0 and OP1 will be set to the operands of the shifts involved
938
+ on success and will be NULL_RTX otherwise. */
941
+aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
944
+ machine_mode mode = GET_MODE (x);
946
+ *res_op0 = NULL_RTX;
947
+ *res_op1 = NULL_RTX;
949
+ if (GET_CODE (x) != IOR)
955
+ if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
956
+ || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
958
+ /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
959
+ if (GET_CODE (op1) == ASHIFT)
960
+ std::swap (op0, op1);
962
+ if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
965
+ unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
966
+ unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
968
+ if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
969
+ && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
971
+ *res_op0 = XEXP (op0, 0);
972
+ *res_op1 = XEXP (op1, 0);
980
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
981
storing it in *COST. Result is true if the total cost of the operation
982
has now been calculated. */
983
@@ -5505,16 +5627,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
984
above this default. */
985
*cost = COSTS_N_INSNS (1);
987
- /* TODO: The cost infrastructure currently does not handle
988
- vector operations. Assume that all vector operations
989
- are equally expensive. */
990
- if (VECTOR_MODE_P (mode))
993
- *cost += extra_cost->vect.alu;
1000
@@ -5529,7 +5641,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1003
rtx address = XEXP (op0, 0);
1004
- if (GET_MODE_CLASS (mode) == MODE_INT)
1005
+ if (VECTOR_MODE_P (mode))
1006
+ *cost += extra_cost->ldst.storev;
1007
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1008
*cost += extra_cost->ldst.store;
1009
else if (mode == SFmode)
1010
*cost += extra_cost->ldst.storef;
1011
@@ -5550,15 +5664,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1015
+ /* The cost is one per vector-register copied. */
1016
+ if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
1018
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
1019
+ / GET_MODE_SIZE (V4SImode);
1020
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
1022
/* const0_rtx is in general free, but we will use an
1023
instruction to set a register to 0. */
1024
- if (REG_P (op1) || op1 == const0_rtx)
1026
- /* The cost is 1 per register copied. */
1027
- int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
1028
+ else if (REG_P (op1) || op1 == const0_rtx)
1030
+ /* The cost is 1 per register copied. */
1031
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
1033
- *cost = COSTS_N_INSNS (n_minus_1 + 1);
1035
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
1038
/* Cost is just the cost of the RHS of the set. */
1039
*cost += rtx_cost (op1, SET, 1, speed);
1040
@@ -5656,7 +5777,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1041
approximation for the additional cost of the addressing
1043
rtx address = XEXP (x, 0);
1044
- if (GET_MODE_CLASS (mode) == MODE_INT)
1045
+ if (VECTOR_MODE_P (mode))
1046
+ *cost += extra_cost->ldst.loadv;
1047
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1048
*cost += extra_cost->ldst.load;
1049
else if (mode == SFmode)
1050
*cost += extra_cost->ldst.loadf;
1051
@@ -5673,6 +5796,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1055
+ if (VECTOR_MODE_P (mode))
1060
+ *cost += extra_cost->vect.alu;
1065
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
1067
if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
1068
@@ -5717,7 +5850,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1072
- *cost += extra_cost->alu.clz;
1074
+ if (VECTOR_MODE_P (mode))
1075
+ *cost += extra_cost->vect.alu;
1077
+ *cost += extra_cost->alu.clz;
1082
@@ -5796,12 +5934,27 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1084
if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
1086
+ *cost += rtx_cost (op0, COMPARE, 0, speed);
1087
/* FCMP supports constant 0.0 for no extra cost. */
1093
+ if (VECTOR_MODE_P (mode))
1095
+ /* Vector compare. */
1097
+ *cost += extra_cost->vect.alu;
1099
+ if (aarch64_float_const_zero_rtx_p (op1))
1101
+ /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
1110
@@ -5810,6 +5963,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1114
+ *cost += rtx_cost (op0, MINUS, 0, speed);
1116
/* Detect valid immediates. */
1117
if ((GET_MODE_CLASS (mode) == MODE_INT
1118
|| (GET_MODE_CLASS (mode) == MODE_CC
1119
@@ -5817,20 +5972,17 @@ cost_minus:
1120
&& CONST_INT_P (op1)
1121
&& aarch64_uimm12_shift (INTVAL (op1)))
1123
- *cost += rtx_cost (op0, MINUS, 0, speed);
1126
/* SUB(S) (immediate). */
1127
*cost += extra_cost->alu.arith;
1132
/* Look for SUB (extended register). */
1133
if (aarch64_rtx_arith_op_extract_p (op1, mode))
1136
- *cost += extra_cost->alu.arith_shift;
1137
+ *cost += extra_cost->alu.extend_arith;
1139
*cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
1140
(enum rtx_code) GET_CODE (op1),
1141
@@ -5842,13 +5994,12 @@ cost_minus:
1143
/* Cost this as an FMA-alike operation. */
1144
if ((GET_CODE (new_op1) == MULT
1145
- || GET_CODE (new_op1) == ASHIFT)
1146
+ || aarch64_shift_p (GET_CODE (new_op1)))
1149
*cost += aarch64_rtx_mult_cost (new_op1, MULT,
1150
(enum rtx_code) code,
1152
- *cost += rtx_cost (op0, MINUS, 0, speed);
1156
@@ -5856,12 +6007,21 @@ cost_minus:
1160
- if (GET_MODE_CLASS (mode) == MODE_INT)
1162
- *cost += extra_cost->alu.arith;
1163
+ if (VECTOR_MODE_P (mode))
1166
+ *cost += extra_cost->vect.alu;
1168
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1171
+ *cost += extra_cost->alu.arith;
1173
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1175
- *cost += extra_cost->fp[mode == DFmode].addsub;
1178
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1183
@@ -5895,11 +6055,13 @@ cost_plus:
1187
+ *cost += rtx_cost (op1, PLUS, 1, speed);
1189
/* Look for ADD (extended register). */
1190
if (aarch64_rtx_arith_op_extract_p (op0, mode))
1193
- *cost += extra_cost->alu.arith_shift;
1194
+ *cost += extra_cost->alu.extend_arith;
1196
*cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
1197
(enum rtx_code) GET_CODE (op0),
1198
@@ -5912,25 +6074,32 @@ cost_plus:
1199
new_op0 = aarch64_strip_extend (op0);
1201
if (GET_CODE (new_op0) == MULT
1202
- || GET_CODE (new_op0) == ASHIFT)
1203
+ || aarch64_shift_p (GET_CODE (new_op0)))
1205
*cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
1207
- *cost += rtx_cost (op1, PLUS, 1, speed);
1211
- *cost += (rtx_cost (new_op0, PLUS, 0, speed)
1212
- + rtx_cost (op1, PLUS, 1, speed));
1213
+ *cost += rtx_cost (new_op0, PLUS, 0, speed);
1217
- if (GET_MODE_CLASS (mode) == MODE_INT)
1219
- *cost += extra_cost->alu.arith;
1220
+ if (VECTOR_MODE_P (mode))
1223
+ *cost += extra_cost->vect.alu;
1225
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1228
+ *cost += extra_cost->alu.arith;
1230
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1232
- *cost += extra_cost->fp[mode == DFmode].addsub;
1235
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1240
@@ -5939,8 +6108,12 @@ cost_plus:
1241
*cost = COSTS_N_INSNS (1);
1244
- *cost += extra_cost->alu.rev;
1247
+ if (VECTOR_MODE_P (mode))
1248
+ *cost += extra_cost->vect.alu;
1250
+ *cost += extra_cost->alu.rev;
1255
@@ -5948,8 +6121,22 @@ cost_plus:
1257
*cost = COSTS_N_INSNS (1);
1261
+ if (VECTOR_MODE_P (mode))
1262
+ *cost += extra_cost->vect.alu;
1264
+ *cost += extra_cost->alu.rev;
1269
+ if (aarch64_extr_rtx_p (x, &op0, &op1))
1271
+ *cost += rtx_cost (op0, IOR, 0, speed)
1272
+ + rtx_cost (op1, IOR, 1, speed);
1274
- *cost += extra_cost->alu.rev;
1275
+ *cost += extra_cost->alu.shift;
1279
@@ -5960,6 +6147,13 @@ cost_plus:
1283
+ if (VECTOR_MODE_P (mode))
1286
+ *cost += extra_cost->vect.alu;
1291
&& GET_CODE (op0) == MULT
1292
&& CONST_INT_P (XEXP (op0, 1))
1293
@@ -6025,13 +6219,52 @@ cost_plus:
1298
+ op0 = aarch64_strip_shift (x);
1300
+ if (VECTOR_MODE_P (mode))
1303
+ *cost += extra_cost->vect.alu;
1307
+ /* MVN-shifted-reg. */
1310
+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
1313
+ *cost += extra_cost->alu.log_shift;
1317
+ /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
1318
+ Handle the second form here taking care that 'a' in the above can
1320
+ else if (GET_CODE (op0) == XOR)
1322
+ rtx newop0 = XEXP (op0, 0);
1323
+ rtx newop1 = XEXP (op0, 1);
1324
+ rtx op0_stripped = aarch64_strip_shift (newop0);
1326
+ *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
1327
+ + rtx_cost (op0_stripped, XOR, 0, speed);
1331
+ if (op0_stripped != newop0)
1332
+ *cost += extra_cost->alu.log_shift;
1334
+ *cost += extra_cost->alu.logical;
1341
*cost += extra_cost->alu.logical;
1343
- /* The logical instruction could have the shifted register form,
1344
- but the cost is the same if the shift is processed as a separate
1345
- instruction, so we don't bother with it here. */
1349
@@ -6067,10 +6300,19 @@ cost_plus:
1355
- *cost += extra_cost->alu.extend;
1358
+ if (VECTOR_MODE_P (mode))
1361
+ *cost += extra_cost->vect.alu;
1366
+ *cost += extra_cost->alu.extend;
1372
@@ -6090,7 +6332,12 @@ cost_plus:
1376
- *cost += extra_cost->alu.extend;
1378
+ if (VECTOR_MODE_P (mode))
1379
+ *cost += extra_cost->vect.alu;
1381
+ *cost += extra_cost->alu.extend;
1386
@@ -6099,10 +6346,20 @@ cost_plus:
1388
if (CONST_INT_P (op1))
1390
- /* LSL (immediate), UBMF, UBFIZ and friends. These are all
1393
- *cost += extra_cost->alu.shift;
1395
+ if (VECTOR_MODE_P (mode))
1397
+ /* Vector shift (immediate). */
1398
+ *cost += extra_cost->vect.alu;
1402
+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all
1404
+ *cost += extra_cost->alu.shift;
1408
/* We can incorporate zero/sign extend for free. */
1409
if (GET_CODE (op0) == ZERO_EXTEND
1410
@@ -6114,10 +6371,19 @@ cost_plus:
1416
- *cost += extra_cost->alu.shift_reg;
1419
+ if (VECTOR_MODE_P (mode))
1421
+ /* Vector shift (register). */
1422
+ *cost += extra_cost->vect.alu;
1427
+ *cost += extra_cost->alu.shift_reg;
1430
return false; /* All arguments need to be in registers. */
1433
@@ -6132,7 +6398,12 @@ cost_plus:
1435
/* ASR (immediate) and friends. */
1437
- *cost += extra_cost->alu.shift;
1439
+ if (VECTOR_MODE_P (mode))
1440
+ *cost += extra_cost->vect.alu;
1442
+ *cost += extra_cost->alu.shift;
1445
*cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
1447
@@ -6142,8 +6413,12 @@ cost_plus:
1449
/* ASR (register) and friends. */
1451
- *cost += extra_cost->alu.shift_reg;
1454
+ if (VECTOR_MODE_P (mode))
1455
+ *cost += extra_cost->vect.alu;
1457
+ *cost += extra_cost->alu.shift_reg;
1459
return false; /* All arguments need to be in registers. */
1462
@@ -6191,7 +6466,12 @@ cost_plus:
1466
- *cost += extra_cost->alu.bfx;
1468
+ if (VECTOR_MODE_P (mode))
1469
+ *cost += extra_cost->vect.alu;
1471
+ *cost += extra_cost->alu.bfx;
1474
/* We can trust that the immediates used will be correct (there
1475
are no by-register forms), so we need only cost op0. */
1476
@@ -6208,7 +6488,9 @@ cost_plus:
1480
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
1481
+ if (VECTOR_MODE_P (mode))
1482
+ *cost += extra_cost->vect.alu;
1483
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
1484
*cost += (extra_cost->mult[GET_MODE (x) == DImode].add
1485
+ extra_cost->mult[GET_MODE (x) == DImode].idiv);
1486
else if (GET_MODE (x) == DFmode)
1487
@@ -6225,7 +6507,9 @@ cost_plus:
1491
- if (GET_MODE_CLASS (mode) == MODE_INT)
1492
+ if (VECTOR_MODE_P (mode))
1493
+ *cost += extra_cost->vect.alu;
1494
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1495
/* There is no integer SQRT, so only DIV and UDIV can get
1497
*cost += extra_cost->mult[mode == DImode].idiv;
1498
@@ -6257,7 +6541,12 @@ cost_plus:
1502
- *cost += extra_cost->fp[mode == DFmode].fma;
1504
+ if (VECTOR_MODE_P (mode))
1505
+ *cost += extra_cost->vect.alu;
1507
+ *cost += extra_cost->fp[mode == DFmode].fma;
1510
/* FMSUB, FNMADD, and FNMSUB are free. */
1511
if (GET_CODE (op0) == NEG)
1512
@@ -6295,14 +6584,36 @@ cost_plus:
1513
*cost += rtx_cost (op2, FMA, 2, speed);
1517
+ case UNSIGNED_FLOAT:
1519
+ *cost += extra_cost->fp[mode == DFmode].fromint;
1524
- *cost += extra_cost->fp[mode == DFmode].widen;
1526
+ if (VECTOR_MODE_P (mode))
1528
+ /*Vector truncate. */
1529
+ *cost += extra_cost->vect.alu;
1532
+ *cost += extra_cost->fp[mode == DFmode].widen;
1536
case FLOAT_TRUNCATE:
1538
- *cost += extra_cost->fp[mode == DFmode].narrow;
1540
+ if (VECTOR_MODE_P (mode))
1542
+ /*Vector conversion. */
1543
+ *cost += extra_cost->vect.alu;
1546
+ *cost += extra_cost->fp[mode == DFmode].narrow;
1551
@@ -6323,15 +6634,37 @@ cost_plus:
1555
- *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
1558
+ if (VECTOR_MODE_P (mode))
1559
+ *cost += extra_cost->vect.alu;
1561
+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
1563
*cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
1567
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1568
+ if (VECTOR_MODE_P (mode))
1570
- /* FABS and FNEG are analogous. */
1571
+ /* ABS (vector). */
1573
+ *cost += extra_cost->vect.alu;
1575
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1577
+ op0 = XEXP (x, 0);
1579
+ /* FABD, which is analogous to FADD. */
1580
+ if (GET_CODE (op0) == MINUS)
1582
+ *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed);
1583
+ + rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
1585
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1589
+ /* Simple FABS is analogous to FNEG. */
1591
*cost += extra_cost->fp[mode == DFmode].neg;
1593
@@ -6350,10 +6683,15 @@ cost_plus:
1597
- /* FMAXNM/FMINNM/FMAX/FMIN.
1598
- TODO: This may not be accurate for all implementations, but
1599
- we do not model this in the cost tables. */
1600
- *cost += extra_cost->fp[mode == DFmode].addsub;
1601
+ if (VECTOR_MODE_P (mode))
1602
+ *cost += extra_cost->vect.alu;
1605
+ /* FMAXNM/FMINNM/FMAX/FMIN.
1606
+ TODO: This may not be accurate for all implementations, but
1607
+ we do not model this in the cost tables. */
1608
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1613
@@ -7830,6 +8168,26 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
1617
+/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
1618
+ type as described in AAPCS64 \S 4.1.2.
1620
+ See the comment above aarch64_composite_type_p for the notes on MODE. */
1623
+aarch64_short_vector_p (const_tree type,
1624
+ machine_mode mode)
1626
+ HOST_WIDE_INT size = -1;
1628
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
1629
+ size = int_size_in_bytes (type);
1630
+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1631
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1632
+ size = GET_MODE_SIZE (mode);
1634
+ return (size == 8 || size == 16);
1637
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
1638
type as described in AAPCS64 \S 4.3. This includes aggregate, union and
1639
array types. The C99 floating-point complex types are also considered
1640
@@ -7851,6 +8209,9 @@ static bool
1641
aarch64_composite_type_p (const_tree type,
1644
+ if (aarch64_short_vector_p (type, mode))
1647
if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
1650
@@ -7862,27 +8223,6 @@ aarch64_composite_type_p (const_tree type,
1654
-/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
1655
- type as described in AAPCS64 \S 4.1.2.
1657
- See the comment above aarch64_composite_type_p for the notes on MODE. */
1660
-aarch64_short_vector_p (const_tree type,
1661
- machine_mode mode)
1663
- HOST_WIDE_INT size = -1;
1665
- if (type && TREE_CODE (type) == VECTOR_TYPE)
1666
- size = int_size_in_bytes (type);
1667
- else if (!aarch64_composite_type_p (type, mode)
1668
- && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1669
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
1670
- size = GET_MODE_SIZE (mode);
1672
- return (size == 8 || size == 16) ? true : false;
1675
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
1676
shall be passed or returned in simd/fp register(s) (providing these
1677
parameter passing registers are available).
1678
@@ -8581,24 +8921,6 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
1682
-/* Emit code to place a AdvSIMD pair result in memory locations (with equal
1685
-aarch64_simd_emit_pair_result_insn (machine_mode mode,
1686
- rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
1689
- rtx mem = gen_rtx_MEM (mode, destaddr);
1690
- rtx tmp1 = gen_reg_rtx (mode);
1691
- rtx tmp2 = gen_reg_rtx (mode);
1693
- emit_insn (intfn (tmp1, op1, tmp2));
1695
- emit_move_insn (mem, tmp1);
1696
- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
1697
- emit_move_insn (mem, tmp2);
1700
/* Return TRUE if OP is a valid vector addressing mode. */
1702
aarch64_simd_mem_operand_p (rtx op)
1703
@@ -8781,22 +9103,19 @@ aarch64_expand_vector_init (rtx target, rtx vals)
1704
machine_mode mode = GET_MODE (target);
1705
machine_mode inner_mode = GET_MODE_INNER (mode);
1706
int n_elts = GET_MODE_NUNITS (mode);
1707
- int n_var = 0, one_var = -1;
1709
+ rtx any_const = NULL_RTX;
1710
bool all_same = true;
1714
- x = XVECEXP (vals, 0, 0);
1715
- if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
1716
- n_var = 1, one_var = 0;
1718
- for (i = 1; i < n_elts; ++i)
1719
+ for (int i = 0; i < n_elts; ++i)
1721
- x = XVECEXP (vals, 0, i);
1722
+ rtx x = XVECEXP (vals, 0, i);
1723
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
1724
- ++n_var, one_var = i;
1729
- if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
1730
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
1734
@@ -8813,36 +9132,60 @@ aarch64_expand_vector_init (rtx target, rtx vals)
1735
/* Splat a single non-constant element if we can. */
1738
- x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
1739
+ rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
1740
aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
1744
- /* One field is non-constant. Load constant then overwrite varying
1745
- field. This is more efficient than using the stack. */
1747
+ /* Half the fields (or less) are non-constant. Load constant then overwrite
1748
+ varying fields. Hope that this is more efficient than using the stack. */
1749
+ if (n_var <= n_elts/2)
1751
rtx copy = copy_rtx (vals);
1752
- rtx index = GEN_INT (one_var);
1753
- enum insn_code icode;
1755
- /* Load constant part of vector, substitute neighboring value for
1756
- varying element. */
1757
- XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
1758
+ /* Load constant part of vector. We really don't care what goes into the
1759
+ parts we will overwrite, but we're more likely to be able to load the
1760
+ constant efficiently if it has fewer, larger, repeating parts
1761
+ (see aarch64_simd_valid_immediate). */
1762
+ for (int i = 0; i < n_elts; i++)
1764
+ rtx x = XVECEXP (vals, 0, i);
1765
+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
1767
+ rtx subst = any_const;
1768
+ for (int bit = n_elts / 2; bit > 0; bit /= 2)
1770
+ /* Look in the copied vector, as more elements are const. */
1771
+ rtx test = XVECEXP (copy, 0, i ^ bit);
1772
+ if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
1778
+ XVECEXP (copy, 0, i) = subst;
1780
aarch64_expand_vector_init (target, copy);
1782
- /* Insert variable. */
1783
- x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
1784
- icode = optab_handler (vec_set_optab, mode);
1785
+ /* Insert variables. */
1786
+ enum insn_code icode = optab_handler (vec_set_optab, mode);
1787
gcc_assert (icode != CODE_FOR_nothing);
1788
- emit_insn (GEN_FCN (icode) (target, x, index));
1790
+ for (int i = 0; i < n_elts; i++)
1792
+ rtx x = XVECEXP (vals, 0, i);
1793
+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
1795
+ x = copy_to_mode_reg (inner_mode, x);
1796
+ emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
1801
/* Construct the vector in memory one field at a time
1802
and load the whole vector. */
1803
- mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
1804
- for (i = 0; i < n_elts; i++)
1805
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
1806
+ for (int i = 0; i < n_elts; i++)
1807
emit_move_insn (adjust_address_nv (mem, inner_mode,
1808
i * GET_MODE_SIZE (inner_mode)),
1809
XVECEXP (vals, 0, i));
1810
--- a/src/gcc/config/aarch64/aarch64.h
1811
+++ b/src/gcc/config/aarch64/aarch64.h
1812
@@ -506,7 +506,7 @@ enum reg_class
1816
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \
1817
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
1818
TARGET_CPU_##INTERNAL_IDENT,
1819
#include "aarch64-cores.def"
1821
@@ -823,7 +823,8 @@ do { \
1822
#define TRAMPOLINE_SECTION text_section
1824
/* To start with. */
1825
-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2
1826
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
1827
+ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P))
1830
/* Assembly output. */
1831
@@ -929,11 +930,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
1832
#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \
1833
{ "rewrite_mcpu", aarch64_rewrite_mcpu },
1835
+#if defined(__aarch64__)
1836
+extern const char *host_detect_local_cpu (int argc, const char **argv);
1837
+# define EXTRA_SPEC_FUNCTIONS \
1838
+ { "local_cpu_detect", host_detect_local_cpu }, \
1839
+ BIG_LITTLE_CPU_SPEC_FUNCTIONS
1841
+# define MCPU_MTUNE_NATIVE_SPECS \
1842
+ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \
1843
+ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \
1844
+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
1846
+# define MCPU_MTUNE_NATIVE_SPECS ""
1847
+# define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
1850
#define ASM_CPU_SPEC \
1853
-#define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
1855
#define EXTRA_SPECS \
1856
{ "asm_cpu_spec", ASM_CPU_SPEC }
1858
--- a/src/gcc/config/aarch64/aarch64.md
1859
+++ b/src/gcc/config/aarch64/aarch64.md
1860
@@ -1414,18 +1414,28 @@
1862
if (! aarch64_plus_operand (operands[2], VOIDmode))
1864
- rtx subtarget = ((optimize && can_create_pseudo_p ())
1865
- ? gen_reg_rtx (<MODE>mode) : operands[0]);
1866
HOST_WIDE_INT imm = INTVAL (operands[2]);
1869
- imm = -(-imm & ~0xfff);
1870
+ if (aarch64_move_imm (imm, <MODE>mode) && can_create_pseudo_p ())
1872
+ rtx tmp = gen_reg_rtx (<MODE>mode);
1873
+ emit_move_insn (tmp, operands[2]);
1874
+ operands[2] = tmp;
1879
- emit_insn (gen_add<mode>3 (subtarget, operands[1], GEN_INT (imm)));
1880
- operands[1] = subtarget;
1881
- operands[2] = GEN_INT (INTVAL (operands[2]) - imm);
1883
+ rtx subtarget = ((optimize && can_create_pseudo_p ())
1884
+ ? gen_reg_rtx (<MODE>mode) : operands[0]);
1887
+ imm = -(-imm & ~0xfff);
1891
+ emit_insn (gen_add<mode>3 (subtarget, operands[1], GEN_INT (imm)));
1892
+ operands[1] = subtarget;
1893
+ operands[2] = GEN_INT (INTVAL (operands[2]) - imm);
1898
@@ -1529,6 +1539,38 @@
1899
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
1902
+(define_insn "*adds_shift_imm_<mode>"
1903
+ [(set (reg:CC_NZ CC_REGNUM)
1905
+ (plus:GPI (ASHIFT:GPI
1906
+ (match_operand:GPI 1 "register_operand" "r")
1907
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
1908
+ (match_operand:GPI 3 "register_operand" "r"))
1910
+ (set (match_operand:GPI 0 "register_operand" "=r")
1911
+ (plus:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))
1914
+ "adds\\t%<w>0, %<w>3, %<w>1, <shift> %2"
1915
+ [(set_attr "type" "alus_shift_imm")]
1918
+(define_insn "*subs_shift_imm_<mode>"
1919
+ [(set (reg:CC_NZ CC_REGNUM)
1921
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
1923
+ (match_operand:GPI 2 "register_operand" "r")
1924
+ (match_operand:QI 3 "aarch64_shift_imm_<mode>" "n")))
1926
+ (set (match_operand:GPI 0 "register_operand" "=r")
1927
+ (minus:GPI (match_dup 1)
1928
+ (ASHIFT:GPI (match_dup 2) (match_dup 3))))]
1930
+ "subs\\t%<w>0, %<w>1, %<w>2, <shift> %3"
1931
+ [(set_attr "type" "alus_shift_imm")]
1934
(define_insn "*adds_mul_imm_<mode>"
1935
[(set (reg:CC_NZ CC_REGNUM)
1937
@@ -1589,6 +1631,42 @@
1938
[(set_attr "type" "alus_ext")]
1941
+(define_insn "*adds_<optab><ALLX:mode>_shift_<GPI:mode>"
1942
+ [(set (reg:CC_NZ CC_REGNUM)
1944
+ (plus:GPI (ashift:GPI
1946
+ (match_operand:ALLX 1 "register_operand" "r"))
1947
+ (match_operand 2 "aarch64_imm3" "Ui3"))
1948
+ (match_operand:GPI 3 "register_operand" "r"))
1950
+ (set (match_operand:GPI 0 "register_operand" "=rk")
1951
+ (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1))
1955
+ "adds\\t%<GPI:w>0, %<GPI:w>3, %<GPI:w>1, <su>xt<ALLX:size> %2"
1956
+ [(set_attr "type" "alus_ext")]
1959
+(define_insn "*subs_<optab><ALLX:mode>_shift_<GPI:mode>"
1960
+ [(set (reg:CC_NZ CC_REGNUM)
1962
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
1965
+ (match_operand:ALLX 2 "register_operand" "r"))
1966
+ (match_operand 3 "aarch64_imm3" "Ui3")))
1968
+ (set (match_operand:GPI 0 "register_operand" "=rk")
1969
+ (minus:GPI (match_dup 1)
1970
+ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2))
1973
+ "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size> %3"
1974
+ [(set_attr "type" "alus_ext")]
1977
(define_insn "*adds_<optab><mode>_multp2"
1978
[(set (reg:CC_NZ CC_REGNUM)
1980
@@ -1884,6 +1962,38 @@
1981
[(set_attr "type" "adc_reg")]
1984
+(define_insn "*add_uxt<mode>_shift2"
1985
+ [(set (match_operand:GPI 0 "register_operand" "=rk")
1986
+ (plus:GPI (and:GPI
1987
+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
1988
+ (match_operand 2 "aarch64_imm3" "Ui3"))
1989
+ (match_operand 3 "const_int_operand" "n"))
1990
+ (match_operand:GPI 4 "register_operand" "r")))]
1991
+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0"
1993
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]),
1994
+ INTVAL (operands[3])));
1995
+ return \"add\t%<w>0, %<w>4, %<w>1, uxt%e3 %2\";"
1996
+ [(set_attr "type" "alu_ext")]
1999
+;; zero_extend version of above
2000
+(define_insn "*add_uxtsi_shift2_uxtw"
2001
+ [(set (match_operand:DI 0 "register_operand" "=rk")
2004
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
2005
+ (match_operand 2 "aarch64_imm3" "Ui3"))
2006
+ (match_operand 3 "const_int_operand" "n"))
2007
+ (match_operand:SI 4 "register_operand" "r"))))]
2008
+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0"
2010
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]),
2011
+ INTVAL (operands[3])));
2012
+ return \"add\t%w0, %w4, %w1, uxt%e3 %2\";"
2013
+ [(set_attr "type" "alu_ext")]
2016
(define_insn "*add_uxt<mode>_multp2"
2017
[(set (match_operand:GPI 0 "register_operand" "=rk")
2019
@@ -2140,6 +2250,38 @@
2020
[(set_attr "type" "adc_reg")]
2023
+(define_insn "*sub_uxt<mode>_shift2"
2024
+ [(set (match_operand:GPI 0 "register_operand" "=rk")
2025
+ (minus:GPI (match_operand:GPI 4 "register_operand" "rk")
2027
+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
2028
+ (match_operand 2 "aarch64_imm3" "Ui3"))
2029
+ (match_operand 3 "const_int_operand" "n"))))]
2030
+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0"
2032
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]),
2033
+ INTVAL (operands[3])));
2034
+ return \"sub\t%<w>0, %<w>4, %<w>1, uxt%e3 %2\";"
2035
+ [(set_attr "type" "alu_ext")]
2038
+;; zero_extend version of above
2039
+(define_insn "*sub_uxtsi_shift2_uxtw"
2040
+ [(set (match_operand:DI 0 "register_operand" "=rk")
2042
+ (minus:SI (match_operand:SI 4 "register_operand" "rk")
2044
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
2045
+ (match_operand 2 "aarch64_imm3" "Ui3"))
2046
+ (match_operand 3 "const_int_operand" "n")))))]
2047
+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0"
2049
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]),
2050
+ INTVAL (operands[3])));
2051
+ return \"sub\t%w0, %w4, %w1, uxt%e3 %2\";"
2052
+ [(set_attr "type" "alu_ext")]
2055
(define_insn "*sub_uxt<mode>_multp2"
2056
[(set (match_operand:GPI 0 "register_operand" "=rk")
2057
(minus:GPI (match_operand:GPI 4 "register_operand" "rk")
2058
@@ -3058,6 +3200,26 @@
2059
(set_attr "simd" "*,yes")]
2062
+(define_insn "*<NLOGICAL:optab>_one_cmplsidi3_ze"
2063
+ [(set (match_operand:DI 0 "register_operand" "=r")
2065
+ (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r"))
2066
+ (match_operand:SI 2 "register_operand" "r"))))]
2068
+ "<NLOGICAL:nlogical>\\t%w0, %w2, %w1"
2069
+ [(set_attr "type" "logic_reg")]
2072
+(define_insn "*xor_one_cmplsidi3_ze"
2073
+ [(set (match_operand:DI 0 "register_operand" "=r")
2075
+ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "r")
2076
+ (match_operand:SI 2 "register_operand" "r")))))]
2078
+ "eon\\t%w0, %w1, %w2"
2079
+ [(set_attr "type" "logic_reg")]
2082
;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)).
2083
;; eon does not operate on SIMD registers so the vector variant must be split.
2084
(define_insn_and_split "*xor_one_cmpl<mode>3"
2085
@@ -3131,6 +3293,32 @@
2086
[(set_attr "type" "logics_shift_imm")]
2089
+(define_insn "*eor_one_cmpl_<SHIFT:optab><mode>3_alt"
2090
+ [(set (match_operand:GPI 0 "register_operand" "=r")
2093
+ (match_operand:GPI 1 "register_operand" "r")
2094
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
2095
+ (match_operand:GPI 3 "register_operand" "r"))))]
2097
+ "eon\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
2098
+ [(set_attr "type" "logic_shift_imm")]
2101
+;; Zero-extend version of the above.
2102
+(define_insn "*eor_one_cmpl_<SHIFT:optab>sidi3_alt_ze"
2103
+ [(set (match_operand:DI 0 "register_operand" "=r")
2107
+ (match_operand:SI 1 "register_operand" "r")
2108
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
2109
+ (match_operand:SI 3 "register_operand" "r")))))]
2111
+ "eon\\t%w0, %w3, %w1, <SHIFT:shift> %2"
2112
+ [(set_attr "type" "logic_shift_imm")]
2115
(define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0"
2116
[(set (reg:CC_NZ CC_REGNUM)
2118
@@ -3551,6 +3739,21 @@
2119
[(set_attr "type" "shift_imm")]
2122
+;; There are no canonicalisation rules for ashift and lshiftrt inside an ior
2123
+;; so we have to match both orderings.
2124
+(define_insn "*extr<mode>5_insn_alt"
2125
+ [(set (match_operand:GPI 0 "register_operand" "=r")
2126
+ (ior:GPI (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r")
2127
+ (match_operand 4 "const_int_operand" "n"))
2128
+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
2129
+ (match_operand 3 "const_int_operand" "n"))))]
2130
+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode)
2131
+ && (UINTVAL (operands[3]) + UINTVAL (operands[4])
2132
+ == GET_MODE_BITSIZE (<MODE>mode))"
2133
+ "extr\\t%<w>0, %<w>1, %<w>2, %4"
2134
+ [(set_attr "type" "shift_imm")]
2137
;; zero_extend version of the above
2138
(define_insn "*extrsi5_insn_uxtw"
2139
[(set (match_operand:DI 0 "register_operand" "=r")
2140
@@ -3565,6 +3768,19 @@
2141
[(set_attr "type" "shift_imm")]
2144
+(define_insn "*extrsi5_insn_uxtw_alt"
2145
+ [(set (match_operand:DI 0 "register_operand" "=r")
2147
+ (ior:SI (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
2148
+ (match_operand 4 "const_int_operand" "n"))
2149
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
2150
+ (match_operand 3 "const_int_operand" "n")))))]
2151
+ "UINTVAL (operands[3]) < 32 &&
2152
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
2153
+ "extr\\t%w0, %w1, %w2, %4"
2154
+ [(set_attr "type" "shift_imm")]
2157
(define_insn "*ror<mode>3_insn"
2158
[(set (match_operand:GPI 0 "register_operand" "=r")
2159
(rotate:GPI (match_operand:GPI 1 "register_operand" "r")
2160
--- a/src/gcc/config/aarch64/arm_neon.h
2161
+++ b/src/gcc/config/aarch64/arm_neon.h
2162
@@ -5665,8 +5665,6 @@ vaddlvq_u32 (uint32x4_t a)
2164
/* vcvt_high_f32_f16 not supported */
2166
-static float32x2_t vdup_n_f32 (float32_t);
2168
#define vcvt_n_f32_s32(a, b) \
2171
@@ -9824,272 +9822,6 @@ vrsqrtss_f32 (float32_t a, float32_t b)
2175
-#define vst1_lane_f32(a, b, c) \
2178
- float32x2_t b_ = (b); \
2179
- float32_t * a_ = (a); \
2180
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2182
- : "r"(a_), "w"(b_), "i"(c) \
2186
-#define vst1_lane_f64(a, b, c) \
2189
- float64x1_t b_ = (b); \
2190
- float64_t * a_ = (a); \
2191
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2193
- : "r"(a_), "w"(b_), "i"(c) \
2197
-#define vst1_lane_p8(a, b, c) \
2200
- poly8x8_t b_ = (b); \
2201
- poly8_t * a_ = (a); \
2202
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2204
- : "r"(a_), "w"(b_), "i"(c) \
2208
-#define vst1_lane_p16(a, b, c) \
2211
- poly16x4_t b_ = (b); \
2212
- poly16_t * a_ = (a); \
2213
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2215
- : "r"(a_), "w"(b_), "i"(c) \
2219
-#define vst1_lane_s8(a, b, c) \
2222
- int8x8_t b_ = (b); \
2223
- int8_t * a_ = (a); \
2224
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2226
- : "r"(a_), "w"(b_), "i"(c) \
2230
-#define vst1_lane_s16(a, b, c) \
2233
- int16x4_t b_ = (b); \
2234
- int16_t * a_ = (a); \
2235
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2237
- : "r"(a_), "w"(b_), "i"(c) \
2241
-#define vst1_lane_s32(a, b, c) \
2244
- int32x2_t b_ = (b); \
2245
- int32_t * a_ = (a); \
2246
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2248
- : "r"(a_), "w"(b_), "i"(c) \
2252
-#define vst1_lane_s64(a, b, c) \
2255
- int64x1_t b_ = (b); \
2256
- int64_t * a_ = (a); \
2257
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2259
- : "r"(a_), "w"(b_), "i"(c) \
2263
-#define vst1_lane_u8(a, b, c) \
2266
- uint8x8_t b_ = (b); \
2267
- uint8_t * a_ = (a); \
2268
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2270
- : "r"(a_), "w"(b_), "i"(c) \
2274
-#define vst1_lane_u16(a, b, c) \
2277
- uint16x4_t b_ = (b); \
2278
- uint16_t * a_ = (a); \
2279
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2281
- : "r"(a_), "w"(b_), "i"(c) \
2285
-#define vst1_lane_u32(a, b, c) \
2288
- uint32x2_t b_ = (b); \
2289
- uint32_t * a_ = (a); \
2290
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2292
- : "r"(a_), "w"(b_), "i"(c) \
2296
-#define vst1_lane_u64(a, b, c) \
2299
- uint64x1_t b_ = (b); \
2300
- uint64_t * a_ = (a); \
2301
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2303
- : "r"(a_), "w"(b_), "i"(c) \
2308
-#define vst1q_lane_f32(a, b, c) \
2311
- float32x4_t b_ = (b); \
2312
- float32_t * a_ = (a); \
2313
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2315
- : "r"(a_), "w"(b_), "i"(c) \
2319
-#define vst1q_lane_f64(a, b, c) \
2322
- float64x2_t b_ = (b); \
2323
- float64_t * a_ = (a); \
2324
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2326
- : "r"(a_), "w"(b_), "i"(c) \
2330
-#define vst1q_lane_p8(a, b, c) \
2333
- poly8x16_t b_ = (b); \
2334
- poly8_t * a_ = (a); \
2335
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2337
- : "r"(a_), "w"(b_), "i"(c) \
2341
-#define vst1q_lane_p16(a, b, c) \
2344
- poly16x8_t b_ = (b); \
2345
- poly16_t * a_ = (a); \
2346
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2348
- : "r"(a_), "w"(b_), "i"(c) \
2352
-#define vst1q_lane_s8(a, b, c) \
2355
- int8x16_t b_ = (b); \
2356
- int8_t * a_ = (a); \
2357
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2359
- : "r"(a_), "w"(b_), "i"(c) \
2363
-#define vst1q_lane_s16(a, b, c) \
2366
- int16x8_t b_ = (b); \
2367
- int16_t * a_ = (a); \
2368
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2370
- : "r"(a_), "w"(b_), "i"(c) \
2374
-#define vst1q_lane_s32(a, b, c) \
2377
- int32x4_t b_ = (b); \
2378
- int32_t * a_ = (a); \
2379
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2381
- : "r"(a_), "w"(b_), "i"(c) \
2385
-#define vst1q_lane_s64(a, b, c) \
2388
- int64x2_t b_ = (b); \
2389
- int64_t * a_ = (a); \
2390
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2392
- : "r"(a_), "w"(b_), "i"(c) \
2396
-#define vst1q_lane_u8(a, b, c) \
2399
- uint8x16_t b_ = (b); \
2400
- uint8_t * a_ = (a); \
2401
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2403
- : "r"(a_), "w"(b_), "i"(c) \
2407
-#define vst1q_lane_u16(a, b, c) \
2410
- uint16x8_t b_ = (b); \
2411
- uint16_t * a_ = (a); \
2412
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2414
- : "r"(a_), "w"(b_), "i"(c) \
2418
-#define vst1q_lane_u32(a, b, c) \
2421
- uint32x4_t b_ = (b); \
2422
- uint32_t * a_ = (a); \
2423
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2425
- : "r"(a_), "w"(b_), "i"(c) \
2429
-#define vst1q_lane_u64(a, b, c) \
2432
- uint64x2_t b_ = (b); \
2433
- uint64_t * a_ = (a); \
2434
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2436
- : "r"(a_), "w"(b_), "i"(c) \
2441
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2442
vtst_p8 (poly8x8_t a, poly8x8_t b)
2444
@@ -11668,25 +11400,25 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
2448
-static __inline uint8x16_t
2449
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2450
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
2452
return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
2455
-static __inline uint8x16_t
2456
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2457
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
2459
return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
2462
-static __inline uint8x16_t
2463
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2464
vaesmcq_u8 (uint8x16_t data)
2466
return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
2469
-static __inline uint8x16_t
2470
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2471
vaesimcq_u8 (uint8x16_t data)
2473
return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
2474
@@ -11887,7 +11619,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b)
2475
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2476
vceq_s64 (int64x1_t __a, int64x1_t __b)
2478
- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
2479
+ return (uint64x1_t) (__a == __b);
2482
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2483
@@ -11911,7 +11643,7 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b)
2484
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2485
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
2487
- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
2488
+ return (__a == __b);
2491
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2492
@@ -12047,7 +11779,7 @@ vceqz_s32 (int32x2_t __a)
2493
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2494
vceqz_s64 (int64x1_t __a)
2496
- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
2497
+ return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
2500
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2501
@@ -12071,7 +11803,7 @@ vceqz_u32 (uint32x2_t __a)
2502
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2503
vceqz_u64 (uint64x1_t __a)
2505
- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
2506
+ return (__a == __AARCH64_UINT64_C (0));
2509
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2510
@@ -12201,7 +11933,7 @@ vcge_s32 (int32x2_t __a, int32x2_t __b)
2511
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2512
vcge_s64 (int64x1_t __a, int64x1_t __b)
2514
- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
2515
+ return (uint64x1_t) (__a >= __b);
2518
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2519
@@ -12225,7 +11957,7 @@ vcge_u32 (uint32x2_t __a, uint32x2_t __b)
2520
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2521
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
2523
- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
2524
+ return (__a >= __b);
2527
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2528
@@ -12349,7 +12081,7 @@ vcgez_s32 (int32x2_t __a)
2529
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2530
vcgez_s64 (int64x1_t __a)
2532
- return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
2533
+ return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
2536
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2537
@@ -12443,7 +12175,7 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b)
2538
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2539
vcgt_s64 (int64x1_t __a, int64x1_t __b)
2541
- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
2542
+ return (uint64x1_t) (__a > __b);
2545
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2546
@@ -12467,7 +12199,7 @@ vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
2547
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2548
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
2550
- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
2551
+ return (__a > __b);
2554
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2555
@@ -12591,7 +12323,7 @@ vcgtz_s32 (int32x2_t __a)
2556
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2557
vcgtz_s64 (int64x1_t __a)
2559
- return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
2560
+ return (uint64x1_t) (__a > __AARCH64_INT64_C (0));
2563
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2564
@@ -12685,7 +12417,7 @@ vcle_s32 (int32x2_t __a, int32x2_t __b)
2565
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2566
vcle_s64 (int64x1_t __a, int64x1_t __b)
2568
- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
2569
+ return (uint64x1_t) (__a <= __b);
2572
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2573
@@ -12709,7 +12441,7 @@ vcle_u32 (uint32x2_t __a, uint32x2_t __b)
2574
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2575
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
2577
- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
2578
+ return (__a <= __b);
2581
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2582
@@ -12833,7 +12565,7 @@ vclez_s32 (int32x2_t __a)
2583
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2584
vclez_s64 (int64x1_t __a)
2586
- return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
2587
+ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0));
2590
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2591
@@ -12927,7 +12659,7 @@ vclt_s32 (int32x2_t __a, int32x2_t __b)
2592
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2593
vclt_s64 (int64x1_t __a, int64x1_t __b)
2595
- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
2596
+ return (uint64x1_t) (__a < __b);
2599
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2600
@@ -12951,7 +12683,7 @@ vclt_u32 (uint32x2_t __a, uint32x2_t __b)
2601
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2602
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
2604
- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
2605
+ return (__a < __b);
2608
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2609
@@ -13075,7 +12807,7 @@ vcltz_s32 (int32x2_t __a)
2610
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2611
vcltz_s64 (int64x1_t __a)
2613
- return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
2614
+ return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
2617
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2618
@@ -21321,72 +21053,74 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
2622
-static __inline uint32x4_t
2623
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2624
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
2626
return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
2628
-static __inline uint32x4_t
2630
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2631
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
2633
return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
2635
-static __inline uint32x4_t
2637
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2638
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
2640
return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
2643
-static __inline uint32_t
2644
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2645
vsha1h_u32 (uint32_t hash_e)
2647
return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
2650
-static __inline uint32x4_t
2651
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2652
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
2654
return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
2657
-static __inline uint32x4_t
2658
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2659
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
2661
return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
2664
-static __inline uint32x4_t
2665
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2666
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
2668
return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
2671
-static __inline uint32x4_t
2672
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2673
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
2675
return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
2678
-static __inline uint32x4_t
2679
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2680
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
2682
return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
2685
-static __inline uint32x4_t
2686
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2687
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
2689
return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
2692
-static __inline poly128_t
2693
+__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
2694
vmull_p64 (poly64_t a, poly64_t b)
2697
__builtin_aarch64_crypto_pmulldi_ppp (a, b);
2700
-static __inline poly128_t
2701
+__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
2702
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
2704
return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
2705
@@ -22302,6 +22036,8 @@ vst1_u64 (uint64_t *a, uint64x1_t b)
2711
__extension__ static __inline void __attribute__ ((__always_inline__))
2712
vst1q_f32 (float32_t *a, float32x4_t b)
2714
@@ -22314,8 +22050,6 @@ vst1q_f64 (float64_t *a, float64x2_t b)
2715
__builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
2720
__extension__ static __inline void __attribute__ ((__always_inline__))
2721
vst1q_p8 (poly8_t *a, poly8x16_t b)
2723
@@ -22382,6 +22116,154 @@ vst1q_u64 (uint64_t *a, uint64x2_t b)
2729
+__extension__ static __inline void __attribute__ ((__always_inline__))
2730
+vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
2732
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2735
+__extension__ static __inline void __attribute__ ((__always_inline__))
2736
+vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
2738
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2741
+__extension__ static __inline void __attribute__ ((__always_inline__))
2742
+vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
2744
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2747
+__extension__ static __inline void __attribute__ ((__always_inline__))
2748
+vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
2750
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2753
+__extension__ static __inline void __attribute__ ((__always_inline__))
2754
+vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
2756
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2759
+__extension__ static __inline void __attribute__ ((__always_inline__))
2760
+vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
2762
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2765
+__extension__ static __inline void __attribute__ ((__always_inline__))
2766
+vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
2768
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2771
+__extension__ static __inline void __attribute__ ((__always_inline__))
2772
+vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
2774
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2777
+__extension__ static __inline void __attribute__ ((__always_inline__))
2778
+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
2780
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2783
+__extension__ static __inline void __attribute__ ((__always_inline__))
2784
+vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
2786
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2789
+__extension__ static __inline void __attribute__ ((__always_inline__))
2790
+vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
2792
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2795
+__extension__ static __inline void __attribute__ ((__always_inline__))
2796
+vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
2798
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2803
+__extension__ static __inline void __attribute__ ((__always_inline__))
2804
+vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
2806
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2809
+__extension__ static __inline void __attribute__ ((__always_inline__))
2810
+vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
2812
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2815
+__extension__ static __inline void __attribute__ ((__always_inline__))
2816
+vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
2818
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2821
+__extension__ static __inline void __attribute__ ((__always_inline__))
2822
+vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
2824
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2827
+__extension__ static __inline void __attribute__ ((__always_inline__))
2828
+vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
2830
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2833
+__extension__ static __inline void __attribute__ ((__always_inline__))
2834
+vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
2836
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2839
+__extension__ static __inline void __attribute__ ((__always_inline__))
2840
+vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
2842
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2845
+__extension__ static __inline void __attribute__ ((__always_inline__))
2846
+vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
2848
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2851
+__extension__ static __inline void __attribute__ ((__always_inline__))
2852
+vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
2854
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2857
+__extension__ static __inline void __attribute__ ((__always_inline__))
2858
+vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
2860
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2863
+__extension__ static __inline void __attribute__ ((__always_inline__))
2864
+vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
2866
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2869
+__extension__ static __inline void __attribute__ ((__always_inline__))
2870
+vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
2872
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2877
__extension__ static __inline void
2878
@@ -23887,7 +23769,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b)
2879
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2880
vtst_s64 (int64x1_t __a, int64x1_t __b)
2882
- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
2883
+ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0));
2886
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2887
@@ -23911,7 +23793,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b)
2888
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2889
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
2891
- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
2892
+ return ((__a & __b) != __AARCH64_UINT64_C (0));
2895
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2897
+++ b/src/gcc/config/aarch64/driver-aarch64.c
2899
+/* Native CPU detection for aarch64.
2900
+ Copyright (C) 2015 Free Software Foundation, Inc.
2902
+ This file is part of GCC.
2904
+ GCC is free software; you can redistribute it and/or modify
2905
+ it under the terms of the GNU General Public License as published by
2906
+ the Free Software Foundation; either version 3, or (at your option)
2907
+ any later version.
2909
+ GCC is distributed in the hope that it will be useful,
2910
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2911
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2912
+ GNU General Public License for more details.
2914
+ You should have received a copy of the GNU General Public License
2915
+ along with GCC; see the file COPYING3. If not see
2916
+ <http://www.gnu.org/licenses/>. */
2918
+#include "config.h"
2919
+#include "system.h"
2921
+struct arch_extension
2924
+ const char *feat_string;
2927
+#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
2928
+ { EXT_NAME, FEATURE_STRING },
2929
+static struct arch_extension ext_to_feat_string[] =
2931
+#include "aarch64-option-extensions.def"
2933
+#undef AARCH64_OPT_EXTENSION
2936
+struct aarch64_core_data
2940
+ const char* implementer_id;
2941
+ const char* part_no;
2944
+#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
2945
+ { CORE_NAME, #ARCH, IMP, PART },
2947
+static struct aarch64_core_data cpu_data [] =
2949
+#include "aarch64-cores.def"
2950
+ { NULL, NULL, NULL, NULL }
2953
+#undef AARCH64_CORE
2955
+struct aarch64_arch
2961
+#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
2964
+static struct aarch64_arch aarch64_arches [] =
2966
+#include "aarch64-arches.def"
2970
+#undef AARCH64_ARCH
2972
+/* Return the full architecture name string corresponding to the
2976
+get_arch_name_from_id (const char* id)
2978
+ unsigned int i = 0;
2980
+ for (i = 0; aarch64_arches[i].id != NULL; i++)
2982
+ if (strcmp (id, aarch64_arches[i].id) == 0)
2983
+ return aarch64_arches[i].name;
2990
+/* Check wether the string CORE contains the same CPU part numbers
2991
+ as BL_STRING. For example CORE="{0xd03, 0xd07}" and BL_STRING="0xd07.0xd03"
2992
+ should return true. */
2995
+valid_bL_string_p (const char** core, const char* bL_string)
2997
+ return strstr (bL_string, core[0]) != NULL
2998
+ && strstr (bL_string, core[1]) != NULL;
3001
+/* Return true iff ARR contains STR in one of its two elements. */
3004
+contains_string_p (const char** arr, const char* str)
3008
+ if (arr[0] != NULL)
3010
+ res = strstr (arr[0], str) != NULL;
3014
+ if (arr[1] != NULL)
3015
+ return strstr (arr[1], str) != NULL;
3021
+/* This will be called by the spec parser in gcc.c when it sees
3022
+ a %:local_cpu_detect(args) construct. Currently it will be called
3023
+ with either "arch", "cpu" or "tune" as argument depending on if
3024
+ -march=native, -mcpu=native or -mtune=native is to be substituted.
3026
+ It returns a string containing new command line parameters to be
3027
+ put at the place of the above two options, depending on what CPU
3028
+ this is executed. E.g. "-march=armv8-a" on a Cortex-A57 for
3029
+ -march=native. If the routine can't detect a known processor,
3030
+ the -march or -mtune option is discarded.
3032
+ For -mtune and -mcpu arguments it attempts to detect the CPU or
3033
+ a big.LITTLE system.
3034
+ ARGC and ARGV are set depending on the actual arguments given
3038
+host_detect_local_cpu (int argc, const char **argv)
3040
+ const char *arch_id = NULL;
3041
+ const char *res = NULL;
3042
+ static const int num_exts = ARRAY_SIZE (ext_to_feat_string);
3045
+ bool arch = false;
3046
+ bool tune = false;
3048
+ unsigned int i = 0;
3049
+ unsigned int core_idx = 0;
3050
+ const char* imps[2] = { NULL, NULL };
3051
+ const char* cores[2] = { NULL, NULL };
3052
+ unsigned int n_cores = 0;
3053
+ unsigned int n_imps = 0;
3054
+ bool processed_exts = false;
3055
+ const char *ext_string = "";
3057
+ gcc_assert (argc);
3062
+ /* Are we processing -march, mtune or mcpu? */
3063
+ arch = strcmp (argv[0], "arch") == 0;
3065
+ tune = strcmp (argv[0], "tune") == 0;
3067
+ if (!arch && !tune)
3068
+ cpu = strcmp (argv[0], "cpu") == 0;
3070
+ if (!arch && !tune && !cpu)
3073
+ f = fopen ("/proc/cpuinfo", "r");
3078
+ /* Look through /proc/cpuinfo to determine the implementer
3079
+ and then the part number that identifies a particular core. */
3080
+ while (fgets (buf, sizeof (buf), f) != NULL)
3082
+ if (strstr (buf, "implementer") != NULL)
3084
+ for (i = 0; cpu_data[i].name != NULL; i++)
3085
+ if (strstr (buf, cpu_data[i].implementer_id) != NULL
3086
+ && !contains_string_p (imps, cpu_data[i].implementer_id))
3091
+ imps[n_imps++] = cpu_data[i].implementer_id;
3098
+ if (strstr (buf, "part") != NULL)
3100
+ for (i = 0; cpu_data[i].name != NULL; i++)
3101
+ if (strstr (buf, cpu_data[i].part_no) != NULL
3102
+ && !contains_string_p (cores, cpu_data[i].part_no))
3107
+ cores[n_cores++] = cpu_data[i].part_no;
3109
+ arch_id = cpu_data[i].arch;
3114
+ if (!tune && !processed_exts && strstr (buf, "Features") != NULL)
3116
+ for (i = 0; i < num_exts; i++)
3118
+ bool enabled = true;
3120
+ char *feat_string = concat (ext_to_feat_string[i].feat_string, NULL);
3122
+ p = strtok (feat_string, " ");
3126
+ if (strstr (buf, p) == NULL)
3131
+ p = strtok (NULL, " ");
3133
+ ext_string = concat (ext_string, "+", enabled ? "" : "no",
3134
+ ext_to_feat_string[i].ext, NULL);
3136
+ processed_exts = true;
3143
+ /* Weird cpuinfo format that we don't know how to handle. */
3144
+ if (n_cores == 0 || n_cores > 2 || n_imps != 1)
3147
+ if (arch && !arch_id)
3152
+ const char* arch_name = get_arch_name_from_id (arch_id);
3154
+ /* We got some arch indentifier that's not in aarch64-arches.def? */
3158
+ res = concat ("-march=", arch_name, NULL);
3160
+ /* We have big.LITTLE. */
3161
+ else if (n_cores == 2)
3163
+ for (i = 0; cpu_data[i].name != NULL; i++)
3165
+ if (strchr (cpu_data[i].part_no, '.') != NULL
3166
+ && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0
3167
+ && valid_bL_string_p (cores, cpu_data[i].part_no))
3169
+ res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL);
3176
+ /* The simple, non-big.LITTLE case. */
3179
+ if (strncmp (cpu_data[core_idx].implementer_id, imps[0],
3180
+ strlen (imps[0]) - 1) != 0)
3183
+ res = concat ("-m", cpu ? "cpu" : "tune", "=",
3184
+ cpu_data[core_idx].name, NULL);
3190
+ res = concat (res, ext_string, NULL);
3196
+ /* If detection fails we ignore the option.
3197
+ Clean up and return empty string. */
3207
+++ b/src/gcc/config/aarch64/x-aarch64
3209
+driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \
3210
+ $(CONFIG_H) $(SYSTEM_H)
3211
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
3212
--- a/src/gcc/config/alpha/linux.h
3213
+++ b/src/gcc/config/alpha/linux.h
3214
@@ -61,10 +61,14 @@ along with GCC; see the file COPYING3. If not see
3215
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
3216
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
3217
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
3219
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
3221
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
3222
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
3223
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
3225
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
3228
/* Determine what functions are present at the runtime;
3229
--- a/src/gcc/config/arm/aarch-common-protos.h
3230
+++ b/src/gcc/config/arm/aarch-common-protos.h
3231
@@ -102,6 +102,8 @@ struct mem_cost_table
3232
const int storef; /* SFmode. */
3233
const int stored; /* DFmode. */
3234
const int store_unaligned; /* Extra for unaligned stores. */
3235
+ const int loadv; /* Vector load. */
3236
+ const int storev; /* Vector store. */
3239
struct fp_cost_table
3240
--- a/src/gcc/config/arm/aarch-cost-tables.h
3241
+++ b/src/gcc/config/arm/aarch-cost-tables.h
3242
@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs =
3243
1, /* stm_regs_per_insn_subsequent. */
3244
COSTS_N_INSNS (2), /* storef. */
3245
COSTS_N_INSNS (3), /* stored. */
3246
- COSTS_N_INSNS (1) /* store_unaligned. */
3247
+ COSTS_N_INSNS (1), /* store_unaligned. */
3248
+ COSTS_N_INSNS (1), /* loadv. */
3249
+ COSTS_N_INSNS (1) /* storev. */
3253
@@ -130,12 +132,12 @@ const struct cpu_cost_table cortexa53_extra_costs =
3256
COSTS_N_INSNS (1), /* shift. */
3257
- COSTS_N_INSNS (2), /* shift_reg. */
3258
+ 0, /* shift_reg. */
3259
COSTS_N_INSNS (1), /* arith_shift. */
3260
- COSTS_N_INSNS (2), /* arith_shift_reg. */
3261
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
3262
COSTS_N_INSNS (1), /* log_shift. */
3263
- COSTS_N_INSNS (2), /* log_shift_reg. */
3265
+ COSTS_N_INSNS (1), /* log_shift_reg. */
3266
+ COSTS_N_INSNS (1), /* extend. */
3267
COSTS_N_INSNS (1), /* extend_arith. */
3268
COSTS_N_INSNS (1), /* bfi. */
3269
COSTS_N_INSNS (1), /* bfx. */
3270
@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
3271
2, /* stm_regs_per_insn_subsequent. */
3274
- COSTS_N_INSNS (1) /* store_unaligned. */
3275
+ COSTS_N_INSNS (1), /* store_unaligned. */
3276
+ COSTS_N_INSNS (1), /* loadv. */
3277
+ COSTS_N_INSNS (1) /* storev. */
3281
@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
3282
2, /* stm_regs_per_insn_subsequent. */
3285
- COSTS_N_INSNS (1) /* store_unaligned. */
3286
+ COSTS_N_INSNS (1), /* store_unaligned. */
3287
+ COSTS_N_INSNS (1), /* loadv. */
3288
+ COSTS_N_INSNS (1) /* storev. */
3292
@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs =
3295
0, /* store_unaligned. */
3296
+ COSTS_N_INSNS (1), /* loadv. */
3297
+ COSTS_N_INSNS (1) /* storev. */
3301
--- a/src/gcc/config/arm/arm-cores.def
3302
+++ b/src/gcc/config/arm/arm-cores.def
3303
@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex
3304
ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7)
3305
ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m)
3306
ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m)
3307
-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e)
3308
+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4)
3310
/* V7 big.LITTLE implementations */
3311
ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
3312
--- a/src/gcc/config/arm/arm-protos.h
3313
+++ b/src/gcc/config/arm/arm-protos.h
3314
@@ -66,10 +66,6 @@ extern rtx legitimize_tls_address (rtx, rtx);
3315
extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
3316
extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
3317
extern int thumb_legitimate_offset_p (machine_mode, HOST_WIDE_INT);
3318
-extern bool arm_legitimize_reload_address (rtx *, machine_mode, int, int,
3320
-extern rtx thumb_legitimize_reload_address (rtx *, machine_mode, int, int,
3322
extern int thumb1_legitimate_address_p (machine_mode, rtx, int);
3323
extern bool ldm_stm_operation_p (rtx, bool, machine_mode mode,
3325
@@ -257,13 +253,6 @@ struct cpu_vec_costs {
3327
struct cpu_cost_table;
3329
-enum arm_sched_autopref
3331
- ARM_SCHED_AUTOPREF_OFF,
3332
- ARM_SCHED_AUTOPREF_RANK,
3333
- ARM_SCHED_AUTOPREF_FULL
3336
/* Dump function ARM_PRINT_TUNE_INFO should be updated whenever this
3337
structure is modified. */
3339
@@ -272,39 +261,57 @@ struct tune_params
3340
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
3341
const struct cpu_cost_table *insn_extra_cost;
3342
bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *);
3343
+ int (*branch_cost) (bool, bool);
3344
+ /* Vectorizer costs. */
3345
+ const struct cpu_vec_costs* vec_costs;
3347
/* Maximum number of instructions to conditionalise. */
3348
int max_insns_skipped;
3349
- int num_prefetch_slots;
3350
- int l1_cache_size;
3351
- int l1_cache_line_size;
3352
- bool prefer_constant_pool;
3353
- int (*branch_cost) (bool, bool);
3354
+ /* Maximum number of instructions to inline calls to memset. */
3355
+ int max_insns_inline_memset;
3356
+ /* Issue rate of the processor. */
3357
+ unsigned int issue_rate;
3358
+ /* Explicit prefetch data. */
3362
+ int l1_cache_size;
3363
+ int l1_cache_line_size;
3365
+ enum {PREF_CONST_POOL_FALSE, PREF_CONST_POOL_TRUE}
3366
+ prefer_constant_pool: 1;
3367
/* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */
3368
- bool prefer_ldrd_strd;
3369
+ enum {PREF_LDRD_FALSE, PREF_LDRD_TRUE} prefer_ldrd_strd: 1;
3370
/* The preference for non short cirtcuit operation when optimizing for
3371
performance. The first element covers Thumb state and the second one
3372
is for ARM state. */
3373
- bool logical_op_non_short_circuit[2];
3374
- /* Vectorizer costs. */
3375
- const struct cpu_vec_costs* vec_costs;
3376
- /* Prefer Neon for 64-bit bitops. */
3377
- bool prefer_neon_for_64bits;
3378
+ enum log_op_non_sc {LOG_OP_NON_SC_FALSE, LOG_OP_NON_SC_TRUE};
3379
+ log_op_non_sc logical_op_non_short_circuit_thumb: 1;
3380
+ log_op_non_sc logical_op_non_short_circuit_arm: 1;
3381
/* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */
3382
- bool disparage_flag_setting_t16_encodings;
3383
- /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags
3385
- bool disparage_partial_flag_setting_t16_encodings;
3386
+ enum {DISPARAGE_FLAGS_NEITHER, DISPARAGE_FLAGS_PARTIAL, DISPARAGE_FLAGS_ALL}
3387
+ disparage_flag_setting_t16_encodings: 2;
3388
+ enum {PREF_NEON_64_FALSE, PREF_NEON_64_TRUE} prefer_neon_for_64bits: 1;
3389
/* Prefer to inline string operations like memset by using Neon. */
3390
- bool string_ops_prefer_neon;
3391
- /* Maximum number of instructions to inline calls to memset. */
3392
- int max_insns_inline_memset;
3393
- /* Bitfield encoding the fuseable pairs of instructions. */
3394
- unsigned int fuseable_ops;
3395
+ enum {PREF_NEON_STRINGOPS_FALSE, PREF_NEON_STRINGOPS_TRUE}
3396
+ string_ops_prefer_neon: 1;
3397
+ /* Bitfield encoding the fuseable pairs of instructions. Use FUSE_OPS
3398
+ in an initializer if multiple fusion operations are supported on a
3403
+ FUSE_MOVW_MOVT = 1 << 0
3404
+ } fuseable_ops: 1;
3405
/* Depth of scheduling queue to check for L2 autoprefetcher. */
3406
- enum arm_sched_autopref sched_autopref;
3407
+ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL}
3408
+ sched_autopref: 2;
3411
+/* Smash multiple fusion operations into a type that can be used for an
3413
+#define FUSE_OPS(x) ((tune_params::fuse_ops) (x))
3415
extern const struct tune_params *current_tune;
3416
extern int vfp3_const_double_for_fract_bits (rtx);
3417
/* return power of two from operand, otherwise 0. */
3418
--- a/src/gcc/config/arm/arm.c
3419
+++ b/src/gcc/config/arm/arm.c
3420
@@ -940,11 +940,13 @@ struct processors
3424
-#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
3425
-#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
3429
+#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
3430
+#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
3437
/* arm generic vectorizer costs. */
3439
@@ -1027,7 +1029,9 @@ const struct cpu_cost_table cortexa9_extra_costs =
3440
2, /* stm_regs_per_insn_subsequent. */
3441
COSTS_N_INSNS (1), /* storef. */
3442
COSTS_N_INSNS (1), /* stored. */
3443
- COSTS_N_INSNS (1) /* store_unaligned. */
3444
+ COSTS_N_INSNS (1), /* store_unaligned. */
3445
+ COSTS_N_INSNS (1), /* loadv. */
3446
+ COSTS_N_INSNS (1) /* storev. */
3450
@@ -1128,7 +1132,9 @@ const struct cpu_cost_table cortexa8_extra_costs =
3451
2, /* stm_regs_per_insn_subsequent. */
3452
COSTS_N_INSNS (1), /* storef. */
3453
COSTS_N_INSNS (1), /* stored. */
3454
- COSTS_N_INSNS (1) /* store_unaligned. */
3455
+ COSTS_N_INSNS (1), /* store_unaligned. */
3456
+ COSTS_N_INSNS (1), /* loadv. */
3457
+ COSTS_N_INSNS (1) /* storev. */
3461
@@ -1230,7 +1236,9 @@ const struct cpu_cost_table cortexa5_extra_costs =
3462
2, /* stm_regs_per_insn_subsequent. */
3463
COSTS_N_INSNS (2), /* storef. */
3464
COSTS_N_INSNS (2), /* stored. */
3465
- COSTS_N_INSNS (1) /* store_unaligned. */
3466
+ COSTS_N_INSNS (1), /* store_unaligned. */
3467
+ COSTS_N_INSNS (1), /* loadv. */
3468
+ COSTS_N_INSNS (1) /* storev. */
3472
@@ -1333,7 +1341,9 @@ const struct cpu_cost_table cortexa7_extra_costs =
3473
2, /* stm_regs_per_insn_subsequent. */
3474
COSTS_N_INSNS (2), /* storef. */
3475
COSTS_N_INSNS (2), /* stored. */
3476
- COSTS_N_INSNS (1) /* store_unaligned. */
3477
+ COSTS_N_INSNS (1), /* store_unaligned. */
3478
+ COSTS_N_INSNS (1), /* loadv. */
3479
+ COSTS_N_INSNS (1) /* storev. */
3483
@@ -1434,7 +1444,9 @@ const struct cpu_cost_table cortexa12_extra_costs =
3484
2, /* stm_regs_per_insn_subsequent. */
3485
COSTS_N_INSNS (2), /* storef. */
3486
COSTS_N_INSNS (2), /* stored. */
3487
- 0 /* store_unaligned. */
3488
+ 0, /* store_unaligned. */
3489
+ COSTS_N_INSNS (1), /* loadv. */
3490
+ COSTS_N_INSNS (1) /* storev. */
3494
@@ -1535,7 +1547,9 @@ const struct cpu_cost_table cortexa15_extra_costs =
3495
2, /* stm_regs_per_insn_subsequent. */
3498
- 0 /* store_unaligned. */
3499
+ 0, /* store_unaligned. */
3500
+ COSTS_N_INSNS (1), /* loadv. */
3501
+ COSTS_N_INSNS (1) /* storev. */
3505
@@ -1636,7 +1650,9 @@ const struct cpu_cost_table v7m_extra_costs =
3506
1, /* stm_regs_per_insn_subsequent. */
3507
COSTS_N_INSNS (2), /* storef. */
3508
COSTS_N_INSNS (3), /* stored. */
3509
- COSTS_N_INSNS (1) /* store_unaligned. */
3510
+ COSTS_N_INSNS (1), /* store_unaligned. */
3511
+ COSTS_N_INSNS (1), /* loadv. */
3512
+ COSTS_N_INSNS (1) /* storev. */
3516
@@ -1678,49 +1694,50 @@ const struct cpu_cost_table v7m_extra_costs =
3520
-#define ARM_FUSE_NOTHING (0)
3521
-#define ARM_FUSE_MOVW_MOVT (1 << 0)
3523
const struct tune_params arm_slowmul_tune =
3525
arm_slowmul_rtx_costs,
3527
- NULL, /* Sched adj cost. */
3528
+ NULL, /* Insn extra costs. */
3529
+ NULL, /* Sched adj cost. */
3530
+ arm_default_branch_cost,
3531
+ &arm_default_vec_cost,
3532
3, /* Constant limit. */
3533
5, /* Max cond insns. */
3534
+ 8, /* Memset max inline. */
3535
+ 1, /* Issue rate. */
3536
ARM_PREFETCH_NOT_BENEFICIAL,
3537
- true, /* Prefer constant pool. */
3538
- arm_default_branch_cost,
3539
- false, /* Prefer LDRD/STRD. */
3540
- {true, true}, /* Prefer non short circuit. */
3541
- &arm_default_vec_cost, /* Vectorizer costs. */
3542
- false, /* Prefer Neon for 64-bits bitops. */
3543
- false, false, /* Prefer 32-bit encodings. */
3544
- false, /* Prefer Neon for stringops. */
3545
- 8, /* Maximum insns to inline memset. */
3546
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3547
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3548
+ tune_params::PREF_CONST_POOL_TRUE,
3549
+ tune_params::PREF_LDRD_FALSE,
3550
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3551
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3552
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3553
+ tune_params::PREF_NEON_64_FALSE,
3554
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3555
+ tune_params::FUSE_NOTHING,
3556
+ tune_params::SCHED_AUTOPREF_OFF
3559
const struct tune_params arm_fastmul_tune =
3561
arm_fastmul_rtx_costs,
3563
- NULL, /* Sched adj cost. */
3564
+ NULL, /* Insn extra costs. */
3565
+ NULL, /* Sched adj cost. */
3566
+ arm_default_branch_cost,
3567
+ &arm_default_vec_cost,
3568
1, /* Constant limit. */
3569
5, /* Max cond insns. */
3570
+ 8, /* Memset max inline. */
3571
+ 1, /* Issue rate. */
3572
ARM_PREFETCH_NOT_BENEFICIAL,
3573
- true, /* Prefer constant pool. */
3574
- arm_default_branch_cost,
3575
- false, /* Prefer LDRD/STRD. */
3576
- {true, true}, /* Prefer non short circuit. */
3577
- &arm_default_vec_cost, /* Vectorizer costs. */
3578
- false, /* Prefer Neon for 64-bits bitops. */
3579
- false, false, /* Prefer 32-bit encodings. */
3580
- false, /* Prefer Neon for stringops. */
3581
- 8, /* Maximum insns to inline memset. */
3582
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3583
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3584
+ tune_params::PREF_CONST_POOL_TRUE,
3585
+ tune_params::PREF_LDRD_FALSE,
3586
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3587
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3588
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3589
+ tune_params::PREF_NEON_64_FALSE,
3590
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3591
+ tune_params::FUSE_NOTHING,
3592
+ tune_params::SCHED_AUTOPREF_OFF
3595
/* StrongARM has early execution of branches, so a sequence that is worth
3596
@@ -1729,233 +1746,279 @@ const struct tune_params arm_fastmul_tune =
3597
const struct tune_params arm_strongarm_tune =
3599
arm_fastmul_rtx_costs,
3601
- NULL, /* Sched adj cost. */
3602
+ NULL, /* Insn extra costs. */
3603
+ NULL, /* Sched adj cost. */
3604
+ arm_default_branch_cost,
3605
+ &arm_default_vec_cost,
3606
1, /* Constant limit. */
3607
3, /* Max cond insns. */
3608
+ 8, /* Memset max inline. */
3609
+ 1, /* Issue rate. */
3610
ARM_PREFETCH_NOT_BENEFICIAL,
3611
- true, /* Prefer constant pool. */
3612
- arm_default_branch_cost,
3613
- false, /* Prefer LDRD/STRD. */
3614
- {true, true}, /* Prefer non short circuit. */
3615
- &arm_default_vec_cost, /* Vectorizer costs. */
3616
- false, /* Prefer Neon for 64-bits bitops. */
3617
- false, false, /* Prefer 32-bit encodings. */
3618
- false, /* Prefer Neon for stringops. */
3619
- 8, /* Maximum insns to inline memset. */
3620
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3621
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3622
+ tune_params::PREF_CONST_POOL_TRUE,
3623
+ tune_params::PREF_LDRD_FALSE,
3624
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3625
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3626
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3627
+ tune_params::PREF_NEON_64_FALSE,
3628
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3629
+ tune_params::FUSE_NOTHING,
3630
+ tune_params::SCHED_AUTOPREF_OFF
3633
const struct tune_params arm_xscale_tune =
3635
arm_xscale_rtx_costs,
3637
+ NULL, /* Insn extra costs. */
3638
xscale_sched_adjust_cost,
3639
+ arm_default_branch_cost,
3640
+ &arm_default_vec_cost,
3641
2, /* Constant limit. */
3642
3, /* Max cond insns. */
3643
+ 8, /* Memset max inline. */
3644
+ 1, /* Issue rate. */
3645
ARM_PREFETCH_NOT_BENEFICIAL,
3646
- true, /* Prefer constant pool. */
3647
- arm_default_branch_cost,
3648
- false, /* Prefer LDRD/STRD. */
3649
- {true, true}, /* Prefer non short circuit. */
3650
- &arm_default_vec_cost, /* Vectorizer costs. */
3651
- false, /* Prefer Neon for 64-bits bitops. */
3652
- false, false, /* Prefer 32-bit encodings. */
3653
- false, /* Prefer Neon for stringops. */
3654
- 8, /* Maximum insns to inline memset. */
3655
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3656
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3657
+ tune_params::PREF_CONST_POOL_TRUE,
3658
+ tune_params::PREF_LDRD_FALSE,
3659
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3660
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3661
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3662
+ tune_params::PREF_NEON_64_FALSE,
3663
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3664
+ tune_params::FUSE_NOTHING,
3665
+ tune_params::SCHED_AUTOPREF_OFF
3668
const struct tune_params arm_9e_tune =
3672
- NULL, /* Sched adj cost. */
3673
+ NULL, /* Insn extra costs. */
3674
+ NULL, /* Sched adj cost. */
3675
+ arm_default_branch_cost,
3676
+ &arm_default_vec_cost,
3677
1, /* Constant limit. */
3678
5, /* Max cond insns. */
3679
+ 8, /* Memset max inline. */
3680
+ 1, /* Issue rate. */
3681
ARM_PREFETCH_NOT_BENEFICIAL,
3682
- true, /* Prefer constant pool. */
3683
+ tune_params::PREF_CONST_POOL_TRUE,
3684
+ tune_params::PREF_LDRD_FALSE,
3685
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3686
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3687
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3688
+ tune_params::PREF_NEON_64_FALSE,
3689
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3690
+ tune_params::FUSE_NOTHING,
3691
+ tune_params::SCHED_AUTOPREF_OFF
3694
+const struct tune_params arm_marvell_pj4_tune =
3697
+ NULL, /* Insn extra costs. */
3698
+ NULL, /* Sched adj cost. */
3699
arm_default_branch_cost,
3700
- false, /* Prefer LDRD/STRD. */
3701
- {true, true}, /* Prefer non short circuit. */
3702
- &arm_default_vec_cost, /* Vectorizer costs. */
3703
- false, /* Prefer Neon for 64-bits bitops. */
3704
- false, false, /* Prefer 32-bit encodings. */
3705
- false, /* Prefer Neon for stringops. */
3706
- 8, /* Maximum insns to inline memset. */
3707
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3708
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3709
+ &arm_default_vec_cost,
3710
+ 1, /* Constant limit. */
3711
+ 5, /* Max cond insns. */
3712
+ 8, /* Memset max inline. */
3713
+ 2, /* Issue rate. */
3714
+ ARM_PREFETCH_NOT_BENEFICIAL,
3715
+ tune_params::PREF_CONST_POOL_TRUE,
3716
+ tune_params::PREF_LDRD_FALSE,
3717
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3718
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3719
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3720
+ tune_params::PREF_NEON_64_FALSE,
3721
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3722
+ tune_params::FUSE_NOTHING,
3723
+ tune_params::SCHED_AUTOPREF_OFF
3726
const struct tune_params arm_v6t2_tune =
3730
- NULL, /* Sched adj cost. */
3731
+ NULL, /* Insn extra costs. */
3732
+ NULL, /* Sched adj cost. */
3733
+ arm_default_branch_cost,
3734
+ &arm_default_vec_cost,
3735
1, /* Constant limit. */
3736
5, /* Max cond insns. */
3737
+ 8, /* Memset max inline. */
3738
+ 1, /* Issue rate. */
3739
ARM_PREFETCH_NOT_BENEFICIAL,
3740
- false, /* Prefer constant pool. */
3741
- arm_default_branch_cost,
3742
- false, /* Prefer LDRD/STRD. */
3743
- {true, true}, /* Prefer non short circuit. */
3744
- &arm_default_vec_cost, /* Vectorizer costs. */
3745
- false, /* Prefer Neon for 64-bits bitops. */
3746
- false, false, /* Prefer 32-bit encodings. */
3747
- false, /* Prefer Neon for stringops. */
3748
- 8, /* Maximum insns to inline memset. */
3749
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3750
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3751
+ tune_params::PREF_CONST_POOL_FALSE,
3752
+ tune_params::PREF_LDRD_FALSE,
3753
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3754
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3755
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3756
+ tune_params::PREF_NEON_64_FALSE,
3757
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3758
+ tune_params::FUSE_NOTHING,
3759
+ tune_params::SCHED_AUTOPREF_OFF
3763
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
3764
const struct tune_params arm_cortex_tune =
3767
&generic_extra_costs,
3768
- NULL, /* Sched adj cost. */
3769
+ NULL, /* Sched adj cost. */
3770
+ arm_default_branch_cost,
3771
+ &arm_default_vec_cost,
3772
1, /* Constant limit. */
3773
5, /* Max cond insns. */
3774
+ 8, /* Memset max inline. */
3775
+ 2, /* Issue rate. */
3776
ARM_PREFETCH_NOT_BENEFICIAL,
3777
- false, /* Prefer constant pool. */
3778
- arm_default_branch_cost,
3779
- false, /* Prefer LDRD/STRD. */
3780
- {true, true}, /* Prefer non short circuit. */
3781
- &arm_default_vec_cost, /* Vectorizer costs. */
3782
- false, /* Prefer Neon for 64-bits bitops. */
3783
- false, false, /* Prefer 32-bit encodings. */
3784
- false, /* Prefer Neon for stringops. */
3785
- 8, /* Maximum insns to inline memset. */
3786
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3787
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3788
+ tune_params::PREF_CONST_POOL_FALSE,
3789
+ tune_params::PREF_LDRD_FALSE,
3790
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3791
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3792
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3793
+ tune_params::PREF_NEON_64_FALSE,
3794
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3795
+ tune_params::FUSE_NOTHING,
3796
+ tune_params::SCHED_AUTOPREF_OFF
3799
const struct tune_params arm_cortex_a8_tune =
3802
&cortexa8_extra_costs,
3803
- NULL, /* Sched adj cost. */
3804
+ NULL, /* Sched adj cost. */
3805
+ arm_default_branch_cost,
3806
+ &arm_default_vec_cost,
3807
1, /* Constant limit. */
3808
5, /* Max cond insns. */
3809
+ 8, /* Memset max inline. */
3810
+ 2, /* Issue rate. */
3811
ARM_PREFETCH_NOT_BENEFICIAL,
3812
- false, /* Prefer constant pool. */
3813
- arm_default_branch_cost,
3814
- false, /* Prefer LDRD/STRD. */
3815
- {true, true}, /* Prefer non short circuit. */
3816
- &arm_default_vec_cost, /* Vectorizer costs. */
3817
- false, /* Prefer Neon for 64-bits bitops. */
3818
- false, false, /* Prefer 32-bit encodings. */
3819
- true, /* Prefer Neon for stringops. */
3820
- 8, /* Maximum insns to inline memset. */
3821
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3822
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3823
+ tune_params::PREF_CONST_POOL_FALSE,
3824
+ tune_params::PREF_LDRD_FALSE,
3825
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3826
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3827
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3828
+ tune_params::PREF_NEON_64_FALSE,
3829
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3830
+ tune_params::FUSE_NOTHING,
3831
+ tune_params::SCHED_AUTOPREF_OFF
3834
const struct tune_params arm_cortex_a7_tune =
3837
&cortexa7_extra_costs,
3839
+ NULL, /* Sched adj cost. */
3840
+ arm_default_branch_cost,
3841
+ &arm_default_vec_cost,
3842
1, /* Constant limit. */
3843
5, /* Max cond insns. */
3844
+ 8, /* Memset max inline. */
3845
+ 2, /* Issue rate. */
3846
ARM_PREFETCH_NOT_BENEFICIAL,
3847
- false, /* Prefer constant pool. */
3848
- arm_default_branch_cost,
3849
- false, /* Prefer LDRD/STRD. */
3850
- {true, true}, /* Prefer non short circuit. */
3851
- &arm_default_vec_cost, /* Vectorizer costs. */
3852
- false, /* Prefer Neon for 64-bits bitops. */
3853
- false, false, /* Prefer 32-bit encodings. */
3854
- true, /* Prefer Neon for stringops. */
3855
- 8, /* Maximum insns to inline memset. */
3856
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3857
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3858
+ tune_params::PREF_CONST_POOL_FALSE,
3859
+ tune_params::PREF_LDRD_FALSE,
3860
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3861
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3862
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3863
+ tune_params::PREF_NEON_64_FALSE,
3864
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3865
+ tune_params::FUSE_NOTHING,
3866
+ tune_params::SCHED_AUTOPREF_OFF
3869
const struct tune_params arm_cortex_a15_tune =
3872
&cortexa15_extra_costs,
3873
- NULL, /* Sched adj cost. */
3874
+ NULL, /* Sched adj cost. */
3875
+ arm_default_branch_cost,
3876
+ &arm_default_vec_cost,
3877
1, /* Constant limit. */
3878
2, /* Max cond insns. */
3879
+ 8, /* Memset max inline. */
3880
+ 3, /* Issue rate. */
3881
ARM_PREFETCH_NOT_BENEFICIAL,
3882
- false, /* Prefer constant pool. */
3883
- arm_default_branch_cost,
3884
- true, /* Prefer LDRD/STRD. */
3885
- {true, true}, /* Prefer non short circuit. */
3886
- &arm_default_vec_cost, /* Vectorizer costs. */
3887
- false, /* Prefer Neon for 64-bits bitops. */
3888
- true, true, /* Prefer 32-bit encodings. */
3889
- true, /* Prefer Neon for stringops. */
3890
- 8, /* Maximum insns to inline memset. */
3891
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3892
- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
3893
+ tune_params::PREF_CONST_POOL_FALSE,
3894
+ tune_params::PREF_LDRD_TRUE,
3895
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3896
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3897
+ tune_params::DISPARAGE_FLAGS_ALL,
3898
+ tune_params::PREF_NEON_64_FALSE,
3899
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3900
+ tune_params::FUSE_NOTHING,
3901
+ tune_params::SCHED_AUTOPREF_FULL
3904
const struct tune_params arm_cortex_a53_tune =
3907
&cortexa53_extra_costs,
3908
- NULL, /* Scheduler cost adjustment. */
3909
+ NULL, /* Sched adj cost. */
3910
+ arm_default_branch_cost,
3911
+ &arm_default_vec_cost,
3912
1, /* Constant limit. */
3913
5, /* Max cond insns. */
3914
+ 8, /* Memset max inline. */
3915
+ 2, /* Issue rate. */
3916
ARM_PREFETCH_NOT_BENEFICIAL,
3917
- false, /* Prefer constant pool. */
3918
- arm_default_branch_cost,
3919
- false, /* Prefer LDRD/STRD. */
3920
- {true, true}, /* Prefer non short circuit. */
3921
- &arm_default_vec_cost, /* Vectorizer costs. */
3922
- false, /* Prefer Neon for 64-bits bitops. */
3923
- false, false, /* Prefer 32-bit encodings. */
3924
- true, /* Prefer Neon for stringops. */
3925
- 8, /* Maximum insns to inline memset. */
3926
- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
3927
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3928
+ tune_params::PREF_CONST_POOL_FALSE,
3929
+ tune_params::PREF_LDRD_FALSE,
3930
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3931
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3932
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3933
+ tune_params::PREF_NEON_64_FALSE,
3934
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3935
+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
3936
+ tune_params::SCHED_AUTOPREF_OFF
3939
const struct tune_params arm_cortex_a57_tune =
3942
&cortexa57_extra_costs,
3943
- NULL, /* Scheduler cost adjustment. */
3944
- 1, /* Constant limit. */
3945
- 2, /* Max cond insns. */
3946
- ARM_PREFETCH_NOT_BENEFICIAL,
3947
- false, /* Prefer constant pool. */
3948
+ NULL, /* Sched adj cost. */
3949
arm_default_branch_cost,
3950
- true, /* Prefer LDRD/STRD. */
3951
- {true, true}, /* Prefer non short circuit. */
3952
- &arm_default_vec_cost, /* Vectorizer costs. */
3953
- false, /* Prefer Neon for 64-bits bitops. */
3954
- true, true, /* Prefer 32-bit encodings. */
3955
- true, /* Prefer Neon for stringops. */
3956
- 8, /* Maximum insns to inline memset. */
3957
- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
3958
- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
3959
+ &arm_default_vec_cost,
3960
+ 1, /* Constant limit. */
3961
+ 2, /* Max cond insns. */
3962
+ 8, /* Memset max inline. */
3963
+ 3, /* Issue rate. */
3964
+ ARM_PREFETCH_NOT_BENEFICIAL,
3965
+ tune_params::PREF_CONST_POOL_FALSE,
3966
+ tune_params::PREF_LDRD_TRUE,
3967
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3968
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3969
+ tune_params::DISPARAGE_FLAGS_ALL,
3970
+ tune_params::PREF_NEON_64_FALSE,
3971
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3972
+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
3973
+ tune_params::SCHED_AUTOPREF_FULL
3976
const struct tune_params arm_xgene1_tune =
3979
&xgene1_extra_costs,
3980
- NULL, /* Scheduler cost adjustment. */
3981
- 1, /* Constant limit. */
3982
- 2, /* Max cond insns. */
3983
- ARM_PREFETCH_NOT_BENEFICIAL,
3984
- false, /* Prefer constant pool. */
3985
+ NULL, /* Sched adj cost. */
3986
arm_default_branch_cost,
3987
- true, /* Prefer LDRD/STRD. */
3988
- {true, true}, /* Prefer non short circuit. */
3989
- &arm_default_vec_cost, /* Vectorizer costs. */
3990
- false, /* Prefer Neon for 64-bits bitops. */
3991
- true, true, /* Prefer 32-bit encodings. */
3992
- false, /* Prefer Neon for stringops. */
3993
- 32, /* Maximum insns to inline memset. */
3994
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3995
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3996
+ &arm_default_vec_cost,
3997
+ 1, /* Constant limit. */
3998
+ 2, /* Max cond insns. */
3999
+ 32, /* Memset max inline. */
4000
+ 4, /* Issue rate. */
4001
+ ARM_PREFETCH_NOT_BENEFICIAL,
4002
+ tune_params::PREF_CONST_POOL_FALSE,
4003
+ tune_params::PREF_LDRD_TRUE,
4004
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4005
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4006
+ tune_params::DISPARAGE_FLAGS_ALL,
4007
+ tune_params::PREF_NEON_64_FALSE,
4008
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4009
+ tune_params::FUSE_NOTHING,
4010
+ tune_params::SCHED_AUTOPREF_OFF
4013
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
4014
@@ -1965,21 +2028,23 @@ const struct tune_params arm_cortex_a5_tune =
4017
&cortexa5_extra_costs,
4018
- NULL, /* Sched adj cost. */
4019
+ NULL, /* Sched adj cost. */
4020
+ arm_cortex_a5_branch_cost,
4021
+ &arm_default_vec_cost,
4022
1, /* Constant limit. */
4023
1, /* Max cond insns. */
4024
+ 8, /* Memset max inline. */
4025
+ 2, /* Issue rate. */
4026
ARM_PREFETCH_NOT_BENEFICIAL,
4027
- false, /* Prefer constant pool. */
4028
- arm_cortex_a5_branch_cost,
4029
- false, /* Prefer LDRD/STRD. */
4030
- {false, false}, /* Prefer non short circuit. */
4031
- &arm_default_vec_cost, /* Vectorizer costs. */
4032
- false, /* Prefer Neon for 64-bits bitops. */
4033
- false, false, /* Prefer 32-bit encodings. */
4034
- true, /* Prefer Neon for stringops. */
4035
- 8, /* Maximum insns to inline memset. */
4036
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4037
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4038
+ tune_params::PREF_CONST_POOL_FALSE,
4039
+ tune_params::PREF_LDRD_FALSE,
4040
+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
4041
+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
4042
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4043
+ tune_params::PREF_NEON_64_FALSE,
4044
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
4045
+ tune_params::FUSE_NOTHING,
4046
+ tune_params::SCHED_AUTOPREF_OFF
4049
const struct tune_params arm_cortex_a9_tune =
4050
@@ -1987,41 +2052,45 @@ const struct tune_params arm_cortex_a9_tune =
4052
&cortexa9_extra_costs,
4053
cortex_a9_sched_adjust_cost,
4054
+ arm_default_branch_cost,
4055
+ &arm_default_vec_cost,
4056
1, /* Constant limit. */
4057
5, /* Max cond insns. */
4058
+ 8, /* Memset max inline. */
4059
+ 2, /* Issue rate. */
4060
ARM_PREFETCH_BENEFICIAL(4,32,32),
4061
- false, /* Prefer constant pool. */
4062
- arm_default_branch_cost,
4063
- false, /* Prefer LDRD/STRD. */
4064
- {true, true}, /* Prefer non short circuit. */
4065
- &arm_default_vec_cost, /* Vectorizer costs. */
4066
- false, /* Prefer Neon for 64-bits bitops. */
4067
- false, false, /* Prefer 32-bit encodings. */
4068
- false, /* Prefer Neon for stringops. */
4069
- 8, /* Maximum insns to inline memset. */
4070
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4071
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4072
+ tune_params::PREF_CONST_POOL_FALSE,
4073
+ tune_params::PREF_LDRD_FALSE,
4074
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4075
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4076
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4077
+ tune_params::PREF_NEON_64_FALSE,
4078
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4079
+ tune_params::FUSE_NOTHING,
4080
+ tune_params::SCHED_AUTOPREF_OFF
4083
const struct tune_params arm_cortex_a12_tune =
4086
&cortexa12_extra_costs,
4087
- NULL, /* Sched adj cost. */
4088
+ NULL, /* Sched adj cost. */
4089
+ arm_default_branch_cost,
4090
+ &arm_default_vec_cost, /* Vectorizer costs. */
4091
1, /* Constant limit. */
4092
2, /* Max cond insns. */
4093
+ 8, /* Memset max inline. */
4094
+ 2, /* Issue rate. */
4095
ARM_PREFETCH_NOT_BENEFICIAL,
4096
- false, /* Prefer constant pool. */
4097
- arm_default_branch_cost,
4098
- true, /* Prefer LDRD/STRD. */
4099
- {true, true}, /* Prefer non short circuit. */
4100
- &arm_default_vec_cost, /* Vectorizer costs. */
4101
- false, /* Prefer Neon for 64-bits bitops. */
4102
- true, true, /* Prefer 32-bit encodings. */
4103
- true, /* Prefer Neon for stringops. */
4104
- 8, /* Maximum insns to inline memset. */
4105
- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
4106
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4107
+ tune_params::PREF_CONST_POOL_FALSE,
4108
+ tune_params::PREF_LDRD_TRUE,
4109
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4110
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4111
+ tune_params::DISPARAGE_FLAGS_ALL,
4112
+ tune_params::PREF_NEON_64_FALSE,
4113
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
4114
+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
4115
+ tune_params::SCHED_AUTOPREF_OFF
4118
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
4119
@@ -2035,21 +2104,23 @@ const struct tune_params arm_v7m_tune =
4123
- NULL, /* Sched adj cost. */
4124
+ NULL, /* Sched adj cost. */
4125
+ arm_cortex_m_branch_cost,
4126
+ &arm_default_vec_cost,
4127
1, /* Constant limit. */
4128
2, /* Max cond insns. */
4129
+ 8, /* Memset max inline. */
4130
+ 1, /* Issue rate. */
4131
ARM_PREFETCH_NOT_BENEFICIAL,
4132
- true, /* Prefer constant pool. */
4133
- arm_cortex_m_branch_cost,
4134
- false, /* Prefer LDRD/STRD. */
4135
- {false, false}, /* Prefer non short circuit. */
4136
- &arm_default_vec_cost, /* Vectorizer costs. */
4137
- false, /* Prefer Neon for 64-bits bitops. */
4138
- false, false, /* Prefer 32-bit encodings. */
4139
- false, /* Prefer Neon for stringops. */
4140
- 8, /* Maximum insns to inline memset. */
4141
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4142
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4143
+ tune_params::PREF_CONST_POOL_TRUE,
4144
+ tune_params::PREF_LDRD_FALSE,
4145
+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
4146
+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
4147
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4148
+ tune_params::PREF_NEON_64_FALSE,
4149
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4150
+ tune_params::FUSE_NOTHING,
4151
+ tune_params::SCHED_AUTOPREF_OFF
4154
/* Cortex-M7 tuning. */
4155
@@ -2058,21 +2129,23 @@ const struct tune_params arm_cortex_m7_tune =
4159
- NULL, /* Sched adj cost. */
4160
+ NULL, /* Sched adj cost. */
4161
+ arm_cortex_m7_branch_cost,
4162
+ &arm_default_vec_cost,
4163
0, /* Constant limit. */
4164
1, /* Max cond insns. */
4165
+ 8, /* Memset max inline. */
4166
+ 2, /* Issue rate. */
4167
ARM_PREFETCH_NOT_BENEFICIAL,
4168
- true, /* Prefer constant pool. */
4169
- arm_cortex_m7_branch_cost,
4170
- false, /* Prefer LDRD/STRD. */
4171
- {true, true}, /* Prefer non short circuit. */
4172
- &arm_default_vec_cost, /* Vectorizer costs. */
4173
- false, /* Prefer Neon for 64-bits bitops. */
4174
- false, false, /* Prefer 32-bit encodings. */
4175
- false, /* Prefer Neon for stringops. */
4176
- 8, /* Maximum insns to inline memset. */
4177
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4178
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4179
+ tune_params::PREF_CONST_POOL_TRUE,
4180
+ tune_params::PREF_LDRD_FALSE,
4181
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4182
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4183
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4184
+ tune_params::PREF_NEON_64_FALSE,
4185
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4186
+ tune_params::FUSE_NOTHING,
4187
+ tune_params::SCHED_AUTOPREF_OFF
4190
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
4191
@@ -2080,43 +2153,47 @@ const struct tune_params arm_cortex_m7_tune =
4192
const struct tune_params arm_v6m_tune =
4196
- NULL, /* Sched adj cost. */
4197
+ NULL, /* Insn extra costs. */
4198
+ NULL, /* Sched adj cost. */
4199
+ arm_default_branch_cost,
4200
+ &arm_default_vec_cost, /* Vectorizer costs. */
4201
1, /* Constant limit. */
4202
5, /* Max cond insns. */
4203
+ 8, /* Memset max inline. */
4204
+ 1, /* Issue rate. */
4205
ARM_PREFETCH_NOT_BENEFICIAL,
4206
- false, /* Prefer constant pool. */
4207
- arm_default_branch_cost,
4208
- false, /* Prefer LDRD/STRD. */
4209
- {false, false}, /* Prefer non short circuit. */
4210
- &arm_default_vec_cost, /* Vectorizer costs. */
4211
- false, /* Prefer Neon for 64-bits bitops. */
4212
- false, false, /* Prefer 32-bit encodings. */
4213
- false, /* Prefer Neon for stringops. */
4214
- 8, /* Maximum insns to inline memset. */
4215
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4216
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4217
+ tune_params::PREF_CONST_POOL_FALSE,
4218
+ tune_params::PREF_LDRD_FALSE,
4219
+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
4220
+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
4221
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4222
+ tune_params::PREF_NEON_64_FALSE,
4223
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4224
+ tune_params::FUSE_NOTHING,
4225
+ tune_params::SCHED_AUTOPREF_OFF
4228
const struct tune_params arm_fa726te_tune =
4232
+ NULL, /* Insn extra costs. */
4233
fa726te_sched_adjust_cost,
4234
+ arm_default_branch_cost,
4235
+ &arm_default_vec_cost,
4236
1, /* Constant limit. */
4237
5, /* Max cond insns. */
4238
+ 8, /* Memset max inline. */
4239
+ 2, /* Issue rate. */
4240
ARM_PREFETCH_NOT_BENEFICIAL,
4241
- true, /* Prefer constant pool. */
4242
- arm_default_branch_cost,
4243
- false, /* Prefer LDRD/STRD. */
4244
- {true, true}, /* Prefer non short circuit. */
4245
- &arm_default_vec_cost, /* Vectorizer costs. */
4246
- false, /* Prefer Neon for 64-bits bitops. */
4247
- false, false, /* Prefer 32-bit encodings. */
4248
- false, /* Prefer Neon for stringops. */
4249
- 8, /* Maximum insns to inline memset. */
4250
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4251
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4252
+ tune_params::PREF_CONST_POOL_TRUE,
4253
+ tune_params::PREF_LDRD_FALSE,
4254
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4255
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4256
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4257
+ tune_params::PREF_NEON_64_FALSE,
4258
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4259
+ tune_params::FUSE_NOTHING,
4260
+ tune_params::SCHED_AUTOPREF_OFF
4264
@@ -3140,31 +3217,33 @@ arm_option_override (void)
4265
&& abi_version_at_least(2))
4266
flag_strict_volatile_bitfields = 1;
4268
- /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
4269
- it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
4270
+ /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
4271
+ have deemed it beneficial (signified by setting
4272
+ prefetch.num_slots to 1 or more). */
4273
if (flag_prefetch_loop_arrays < 0
4276
- && current_tune->num_prefetch_slots > 0)
4277
+ && current_tune->prefetch.num_slots > 0)
4278
flag_prefetch_loop_arrays = 1;
4280
- /* Set up parameters to be used in prefetching algorithm. Do not override the
4281
- defaults unless we are tuning for a core we have researched values for. */
4282
- if (current_tune->num_prefetch_slots > 0)
4283
+ /* Set up parameters to be used in prefetching algorithm. Do not
4284
+ override the defaults unless we are tuning for a core we have
4285
+ researched values for. */
4286
+ if (current_tune->prefetch.num_slots > 0)
4287
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4288
- current_tune->num_prefetch_slots,
4289
- global_options.x_param_values,
4290
- global_options_set.x_param_values);
4291
- if (current_tune->l1_cache_line_size >= 0)
4292
+ current_tune->prefetch.num_slots,
4293
+ global_options.x_param_values,
4294
+ global_options_set.x_param_values);
4295
+ if (current_tune->prefetch.l1_cache_line_size >= 0)
4296
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4297
- current_tune->l1_cache_line_size,
4298
- global_options.x_param_values,
4299
- global_options_set.x_param_values);
4300
- if (current_tune->l1_cache_size >= 0)
4301
+ current_tune->prefetch.l1_cache_line_size,
4302
+ global_options.x_param_values,
4303
+ global_options_set.x_param_values);
4304
+ if (current_tune->prefetch.l1_cache_size >= 0)
4305
maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4306
- current_tune->l1_cache_size,
4307
- global_options.x_param_values,
4308
- global_options_set.x_param_values);
4309
+ current_tune->prefetch.l1_cache_size,
4310
+ global_options.x_param_values,
4311
+ global_options_set.x_param_values);
4313
/* Use Neon to perform 64-bits operations rather than core
4315
@@ -3174,24 +3253,35 @@ arm_option_override (void)
4317
/* Use the alternative scheduling-pressure algorithm by default. */
4318
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
4319
- global_options.x_param_values,
4320
- global_options_set.x_param_values);
4321
+ global_options.x_param_values,
4322
+ global_options_set.x_param_values);
4324
/* Look through ready list and all of queue for instructions
4325
relevant for L2 auto-prefetcher. */
4326
int param_sched_autopref_queue_depth;
4327
- if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
4328
- param_sched_autopref_queue_depth = -1;
4329
- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
4330
- param_sched_autopref_queue_depth = 0;
4331
- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
4332
- param_sched_autopref_queue_depth = max_insn_queue_index + 1;
4334
- gcc_unreachable ();
4336
+ switch (current_tune->sched_autopref)
4338
+ case tune_params::SCHED_AUTOPREF_OFF:
4339
+ param_sched_autopref_queue_depth = -1;
4342
+ case tune_params::SCHED_AUTOPREF_RANK:
4343
+ param_sched_autopref_queue_depth = 0;
4346
+ case tune_params::SCHED_AUTOPREF_FULL:
4347
+ param_sched_autopref_queue_depth = max_insn_queue_index + 1;
4351
+ gcc_unreachable ();
4354
maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
4355
param_sched_autopref_queue_depth,
4356
- global_options.x_param_values,
4357
- global_options_set.x_param_values);
4358
+ global_options.x_param_values,
4359
+ global_options_set.x_param_values);
4361
/* Disable shrink-wrap when optimizing function for size, since it tends to
4362
generate additional returns. */
4363
@@ -7946,236 +8036,6 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
4368
-arm_legitimize_reload_address (rtx *p,
4369
- machine_mode mode,
4370
- int opnum, int type,
4371
- int ind_levels ATTRIBUTE_UNUSED)
4373
- /* We must recognize output that we have already generated ourselves. */
4374
- if (GET_CODE (*p) == PLUS
4375
- && GET_CODE (XEXP (*p, 0)) == PLUS
4376
- && REG_P (XEXP (XEXP (*p, 0), 0))
4377
- && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
4378
- && CONST_INT_P (XEXP (*p, 1)))
4380
- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
4381
- MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
4382
- VOIDmode, 0, 0, opnum, (enum reload_type) type);
4386
- if (GET_CODE (*p) == PLUS
4387
- && REG_P (XEXP (*p, 0))
4388
- && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
4389
- /* If the base register is equivalent to a constant, let the generic
4390
- code handle it. Otherwise we will run into problems if a future
4391
- reload pass decides to rematerialize the constant. */
4392
- && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
4393
- && CONST_INT_P (XEXP (*p, 1)))
4395
- HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
4396
- HOST_WIDE_INT low, high;
4398
- /* Detect coprocessor load/stores. */
4399
- bool coproc_p = ((TARGET_HARD_FLOAT
4401
- && (mode == SFmode || mode == DFmode))
4402
- || (TARGET_REALLY_IWMMXT
4403
- && VALID_IWMMXT_REG_MODE (mode))
4405
- && (VALID_NEON_DREG_MODE (mode)
4406
- || VALID_NEON_QREG_MODE (mode))));
4408
- /* For some conditions, bail out when lower two bits are unaligned. */
4409
- if ((val & 0x3) != 0
4410
- /* Coprocessor load/store indexes are 8-bits + '00' appended. */
4412
- /* For DI, and DF under soft-float: */
4413
- || ((mode == DImode || mode == DFmode)
4414
- /* Without ldrd, we use stm/ldm, which does not
4415
- fair well with unaligned bits. */
4417
- /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
4418
- || TARGET_THUMB2))))
4421
- /* When breaking down a [reg+index] reload address into [(reg+high)+low],
4422
- of which the (reg+high) gets turned into a reload add insn,
4423
- we try to decompose the index into high/low values that can often
4424
- also lead to better reload CSE.
4426
- ldr r0, [r2, #4100] // Offset too large
4427
- ldr r1, [r2, #4104] // Offset too large
4429
- is best reloaded as:
4435
- which post-reload CSE can simplify in most cases to eliminate the
4436
- second add instruction:
4441
- The idea here is that we want to split out the bits of the constant
4442
- as a mask, rather than as subtracting the maximum offset that the
4443
- respective type of load/store used can handle.
4445
- When encountering negative offsets, we can still utilize it even if
4446
- the overall offset is positive; sometimes this may lead to an immediate
4447
- that can be constructed with fewer instructions.
4449
- ldr r0, [r2, #0x3FFFFC]
4451
- This is best reloaded as:
4452
- add t1, r2, #0x400000
4455
- The trick for spotting this for a load insn with N bits of offset
4456
- (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
4457
- negative offset that is going to make bit N and all the bits below
4458
- it become zero in the remainder part.
4460
- The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
4461
- to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
4462
- used in most cases of ARM load/store instructions. */
4464
-#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
4465
- (((VAL) & ((1 << (N)) - 1)) \
4466
- ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
4471
- low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
4473
- /* NEON quad-word load/stores are made of two double-word accesses,
4474
- so the valid index range is reduced by 8. Treat as 9-bit range if
4476
- if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
4477
- low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
4479
- else if (GET_MODE_SIZE (mode) == 8)
4482
- low = (TARGET_THUMB2
4483
- ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
4484
- : SIGN_MAG_LOW_ADDR_BITS (val, 8));
4486
- /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
4487
- to access doublewords. The supported load/store offsets are
4488
- -8, -4, and 4, which we try to produce here. */
4489
- low = ((val & 0xf) ^ 0x8) - 0x8;
4491
- else if (GET_MODE_SIZE (mode) < 8)
4493
- /* NEON element load/stores do not have an offset. */
4494
- if (TARGET_NEON_FP16 && mode == HFmode)
4497
- if (TARGET_THUMB2)
4499
- /* Thumb-2 has an asymmetrical index range of (-256,4096).
4500
- Try the wider 12-bit range first, and re-try if the result
4501
- is out of range. */
4502
- low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
4504
- low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
4508
- if (mode == HImode || mode == HFmode)
4511
- low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
4514
- /* The storehi/movhi_bytes fallbacks can use only
4515
- [-4094,+4094] of the full ldrb/strb index range. */
4516
- low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
4517
- if (low == 4095 || low == -4095)
4522
- low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
4528
- high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
4529
- ^ (unsigned HOST_WIDE_INT) 0x80000000)
4530
- - (unsigned HOST_WIDE_INT) 0x80000000);
4531
- /* Check for overflow or zero */
4532
- if (low == 0 || high == 0 || (high + low != val))
4535
- /* Reload the high part into a base reg; leave the low part
4537
- Note that replacing this gen_rtx_PLUS with plus_constant is
4538
- wrong in this case because we rely on the
4539
- (plus (plus reg c1) c2) structure being preserved so that
4540
- XEXP (*p, 0) in push_reload below uses the correct term. */
4541
- *p = gen_rtx_PLUS (GET_MODE (*p),
4542
- gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
4545
- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
4546
- MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
4547
- VOIDmode, 0, 0, opnum, (enum reload_type) type);
4555
-thumb_legitimize_reload_address (rtx *x_p,
4556
- machine_mode mode,
4557
- int opnum, int type,
4558
- int ind_levels ATTRIBUTE_UNUSED)
4562
- if (GET_CODE (x) == PLUS
4563
- && GET_MODE_SIZE (mode) < 4
4564
- && REG_P (XEXP (x, 0))
4565
- && XEXP (x, 0) == stack_pointer_rtx
4566
- && CONST_INT_P (XEXP (x, 1))
4567
- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4572
- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4573
- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4577
- /* If both registers are hi-regs, then it's better to reload the
4578
- entire expression rather than each register individually. That
4579
- only requires one reload register rather than two. */
4580
- if (GET_CODE (x) == PLUS
4581
- && REG_P (XEXP (x, 0))
4582
- && REG_P (XEXP (x, 1))
4583
- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4584
- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4589
- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4590
- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4597
/* Return TRUE if X contains any TLS symbol references. */
4600
@@ -9399,7 +9259,8 @@ static bool
4601
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
4603
const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
4604
- gcc_assert (GET_CODE (x) == UNSPEC);
4605
+ rtx_code code = GET_CODE (x);
4606
+ gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
4608
switch (XINT (x, 1))
4610
@@ -9445,7 +9306,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
4611
*cost = COSTS_N_INSNS (2);
4618
/* Cost of a libcall. We assume one insn per argument, an amount for the
4619
@@ -11008,6 +10869,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
4620
*cost = LIBCALL_COST (1);
4623
+ case UNSPEC_VOLATILE:
4625
return arm_unspec_cost (x, outer_code, speed_p, cost);
4627
@@ -17287,14 +17149,16 @@ thumb2_reorg (void)
4629
FOR_EACH_BB_FN (bb, cfun)
4631
- if (current_tune->disparage_flag_setting_t16_encodings
4632
+ if ((current_tune->disparage_flag_setting_t16_encodings
4633
+ == tune_params::DISPARAGE_FLAGS_ALL)
4634
&& optimize_bb_for_speed_p (bb))
4638
Convert_Action action = SKIP;
4639
Convert_Action action_for_partial_flag_setting
4640
- = (current_tune->disparage_partial_flag_setting_t16_encodings
4641
+ = ((current_tune->disparage_flag_setting_t16_encodings
4642
+ != tune_params::DISPARAGE_FLAGS_NEITHER)
4643
&& optimize_bb_for_speed_p (bb))
4646
@@ -25660,12 +25524,12 @@ arm_print_tune_info (void)
4647
current_tune->constant_limit);
4648
asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
4649
current_tune->max_insns_skipped);
4650
- asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
4651
- current_tune->num_prefetch_slots);
4652
- asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
4653
- current_tune->l1_cache_size);
4654
- asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
4655
- current_tune->l1_cache_line_size);
4656
+ asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
4657
+ current_tune->prefetch.num_slots);
4658
+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
4659
+ current_tune->prefetch.l1_cache_size);
4660
+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
4661
+ current_tune->prefetch.l1_cache_line_size);
4662
asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
4663
(int) current_tune->prefer_constant_pool);
4664
asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
4665
@@ -25681,17 +25545,13 @@ arm_print_tune_info (void)
4666
asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
4667
(int) current_tune->prefer_ldrd_strd);
4668
asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
4669
- (int) current_tune->logical_op_non_short_circuit[0],
4670
- (int) current_tune->logical_op_non_short_circuit[1]);
4671
+ (int) current_tune->logical_op_non_short_circuit_thumb,
4672
+ (int) current_tune->logical_op_non_short_circuit_arm);
4673
asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
4674
(int) current_tune->prefer_neon_for_64bits);
4675
asm_fprintf (asm_out_file,
4676
"\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
4677
(int) current_tune->disparage_flag_setting_t16_encodings);
4678
- asm_fprintf (asm_out_file,
4679
- "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
4680
- (int) current_tune
4681
- ->disparage_partial_flag_setting_t16_encodings);
4682
asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
4683
(int) current_tune->string_ops_prefer_neon);
4684
asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
4685
@@ -27213,40 +27073,12 @@ thumb2_output_casesi (rtx *operands)
4689
-/* Most ARM cores are single issue, but some newer ones can dual issue.
4690
- The scheduler descriptions rely on this being correct. */
4691
+/* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
4692
+ per-core tuning structs. */
4694
arm_issue_rate (void)
4725
+ return current_tune->issue_rate;
4728
/* Return how many instructions should scheduler lookahead to choose the
4729
@@ -29411,7 +29243,7 @@ arm_gen_setmem (rtx *operands)
4731
arm_macro_fusion_p (void)
4733
- return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
4734
+ return current_tune->fuseable_ops != tune_params::FUSE_NOTHING;
4738
@@ -29432,44 +29264,44 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
4739
if (!arm_macro_fusion_p ())
4742
- if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
4743
+ if (current_tune->fuseable_ops & tune_params::FUSE_MOVW_MOVT)
4745
/* We are trying to fuse
4746
- movw imm / movt imm
4747
- instructions as a group that gets scheduled together. */
4748
+ movw imm / movt imm
4749
+ instructions as a group that gets scheduled together. */
4751
set_dest = SET_DEST (curr_set);
4753
if (GET_MODE (set_dest) != SImode)
4757
/* We are trying to match:
4758
- prev (movw) == (set (reg r0) (const_int imm16))
4759
- curr (movt) == (set (zero_extract (reg r0)
4762
- (const_int imm16_1))
4764
- prev (movw) == (set (reg r1)
4765
- (high (symbol_ref ("SYM"))))
4766
- curr (movt) == (set (reg r0)
4768
- (symbol_ref ("SYM")))) */
4769
+ prev (movw) == (set (reg r0) (const_int imm16))
4770
+ curr (movt) == (set (zero_extract (reg r0)
4773
+ (const_int imm16_1))
4775
+ prev (movw) == (set (reg r1)
4776
+ (high (symbol_ref ("SYM"))))
4777
+ curr (movt) == (set (reg r0)
4779
+ (symbol_ref ("SYM")))) */
4780
if (GET_CODE (set_dest) == ZERO_EXTRACT)
4782
- if (CONST_INT_P (SET_SRC (curr_set))
4783
- && CONST_INT_P (SET_SRC (prev_set))
4784
- && REG_P (XEXP (set_dest, 0))
4785
- && REG_P (SET_DEST (prev_set))
4786
- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
4790
+ if (CONST_INT_P (SET_SRC (curr_set))
4791
+ && CONST_INT_P (SET_SRC (prev_set))
4792
+ && REG_P (XEXP (set_dest, 0))
4793
+ && REG_P (SET_DEST (prev_set))
4794
+ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
4797
else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
4798
- && REG_P (SET_DEST (curr_set))
4799
- && REG_P (SET_DEST (prev_set))
4800
- && GET_CODE (SET_SRC (prev_set)) == HIGH
4801
- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
4803
+ && REG_P (SET_DEST (curr_set))
4804
+ && REG_P (SET_DEST (prev_set))
4805
+ && GET_CODE (SET_SRC (prev_set)) == HIGH
4806
+ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
4811
--- a/src/gcc/config/arm/arm.h
4812
+++ b/src/gcc/config/arm/arm.h
4813
@@ -1360,46 +1360,6 @@ enum reg_class
4814
? GENERAL_REGS : NO_REGS) \
4815
: THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
4817
-/* Try a machine-dependent way of reloading an illegitimate address
4818
- operand. If we find one, push the reload and jump to WIN. This
4819
- macro is used in only one place: `find_reloads_address' in reload.c.
4821
- For the ARM, we wish to handle large displacements off a base
4822
- register by splitting the addend across a MOV and the mem insn.
4823
- This can cut the number of reloads needed. */
4824
-#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \
4827
- if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \
4832
-/* XXX If an HImode FP+large_offset address is converted to an HImode
4833
- SP+large_offset address, then reload won't know how to fix it. It sees
4834
- only that SP isn't valid for HImode, and so reloads the SP into an index
4835
- register, but the resulting address is still invalid because the offset
4836
- is too big. We fix it here instead by reloading the entire address. */
4837
-/* We could probably achieve better results by defining PROMOTE_MODE to help
4838
- cope with the variances between the Thumb's signed and unsigned byte and
4839
- halfword load instructions. */
4840
-/* ??? This should be safe for thumb2, but we may be able to do better. */
4841
-#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \
4843
- rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \
4851
-#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
4853
- ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \
4855
- THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN)
4857
/* Return the maximum number of consecutive registers
4858
needed to represent mode MODE in a register of class CLASS.
4859
ARM regs are UNITS_PER_WORD bits.
4860
@@ -2096,10 +2056,11 @@ enum arm_auto_incmodes
4861
(current_tune->branch_cost (speed_p, predictable_p))
4863
/* False if short circuit operation is preferred. */
4864
-#define LOGICAL_OP_NON_SHORT_CIRCUIT \
4865
- ((optimize_size) \
4866
- ? (TARGET_THUMB ? false : true) \
4867
- : (current_tune->logical_op_non_short_circuit[TARGET_ARM]))
4868
+#define LOGICAL_OP_NON_SHORT_CIRCUIT \
4869
+ ((optimize_size) \
4870
+ ? (TARGET_THUMB ? false : true) \
4871
+ : TARGET_THUMB ? static_cast<bool> (current_tune->logical_op_non_short_circuit_thumb) \
4872
+ : static_cast<bool> (current_tune->logical_op_non_short_circuit_arm))
4875
/* Position Independent Code. */
4876
--- a/src/gcc/config/arm/arm.md
4877
+++ b/src/gcc/config/arm/arm.md
4878
@@ -1177,9 +1177,9 @@
4880
; ??? Check Thumb-2 split length
4881
(define_insn_and_split "*arm_subsi3_insn"
4882
- [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r")
4883
- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n")
4884
- (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))]
4885
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r,r,r,rk,r")
4886
+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,I,r,r,k ,?n")
4887
+ (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r,I,r,r ,r")))]
4891
@@ -2768,6 +2768,55 @@
4892
(const_string "logic_shift_reg")))]
4895
+;; Shifted bics pattern used to set up CC status register and not reusing
4896
+;; bics output. Pattern restricts Thumb2 shift operand as bics for Thumb2
4897
+;; does not support shift by register.
4898
+(define_insn "andsi_not_shiftsi_si_scc_no_reuse"
4899
+ [(set (reg:CC_NOOV CC_REGNUM)
4901
+ (and:SI (not:SI (match_operator:SI 0 "shift_operator"
4902
+ [(match_operand:SI 1 "s_register_operand" "r")
4903
+ (match_operand:SI 2 "arm_rhs_operand" "rM")]))
4904
+ (match_operand:SI 3 "s_register_operand" "r"))
4906
+ (clobber (match_scratch:SI 4 "=r"))]
4907
+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))"
4908
+ "bic%.%?\\t%4, %3, %1%S0"
4909
+ [(set_attr "predicable" "yes")
4910
+ (set_attr "predicable_short_it" "no")
4911
+ (set_attr "conds" "set")
4912
+ (set_attr "shift" "1")
4913
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
4914
+ (const_string "logic_shift_imm")
4915
+ (const_string "logic_shift_reg")))]
4918
+;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also
4919
+;; getting reused later.
4920
+(define_insn "andsi_not_shiftsi_si_scc"
4921
+ [(parallel [(set (reg:CC_NOOV CC_REGNUM)
4923
+ (and:SI (not:SI (match_operator:SI 0 "shift_operator"
4924
+ [(match_operand:SI 1 "s_register_operand" "r")
4925
+ (match_operand:SI 2 "arm_rhs_operand" "rM")]))
4926
+ (match_operand:SI 3 "s_register_operand" "r"))
4928
+ (set (match_operand:SI 4 "s_register_operand" "=r")
4929
+ (and:SI (not:SI (match_op_dup 0
4932
+ (match_dup 3)))])]
4933
+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))"
4934
+ "bic%.%?\\t%4, %3, %1%S0"
4935
+ [(set_attr "predicable" "yes")
4936
+ (set_attr "predicable_short_it" "no")
4937
+ (set_attr "conds" "set")
4938
+ (set_attr "shift" "1")
4939
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
4940
+ (const_string "logic_shift_imm")
4941
+ (const_string "logic_shift_reg")))]
4944
(define_insn "*andsi_notsi_si_compare0"
4945
[(set (reg:CC_NOOV CC_REGNUM)
4947
@@ -5076,7 +5125,7 @@
4950
[(set (match_operand:SI 0 "s_register_operand" "")
4951
- (ior_xor:SI (and:SI (ashift:SI
4952
+ (IOR_XOR:SI (and:SI (ashift:SI
4953
(match_operand:SI 1 "s_register_operand" "")
4954
(match_operand:SI 2 "const_int_operand" ""))
4955
(match_operand:SI 3 "const_int_operand" ""))
4956
@@ -5088,7 +5137,7 @@
4957
== (GET_MODE_MASK (GET_MODE (operands[5]))
4958
& (GET_MODE_MASK (GET_MODE (operands[5]))
4959
<< (INTVAL (operands[2])))))"
4960
- [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2))
4961
+ [(set (match_dup 0) (IOR_XOR:SI (ashift:SI (match_dup 1) (match_dup 2))
4963
(set (match_dup 0) (zero_extend:SI (match_dup 5)))]
4964
"operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);"
4965
@@ -5667,7 +5716,7 @@
4966
[(set_attr "predicable" "yes")
4967
(set_attr "predicable_short_it" "no")
4968
(set_attr "length" "4")
4969
- (set_attr "type" "mov_imm")]
4970
+ (set_attr "type" "alu_sreg")]
4973
(define_insn "*arm_movsi_insn"
4974
@@ -6712,7 +6761,7 @@
4976
/* Support only fixed point registers. */
4977
if (!CONST_INT_P (operands[2])
4978
- || INTVAL (operands[2]) > 14
4979
+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS
4980
|| INTVAL (operands[2]) < 2
4981
|| !MEM_P (operands[1])
4982
|| !REG_P (operands[0])
4983
@@ -6737,7 +6786,7 @@
4985
/* Support only fixed point registers. */
4986
if (!CONST_INT_P (operands[2])
4987
- || INTVAL (operands[2]) > 14
4988
+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS
4989
|| INTVAL (operands[2]) < 2
4990
|| !REG_P (operands[1])
4991
|| !MEM_P (operands[0])
4992
@@ -6922,7 +6971,7 @@
4993
[(set_attr "conds" "set")
4994
(set_attr "shift" "1")
4995
(set_attr "arch" "32,a,a")
4996
- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")])
4997
+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")])
4999
(define_insn "*cmpsi_shiftsi_swp"
5000
[(set (reg:CC_SWP CC_REGNUM)
5001
@@ -6935,7 +6984,7 @@
5002
[(set_attr "conds" "set")
5003
(set_attr "shift" "1")
5004
(set_attr "arch" "32,a,a")
5005
- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")])
5006
+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")])
5008
(define_insn "*arm_cmpsi_negshiftsi_si"
5009
[(set (reg:CC_Z CC_REGNUM)
5010
@@ -7528,10 +7577,10 @@
5011
(const_string "mov_imm")
5012
(const_string "mov_reg"))
5013
(const_string "mvn_imm")
5014
- (const_string "mov_reg")
5015
- (const_string "mov_reg")
5016
- (const_string "mov_reg")
5017
- (const_string "mov_reg")])]
5018
+ (const_string "multiple")
5019
+ (const_string "multiple")
5020
+ (const_string "multiple")
5021
+ (const_string "multiple")])]
5024
(define_insn "*movsfcc_soft_insn"
5025
@@ -7884,7 +7933,7 @@
5028
(define_expand "<return_str>return"
5031
"(TARGET_ARM || (TARGET_THUMB2
5032
&& ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
5033
&& !IS_STACKALIGN (arm_current_func_type ())))
5034
@@ -7922,7 +7971,7 @@
5036
(if_then_else (match_operator 0 "arm_comparison_operator"
5037
[(match_operand 1 "cc_register" "") (const_int 0)])
5041
"TARGET_ARM <return_cond_true>"
5043
@@ -7945,7 +7994,7 @@
5044
(if_then_else (match_operator 0 "arm_comparison_operator"
5045
[(match_operand 1 "cc_register" "") (const_int 0)])
5049
"TARGET_ARM <return_cond_true>"
5052
@@ -8279,7 +8328,7 @@
5054
(define_insn "*<arith_shift_insn>_multsi"
5055
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
5058
(mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
5059
(match_operand:SI 3 "power_of_two_operand" ""))
5060
(match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>")))]
5061
@@ -8293,7 +8342,7 @@
5063
(define_insn "*<arith_shift_insn>_shiftsi"
5064
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
5067
(match_operator:SI 2 "shift_nomul_operator"
5068
[(match_operand:SI 3 "s_register_operand" "r,r,r")
5069
(match_operand:SI 4 "shift_amount_operand" "M,M,r")])
5070
@@ -8689,7 +8738,14 @@
5073
[(set_attr "conds" "use")
5074
- (set_attr "type" "mov_reg,mov_reg,multiple")
5075
+ (set_attr_alternative "type"
5076
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5077
+ (const_string "mov_imm")
5078
+ (const_string "mov_reg"))
5079
+ (if_then_else (match_operand 1 "const_int_operand" "")
5080
+ (const_string "mov_imm")
5081
+ (const_string "mov_reg"))
5082
+ (const_string "multiple")])
5083
(set_attr "length" "4,4,8")]
5086
@@ -9485,8 +9541,8 @@
5087
(const_string "alu_imm" )
5088
(const_string "alu_sreg"))
5089
(const_string "alu_imm")
5090
- (const_string "alu_sreg")
5091
- (const_string "alu_sreg")])]
5092
+ (const_string "multiple")
5093
+ (const_string "multiple")])]
5096
(define_insn "*ifcompare_move_plus"
5097
@@ -9523,7 +9579,13 @@
5098
sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1"
5099
[(set_attr "conds" "use")
5100
(set_attr "length" "4,4,8,8")
5101
- (set_attr "type" "alu_sreg,alu_imm,multiple,multiple")]
5102
+ (set_attr_alternative "type"
5103
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5104
+ (const_string "alu_imm" )
5105
+ (const_string "alu_sreg"))
5106
+ (const_string "alu_imm")
5107
+ (const_string "multiple")
5108
+ (const_string "multiple")])]
5111
(define_insn "*ifcompare_arith_arith"
5112
@@ -9618,7 +9680,11 @@
5113
%I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1"
5114
[(set_attr "conds" "use")
5115
(set_attr "length" "4,8")
5116
- (set_attr "type" "alu_shift_reg,multiple")]
5117
+ (set_attr_alternative "type"
5118
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5119
+ (const_string "alu_shift_imm" )
5120
+ (const_string "alu_shift_reg"))
5121
+ (const_string "multiple")])]
5124
(define_insn "*ifcompare_move_arith"
5125
@@ -9679,7 +9745,11 @@
5126
%I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1"
5127
[(set_attr "conds" "use")
5128
(set_attr "length" "4,8")
5129
- (set_attr "type" "alu_shift_reg,multiple")]
5130
+ (set_attr_alternative "type"
5131
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5132
+ (const_string "alu_shift_imm" )
5133
+ (const_string "alu_shift_reg"))
5134
+ (const_string "multiple")])]
5137
(define_insn "*ifcompare_move_not"
5138
@@ -9786,7 +9856,12 @@
5139
[(set_attr "conds" "use")
5140
(set_attr "shift" "2")
5141
(set_attr "length" "4,8,8")
5142
- (set_attr "type" "mov_shift_reg,multiple,multiple")]
5143
+ (set_attr_alternative "type"
5144
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5145
+ (const_string "mov_shift" )
5146
+ (const_string "mov_shift_reg"))
5147
+ (const_string "multiple")
5148
+ (const_string "multiple")])]
5151
(define_insn "*ifcompare_move_shift"
5152
@@ -9824,7 +9899,12 @@
5153
[(set_attr "conds" "use")
5154
(set_attr "shift" "2")
5155
(set_attr "length" "4,8,8")
5156
- (set_attr "type" "mov_shift_reg,multiple,multiple")]
5157
+ (set_attr_alternative "type"
5158
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5159
+ (const_string "mov_shift" )
5160
+ (const_string "mov_shift_reg"))
5161
+ (const_string "multiple")
5162
+ (const_string "multiple")])]
5165
(define_insn "*ifcompare_shift_shift"
5166
@@ -10905,7 +10985,7 @@
5167
[(set_attr "predicable" "yes")
5168
(set_attr "predicable_short_it" "no")
5169
(set_attr "length" "4")
5170
- (set_attr "type" "mov_imm")]
5171
+ (set_attr "type" "alu_sreg")]
5174
(define_insn "*arm_rev"
5175
--- a/src/gcc/config/arm/iterators.md
5176
+++ b/src/gcc/config/arm/iterators.md
5177
@@ -181,39 +181,53 @@
5178
;; compare a second time.
5179
(define_code_iterator LTUGEU [ltu geu])
5181
+;; The signed gt, ge comparisons
5182
+(define_code_iterator GTGE [gt ge])
5184
+;; The unsigned gt, ge comparisons
5185
+(define_code_iterator GTUGEU [gtu geu])
5187
+;; Comparisons for vc<cmp>
5188
+(define_code_iterator COMPARISONS [eq gt ge le lt])
5191
-(define_code_iterator ior_xor [ior xor])
5192
+(define_code_iterator IOR_XOR [ior xor])
5194
;; Operations on two halves of a quadword vector.
5195
-(define_code_iterator vqh_ops [plus smin smax umin umax])
5196
+(define_code_iterator VQH_OPS [plus smin smax umin umax])
5198
;; Operations on two halves of a quadword vector,
5199
;; without unsigned variants (for use with *SFmode pattern).
5200
-(define_code_iterator vqhs_ops [plus smin smax])
5201
+(define_code_iterator VQHS_OPS [plus smin smax])
5203
;; A list of widening operators
5204
(define_code_iterator SE [sign_extend zero_extend])
5207
-(define_code_iterator rshifts [ashiftrt lshiftrt])
5208
+(define_code_iterator RSHIFTS [ashiftrt lshiftrt])
5210
;; Iterator for integer conversions
5211
(define_code_iterator FIXUORS [fix unsigned_fix])
5213
;; Binary operators whose second operand can be shifted.
5214
-(define_code_iterator shiftable_ops [plus minus ior xor and])
5215
+(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and])
5217
-;; plus and minus are the only shiftable_ops for which Thumb2 allows
5218
+;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
5219
;; a stack pointer opoerand. The minus operation is a candidate for an rsub
5220
;; and hence only plus is supported.
5221
(define_code_attr t2_binop0
5222
[(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")])
5224
-;; The instruction to use when a shiftable_ops has a shift operation as
5225
+;; The instruction to use when a SHIFTABLE_OPS has a shift operation as
5226
;; its first operand.
5227
(define_code_attr arith_shift_insn
5228
[(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")])
5230
+(define_code_attr cmp_op [(eq "eq") (gt "gt") (ge "ge") (lt "lt") (le "le")
5231
+ (gtu "gt") (geu "ge")])
5233
+(define_code_attr cmp_type [(eq "i") (gt "s") (ge "s") (lt "s") (le "s")])
5235
;;----------------------------------------------------------------------------
5237
;;----------------------------------------------------------------------------
5238
@@ -221,6 +235,10 @@
5239
(define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM
5240
UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA])
5242
+(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE])
5244
+(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT])
5246
(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA])
5248
(define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
5249
@@ -677,6 +695,11 @@
5253
+(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt")
5254
+ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le")
5255
+ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge")
5256
+ (UNSPEC_VCAGT "gt")])
5258
(define_int_attr r [
5259
(UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r")
5260
(UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "")
5262
(UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")])
5264
;; Both kinds of return insn.
5265
-(define_code_iterator returns [return simple_return])
5266
+(define_code_iterator RETURNS [return simple_return])
5267
(define_code_attr return_str [(return "") (simple_return "simple_")])
5268
(define_code_attr return_simple_p [(return "false") (simple_return "true")])
5269
(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)")
5270
--- a/src/gcc/config/arm/iwmmxt.md
5271
+++ b/src/gcc/config/arm/iwmmxt.md
5275
(define_insn "*iwmmxt_arm_movdi"
5276
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv")
5277
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))]
5278
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv")
5279
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))]
5280
"TARGET_REALLY_IWMMXT
5281
&& ( register_operand (operands[0], DImode)
5282
|| register_operand (operands[1], DImode))"
5283
--- a/src/gcc/config/arm/linux-eabi.h
5284
+++ b/src/gcc/config/arm/linux-eabi.h
5286
%{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \
5287
%{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}"
5289
+/* For ARM musl currently supports four dynamic linkers:
5290
+ - ld-musl-arm.so.1 - for the EABI-derived soft-float ABI
5291
+ - ld-musl-armhf.so.1 - for the EABI-derived hard-float ABI
5292
+ - ld-musl-armeb.so.1 - for the EABI-derived soft-float ABI, EB
5293
+ - ld-musl-armebhf.so.1 - for the EABI-derived hard-float ABI, EB
5294
+ musl does not support the legacy OABI mode.
5295
+ All the dynamic linkers live in /lib.
5296
+ We default to soft-float, EL. */
5297
+#undef MUSL_DYNAMIC_LINKER
5298
+#if TARGET_BIG_ENDIAN_DEFAULT
5299
+#define MUSL_DYNAMIC_LINKER_E "%{mlittle-endian:;:eb}"
5301
+#define MUSL_DYNAMIC_LINKER_E "%{mbig-endian:eb}"
5303
+#define MUSL_DYNAMIC_LINKER \
5304
+ "/lib/ld-musl-arm" MUSL_DYNAMIC_LINKER_E "%{mfloat-abi=hard:hf}.so.1"
5306
/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to
5307
use the GNU/Linux version, not the generic BPABI version. */
5312
#define ENDFILE_SPEC \
5313
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \
5314
LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
5316
/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we
5317
--- a/src/gcc/config/arm/neon.md
5318
+++ b/src/gcc/config/arm/neon.md
5319
@@ -1114,7 +1114,7 @@
5321
(define_insn_and_split "<shift>di3_neon"
5322
[(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
5323
- (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
5324
+ (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
5325
(match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
5326
(clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
5327
(clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
5328
@@ -1194,71 +1194,6 @@
5329
[(set_attr "type" "neon_add_widen")]
5332
-;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
5333
-;; shift-count granularity. That's good enough for the middle-end's current
5336
-;; Note that it's not safe to perform such an operation in big-endian mode,
5337
-;; due to element-ordering issues.
5339
-(define_expand "vec_shr_<mode>"
5340
- [(match_operand:VDQ 0 "s_register_operand" "")
5341
- (match_operand:VDQ 1 "s_register_operand" "")
5342
- (match_operand:SI 2 "const_multiple_of_8_operand" "")]
5343
- "TARGET_NEON && !BYTES_BIG_ENDIAN"
5346
- HOST_WIDE_INT num_bits = INTVAL (operands[2]);
5347
- const int width = GET_MODE_BITSIZE (<MODE>mode);
5348
- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
5349
- rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
5350
- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
5352
- if (num_bits == width)
5354
- emit_move_insn (operands[0], operands[1]);
5358
- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
5359
- operands[0] = gen_lowpart (bvecmode, operands[0]);
5360
- operands[1] = gen_lowpart (bvecmode, operands[1]);
5362
- emit_insn (gen_ext (operands[0], operands[1], zero_reg,
5363
- GEN_INT (num_bits / BITS_PER_UNIT)));
5367
-(define_expand "vec_shl_<mode>"
5368
- [(match_operand:VDQ 0 "s_register_operand" "")
5369
- (match_operand:VDQ 1 "s_register_operand" "")
5370
- (match_operand:SI 2 "const_multiple_of_8_operand" "")]
5371
- "TARGET_NEON && !BYTES_BIG_ENDIAN"
5374
- HOST_WIDE_INT num_bits = INTVAL (operands[2]);
5375
- const int width = GET_MODE_BITSIZE (<MODE>mode);
5376
- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
5377
- rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
5378
- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
5380
- if (num_bits == 0)
5382
- emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
5386
- num_bits = width - num_bits;
5388
- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
5389
- operands[0] = gen_lowpart (bvecmode, operands[0]);
5390
- operands[1] = gen_lowpart (bvecmode, operands[1]);
5392
- emit_insn (gen_ext (operands[0], zero_reg, operands[1],
5393
- GEN_INT (num_bits / BITS_PER_UNIT)));
5397
;; Helpers for quad-word reduction operations
5399
; Add (or smin, smax...) the low N/2 elements of the N-element vector
5400
@@ -1267,7 +1202,7 @@
5402
(define_insn "quad_halves_<code>v4si"
5403
[(set (match_operand:V2SI 0 "s_register_operand" "=w")
5406
(vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
5407
(parallel [(const_int 0) (const_int 1)]))
5408
(vec_select:V2SI (match_dup 1)
5409
@@ -1280,7 +1215,7 @@
5411
(define_insn "quad_halves_<code>v4sf"
5412
[(set (match_operand:V2SF 0 "s_register_operand" "=w")
5415
(vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
5416
(parallel [(const_int 0) (const_int 1)]))
5417
(vec_select:V2SF (match_dup 1)
5418
@@ -1293,7 +1228,7 @@
5420
(define_insn "quad_halves_<code>v8hi"
5421
[(set (match_operand:V4HI 0 "s_register_operand" "+w")
5424
(vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
5425
(parallel [(const_int 0) (const_int 1)
5426
(const_int 2) (const_int 3)]))
5427
@@ -1308,7 +1243,7 @@
5429
(define_insn "quad_halves_<code>v16qi"
5430
[(set (match_operand:V8QI 0 "s_register_operand" "+w")
5433
(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
5434
(parallel [(const_int 0) (const_int 1)
5435
(const_int 2) (const_int 3)
5436
@@ -2200,134 +2135,140 @@
5437
[(set_attr "type" "neon_sub_halve_narrow_q")]
5440
-(define_insn "neon_vceq<mode>"
5441
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5442
- (unspec:<V_cmp_result>
5443
- [(match_operand:VDQW 1 "s_register_operand" "w,w")
5444
- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
5446
+;; These may expand to an UNSPEC pattern when a floating point mode is used
5447
+;; without unsafe math optimizations.
5448
+(define_expand "neon_vc<cmp_op><mode>"
5449
+ [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5450
+ (neg:<V_cmp_result>
5451
+ (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
5452
+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
5455
- vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
5456
- vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
5457
- [(set (attr "type")
5458
- (if_then_else (match_test "<Is_float_mode>")
5459
- (const_string "neon_fp_compare_s<q>")
5460
- (if_then_else (match_operand 2 "zero_operand")
5461
- (const_string "neon_compare_zero<q>")
5462
- (const_string "neon_compare<q>"))))]
5464
+ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
5466
+ if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
5467
+ && !flag_unsafe_math_optimizations)
5469
+ /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
5470
+ we define gen_neon_vceq<mode>_insn_unspec only for float modes
5471
+ whereas this expander iterates over the integer modes as well,
5472
+ but we will never expand to UNSPECs for the integer comparisons. */
5473
+ switch (<MODE>mode)
5476
+ emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
5481
+ emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
5486
+ gcc_unreachable ();
5490
+ emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
5497
-(define_insn "neon_vcge<mode>"
5498
+(define_insn "neon_vc<cmp_op><mode>_insn"
5499
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5500
- (unspec:<V_cmp_result>
5501
- [(match_operand:VDQW 1 "s_register_operand" "w,w")
5502
- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
5506
- vcge.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
5507
- vcge.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5508
+ (neg:<V_cmp_result>
5509
+ (COMPARISONS:<V_cmp_result>
5510
+ (match_operand:VDQW 1 "s_register_operand" "w,w")
5511
+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
5512
+ "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
5513
+ && !flag_unsafe_math_optimizations)"
5515
+ char pattern[100];
5516
+ sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
5517
+ " %%<V_reg>1, %s",
5518
+ GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
5519
+ ? "f" : "<cmp_type>",
5520
+ which_alternative == 0
5521
+ ? "%<V_reg>2" : "#0");
5522
+ output_asm_insn (pattern, operands);
5526
- (if_then_else (match_test "<Is_float_mode>")
5527
- (const_string "neon_fp_compare_s<q>")
5528
- (if_then_else (match_operand 2 "zero_operand")
5529
+ (if_then_else (match_operand 2 "zero_operand")
5530
(const_string "neon_compare_zero<q>")
5531
- (const_string "neon_compare<q>"))))]
5534
-(define_insn "neon_vcgeu<mode>"
5535
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5536
- (unspec:<V_cmp_result>
5537
- [(match_operand:VDQIW 1 "s_register_operand" "w")
5538
- (match_operand:VDQIW 2 "s_register_operand" "w")]
5541
- "vcge.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5542
- [(set_attr "type" "neon_compare<q>")]
5543
+ (const_string "neon_compare<q>")))]
5546
-(define_insn "neon_vcgt<mode>"
5547
+(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
5548
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5549
(unspec:<V_cmp_result>
5550
- [(match_operand:VDQW 1 "s_register_operand" "w,w")
5551
- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
5553
+ [(match_operand:VCVTF 1 "s_register_operand" "w,w")
5554
+ (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
5558
- vcgt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
5559
- vcgt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5560
- [(set (attr "type")
5561
- (if_then_else (match_test "<Is_float_mode>")
5562
- (const_string "neon_fp_compare_s<q>")
5563
- (if_then_else (match_operand 2 "zero_operand")
5564
- (const_string "neon_compare_zero<q>")
5565
- (const_string "neon_compare<q>"))))]
5567
+ char pattern[100];
5568
+ sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
5569
+ " %%<V_reg>1, %s",
5570
+ which_alternative == 0
5571
+ ? "%<V_reg>2" : "#0");
5572
+ output_asm_insn (pattern, operands);
5575
+ [(set_attr "type" "neon_fp_compare_s<q>")]
5578
-(define_insn "neon_vcgtu<mode>"
5579
+(define_insn "neon_vc<cmp_op>u<mode>"
5580
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5581
- (unspec:<V_cmp_result>
5582
- [(match_operand:VDQIW 1 "s_register_operand" "w")
5583
- (match_operand:VDQIW 2 "s_register_operand" "w")]
5585
+ (neg:<V_cmp_result>
5586
+ (GTUGEU:<V_cmp_result>
5587
+ (match_operand:VDQIW 1 "s_register_operand" "w")
5588
+ (match_operand:VDQIW 2 "s_register_operand" "w"))))]
5590
- "vcgt.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5591
+ "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5592
[(set_attr "type" "neon_compare<q>")]
5595
-;; VCLE and VCLT only support comparisons with immediate zero (register
5596
-;; variants are VCGE and VCGT with operands reversed).
5598
-(define_insn "neon_vcle<mode>"
5599
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5600
- (unspec:<V_cmp_result>
5601
- [(match_operand:VDQW 1 "s_register_operand" "w")
5602
- (match_operand:VDQW 2 "zero_operand" "Dz")]
5605
- "vcle.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5606
- [(set (attr "type")
5607
- (if_then_else (match_test "<Is_float_mode>")
5608
- (const_string "neon_fp_compare_s<q>")
5609
- (if_then_else (match_operand 2 "zero_operand")
5610
- (const_string "neon_compare_zero<q>")
5611
- (const_string "neon_compare<q>"))))]
5614
-(define_insn "neon_vclt<mode>"
5615
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5616
- (unspec:<V_cmp_result>
5617
- [(match_operand:VDQW 1 "s_register_operand" "w")
5618
- (match_operand:VDQW 2 "zero_operand" "Dz")]
5620
+(define_expand "neon_vca<cmp_op><mode>"
5621
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
5622
+ (neg:<V_cmp_result>
5623
+ (GTGE:<V_cmp_result>
5624
+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
5625
+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
5627
- "vclt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5628
- [(set (attr "type")
5629
- (if_then_else (match_test "<Is_float_mode>")
5630
- (const_string "neon_fp_compare_s<q>")
5631
- (if_then_else (match_operand 2 "zero_operand")
5632
- (const_string "neon_compare_zero<q>")
5633
- (const_string "neon_compare<q>"))))]
5635
+ if (flag_unsafe_math_optimizations)
5636
+ emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
5639
+ emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
5646
-(define_insn "neon_vcage<mode>"
5647
+(define_insn "neon_vca<cmp_op><mode>_insn"
5648
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5649
- (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
5650
- (match_operand:VCVTF 2 "s_register_operand" "w")]
5653
- "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5654
+ (neg:<V_cmp_result>
5655
+ (GTGE:<V_cmp_result>
5656
+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
5657
+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
5658
+ "TARGET_NEON && flag_unsafe_math_optimizations"
5659
+ "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5660
[(set_attr "type" "neon_fp_compare_s<q>")]
5663
-(define_insn "neon_vcagt<mode>"
5664
+(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
5665
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5666
(unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
5667
(match_operand:VCVTF 2 "s_register_operand" "w")]
5671
- "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5672
+ "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5673
[(set_attr "type" "neon_fp_compare_s<q>")]
5676
--- a/src/gcc/config/arm/thumb2.md
5677
+++ b/src/gcc/config/arm/thumb2.md
5682
- [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1")
5683
+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1")
5684
(set_attr "length" "2,4,2,4,4,4,4,4,4")
5685
(set_attr "predicable" "yes")
5686
(set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
5687
@@ -486,12 +486,12 @@
5690
(define_insn_and_split "*thumb2_movsicc_insn"
5691
- [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r")
5692
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
5694
(match_operator 3 "arm_comparison_operator"
5695
[(match_operand 4 "cc_register" "") (const_int 0)])
5696
- (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r")
5697
- (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))]
5698
+ (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,I ,r,rI,K ,K,r")
5699
+ (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,I,K ,rI,K,r")))]
5702
it\\t%D3\;mov%D3\\t%0, %2
5703
@@ -504,12 +504,14 @@
5709
; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5710
- ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
5711
- ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
5712
- ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
5713
- ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5714
+ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5715
+ ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
5716
+ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
5717
+ ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
5718
+ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5719
"&& reload_completed"
5722
@@ -540,10 +542,30 @@
5726
- [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6")
5727
- (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes")
5728
+ [(set_attr "length" "4,4,6,6,6,6,10,8,10,10,10,6")
5729
+ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,no,yes")
5730
(set_attr "conds" "use")
5731
- (set_attr "type" "multiple")]
5732
+ (set_attr_alternative "type"
5733
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5734
+ (const_string "mov_imm")
5735
+ (const_string "mov_reg"))
5736
+ (if_then_else (match_operand 1 "const_int_operand" "")
5737
+ (const_string "mov_imm")
5738
+ (const_string "mov_reg"))
5739
+ (if_then_else (match_operand 2 "const_int_operand" "")
5740
+ (const_string "mov_imm")
5741
+ (const_string "mov_reg"))
5742
+ (const_string "mvn_imm")
5743
+ (if_then_else (match_operand 1 "const_int_operand" "")
5744
+ (const_string "mov_imm")
5745
+ (const_string "mov_reg"))
5746
+ (const_string "mvn_imm")
5747
+ (const_string "multiple")
5748
+ (const_string "multiple")
5749
+ (const_string "multiple")
5750
+ (const_string "multiple")
5751
+ (const_string "multiple")
5752
+ (const_string "multiple")])]
5755
(define_insn "*thumb2_movsfcc_soft_insn"
5756
@@ -1182,7 +1204,11 @@
5758
[(set_attr "predicable" "yes")
5759
(set_attr "length" "2")
5760
- (set_attr "type" "alu_sreg")]
5761
+ (set_attr_alternative "type"
5762
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5763
+ (const_string "alu_imm")
5764
+ (const_string "alu_sreg"))
5765
+ (const_string "alu_imm")])]
5768
(define_insn "*thumb2_subsi_short"
5769
@@ -1247,14 +1273,21 @@
5771
[(set_attr "conds" "set")
5772
(set_attr "length" "2,2,4")
5773
- (set_attr "type" "alu_sreg")]
5774
+ (set_attr_alternative "type"
5775
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5776
+ (const_string "alus_imm")
5777
+ (const_string "alus_sreg"))
5778
+ (const_string "alus_imm")
5779
+ (if_then_else (match_operand 2 "const_int_operand" "")
5780
+ (const_string "alus_imm")
5781
+ (const_string "alus_sreg"))])]
5784
(define_insn "*thumb2_addsi3_compare0_scratch"
5785
[(set (reg:CC_NOOV CC_REGNUM)
5787
- (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r")
5788
- (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r"))
5789
+ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r")
5790
+ (match_operand:SI 1 "arm_add_operand" "lPv,rIL"))
5794
@@ -1271,8 +1304,10 @@
5795
return \"cmn\\t%0, %1\";
5797
[(set_attr "conds" "set")
5798
- (set_attr "length" "2,2,4,4")
5799
- (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_sreg")]
5800
+ (set_attr "length" "2,4")
5801
+ (set (attr "type") (if_then_else (match_operand 1 "const_int_operand" "")
5802
+ (const_string "alus_imm")
5803
+ (const_string "alus_sreg")))]
5806
(define_insn "*thumb2_mulsi_short"
5807
--- a/src/gcc/config/arm/unknown-elf.h
5808
+++ b/src/gcc/config/arm/unknown-elf.h
5810
#define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s"
5812
#undef STARTFILE_SPEC
5813
-#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC
5814
+#define STARTFILE_SPEC \
5815
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \
5816
+ UNKNOWN_ELF_STARTFILE_SPEC
5818
#define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s"
5822
ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \
5823
ASM_OUTPUT_LABEL (FILE, NAME); \
5824
- fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \
5825
+ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int) SIZE : 1); \
5826
+ fprintf (FILE, "\t.size\t%s, %d\n", \
5827
+ NAME, SIZE ? (int) SIZE : 1); \
5831
--- a/src/gcc/config/glibc-stdint.h
5832
+++ b/src/gcc/config/glibc-stdint.h
5833
@@ -22,6 +22,12 @@ a copy of the GCC Runtime Library Exception along with this program;
5834
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5835
<http://www.gnu.org/licenses/>. */
5837
+/* Systems using musl libc should use this header and make sure
5838
+ OPTION_MUSL is defined correctly before using the TYPE macros. */
5839
+#ifndef OPTION_MUSL
5840
+#define OPTION_MUSL 0
5843
#define SIG_ATOMIC_TYPE "int"
5845
#define INT8_TYPE "signed char"
5846
@@ -43,12 +49,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5847
#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
5849
#define INT_FAST8_TYPE "signed char"
5850
-#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
5851
-#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
5852
+#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int")
5853
+#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int")
5854
#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
5855
#define UINT_FAST8_TYPE "unsigned char"
5856
-#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
5857
-#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
5858
+#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int")
5859
+#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int")
5860
#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
5862
#define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
5863
--- a/src/gcc/config/linux.h
5864
+++ b/src/gcc/config/linux.h
5865
@@ -32,10 +32,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5866
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
5867
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
5868
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
5870
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
5872
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
5873
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
5874
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
5876
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
5879
#define GNU_USER_TARGET_OS_CPP_BUILTINS() \
5880
@@ -50,21 +54,25 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5883
/* Determine which dynamic linker to use depending on whether GLIBC or
5884
- uClibc or Bionic is the default C library and whether
5885
- -muclibc or -mglibc or -mbionic has been passed to change the default. */
5886
+ uClibc or Bionic or musl is the default C library and whether
5887
+ -muclibc or -mglibc or -mbionic or -mmusl has been passed to change
5890
-#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \
5891
- "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}"
5892
+#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LIBC4, LD1, LD2, LD3, LD4) \
5893
+ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:%{" LIBC4 ":" LD4 ";:" LD1 "}}}"
5895
#if DEFAULT_LIBC == LIBC_GLIBC
5896
-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
5897
- CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B)
5898
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5899
+ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", "mmusl", G, U, B, M)
5900
#elif DEFAULT_LIBC == LIBC_UCLIBC
5901
-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
5902
- CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B)
5903
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5904
+ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", "mmusl", U, G, B, M)
5905
#elif DEFAULT_LIBC == LIBC_BIONIC
5906
-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
5907
- CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U)
5908
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5909
+ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", "mmusl", B, G, U, M)
5910
+#elif DEFAULT_LIBC == LIBC_MUSL
5911
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5912
+ CHOOSE_DYNAMIC_LINKER1 ("mmusl", "mglibc", "muclibc", "mbionic", M, G, U, B)
5914
#error "Unsupported DEFAULT_LIBC"
5915
#endif /* DEFAULT_LIBC */
5916
@@ -81,24 +89,100 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5917
#define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker"
5918
#define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64"
5919
#define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32"
5920
+/* Should be redefined for each target that supports musl. */
5921
+#define MUSL_DYNAMIC_LINKER "/dev/null"
5922
+#define MUSL_DYNAMIC_LINKER32 "/dev/null"
5923
+#define MUSL_DYNAMIC_LINKER64 "/dev/null"
5924
+#define MUSL_DYNAMIC_LINKERX32 "/dev/null"
5926
#define GNU_USER_DYNAMIC_LINKER \
5927
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \
5928
- BIONIC_DYNAMIC_LINKER)
5929
+ BIONIC_DYNAMIC_LINKER, MUSL_DYNAMIC_LINKER)
5930
#define GNU_USER_DYNAMIC_LINKER32 \
5931
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \
5932
- BIONIC_DYNAMIC_LINKER32)
5933
+ BIONIC_DYNAMIC_LINKER32, MUSL_DYNAMIC_LINKER32)
5934
#define GNU_USER_DYNAMIC_LINKER64 \
5935
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \
5936
- BIONIC_DYNAMIC_LINKER64)
5937
+ BIONIC_DYNAMIC_LINKER64, MUSL_DYNAMIC_LINKER64)
5938
#define GNU_USER_DYNAMIC_LINKERX32 \
5939
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \
5940
- BIONIC_DYNAMIC_LINKERX32)
5941
+ BIONIC_DYNAMIC_LINKERX32, MUSL_DYNAMIC_LINKERX32)
5943
/* Whether we have Bionic libc runtime */
5944
#undef TARGET_HAS_BIONIC
5945
#define TARGET_HAS_BIONIC (OPTION_BIONIC)
5947
+/* musl avoids problematic includes by rearranging the include directories.
5948
+ * Unfortunately, this is mostly duplicated from cppdefault.c */
5949
+#if DEFAULT_LIBC == LIBC_MUSL
5950
+#define INCLUDE_DEFAULTS_MUSL_GPP \
5951
+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \
5952
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \
5953
+ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \
5954
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \
5955
+ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \
5956
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 },
5958
+#ifdef LOCAL_INCLUDE_DIR
5959
+#define INCLUDE_DEFAULTS_MUSL_LOCAL \
5960
+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \
5961
+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 },
5963
+#define INCLUDE_DEFAULTS_MUSL_LOCAL
5966
+#ifdef PREFIX_INCLUDE_DIR
5967
+#define INCLUDE_DEFAULTS_MUSL_PREFIX \
5968
+ { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0},
5970
+#define INCLUDE_DEFAULTS_MUSL_PREFIX
5973
+#ifdef CROSS_INCLUDE_DIR
5974
+#define INCLUDE_DEFAULTS_MUSL_CROSS \
5975
+ { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0},
5977
+#define INCLUDE_DEFAULTS_MUSL_CROSS
5980
+#ifdef TOOL_INCLUDE_DIR
5981
+#define INCLUDE_DEFAULTS_MUSL_TOOL \
5982
+ { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0},
5984
+#define INCLUDE_DEFAULTS_MUSL_TOOL
5987
+#ifdef NATIVE_SYSTEM_HEADER_DIR
5988
+#define INCLUDE_DEFAULTS_MUSL_NATIVE \
5989
+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \
5990
+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 },
5992
+#define INCLUDE_DEFAULTS_MUSL_NATIVE
5995
+#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT)
5996
+# undef INCLUDE_DEFAULTS_MUSL_LOCAL
5997
+# define INCLUDE_DEFAULTS_MUSL_LOCAL
5998
+# undef INCLUDE_DEFAULTS_MUSL_NATIVE
5999
+# define INCLUDE_DEFAULTS_MUSL_NATIVE
6001
+# undef INCLUDE_DEFAULTS_MUSL_CROSS
6002
+# define INCLUDE_DEFAULTS_MUSL_CROSS
6005
+#undef INCLUDE_DEFAULTS
6006
+#define INCLUDE_DEFAULTS \
6008
+ INCLUDE_DEFAULTS_MUSL_GPP \
6009
+ INCLUDE_DEFAULTS_MUSL_PREFIX \
6010
+ INCLUDE_DEFAULTS_MUSL_CROSS \
6011
+ INCLUDE_DEFAULTS_MUSL_TOOL \
6012
+ INCLUDE_DEFAULTS_MUSL_NATIVE \
6013
+ { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \
6014
+ { 0, 0, 0, 0, 0, 0 } \
6018
#if (DEFAULT_LIBC == LIBC_UCLIBC) && defined (SINGLE_LIBC) /* uClinux */
6019
/* This is a *uclinux* target. We don't define below macros to normal linux
6020
versions, because doing so would require *uclinux* targets to include
6021
--- a/src/gcc/config/linux.opt
6022
+++ b/src/gcc/config/linux.opt
6023
@@ -28,5 +28,9 @@ Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc)
6027
-Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic)
6028
+Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mmusl)
6029
Use uClibc C library
6032
+Target Report RejectNegative Var(linux_libc,LIBC_MUSL) Negative(mbionic)
6034
--- a/src/gcc/config/mips/linux.h
6035
+++ b/src/gcc/config/mips/linux.h
6036
@@ -37,7 +37,13 @@ along with GCC; see the file COPYING3. If not see
6037
#define UCLIBC_DYNAMIC_LINKERN32 \
6038
"%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}"
6040
+#undef MUSL_DYNAMIC_LINKER32
6041
+#define MUSL_DYNAMIC_LINKER32 "/lib/ld-musl-mips%{EL:el}%{msoft-float:-sf}.so.1"
6042
+#undef MUSL_DYNAMIC_LINKER64
6043
+#define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-mips64%{EL:el}%{msoft-float:-sf}.so.1"
6044
+#define MUSL_DYNAMIC_LINKERN32 "/lib/ld-musl-mipsn32%{EL:el}%{msoft-float:-sf}.so.1"
6046
#define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32"
6047
#define GNU_USER_DYNAMIC_LINKERN32 \
6048
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \
6049
- BIONIC_DYNAMIC_LINKERN32)
6050
+ BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32)
6051
--- a/src/gcc/config/rs6000/linux.h
6052
+++ b/src/gcc/config/rs6000/linux.h
6054
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
6055
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
6056
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
6058
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
6060
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
6061
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
6062
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
6064
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
6067
/* Determine what functions are present at the runtime;
6068
--- a/src/gcc/config/rs6000/linux64.h
6069
+++ b/src/gcc/config/rs6000/linux64.h
6070
@@ -299,10 +299,14 @@ extern int dot_symbols;
6071
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
6072
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
6073
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
6075
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
6077
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
6078
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
6079
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
6081
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
6084
/* Determine what functions are present at the runtime;
6085
--- a/src/gcc/configure
6086
+++ b/src/gcc/configure
6087
@@ -1699,7 +1699,8 @@ Optional Packages:
6088
use sysroot as the system root during the build
6089
--with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
6090
--with-specs=SPECS add SPECS to driver command-line processing
6091
- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
6092
+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
6093
+ GCC `cat $srcdir/LINARO-VERSION`"
6094
--with-bugurl=URL Direct users to URL to report a bug
6095
--with-multilib-list select multilibs (AArch64, SH and x86-64 only)
6096
--with-gnu-ld assume the C compiler uses GNU ld default=no
6097
@@ -7362,7 +7363,7 @@ if test "${with_pkgversion+set}" = set; then :
6098
*) PKGVERSION="($withval) " ;;
6101
- PKGVERSION="(GCC) "
6102
+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
6106
@@ -18162,7 +18163,7 @@ else
6107
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
6108
lt_status=$lt_dlunknown
6109
cat > conftest.$ac_ext <<_LT_EOF
6110
-#line 18165 "configure"
6111
+#line 18166 "configure"
6112
#include "confdefs.h"
6115
@@ -18268,7 +18269,7 @@ else
6116
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
6117
lt_status=$lt_dlunknown
6118
cat > conftest.$ac_ext <<_LT_EOF
6119
-#line 18271 "configure"
6120
+#line 18272 "configure"
6121
#include "confdefs.h"
6124
@@ -27742,6 +27743,9 @@ if test "${gcc_cv_libc_provides_ssp+set}" = set; then :
6126
gcc_cv_libc_provides_ssp=no
6129
+ # All versions of musl provide stack protector
6130
+ gcc_cv_libc_provides_ssp=yes;;
6131
*-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu)
6132
# glibc 2.4 and later provides __stack_chk_fail and
6133
# either __stack_chk_guard, or TLS access to stack guard canary.
6134
@@ -27774,6 +27778,7 @@ fi
6135
# <http://gcc.gnu.org/ml/gcc/2008-10/msg00130.html>) and for now
6136
# simply assert that glibc does provide this, which is true for all
6137
# realistically usable GNU/Hurd configurations.
6138
+ # All supported versions of musl provide it as well
6139
gcc_cv_libc_provides_ssp=yes;;
6140
*-*-darwin* | *-*-freebsd*)
6141
ac_fn_c_check_func "$LINENO" "__stack_chk_fail" "ac_cv_func___stack_chk_fail"
6142
@@ -27870,6 +27875,9 @@ case "$target" in
6143
gcc_cv_target_dl_iterate_phdr=no
6147
+ gcc_cv_target_dl_iterate_phdr=yes
6151
if test x$gcc_cv_target_dl_iterate_phdr = xyes; then
6152
--- a/src/gcc/configure.ac
6153
+++ b/src/gcc/configure.ac
6154
@@ -862,7 +862,7 @@ AC_ARG_WITH(specs,
6156
AC_SUBST(CONFIGURE_SPECS)
6158
-ACX_PKGVERSION([GCC])
6159
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
6160
ACX_BUGURL([http://gcc.gnu.org/bugs.html])
6162
# Sanity check enable_languages in case someone does not run the toplevel
6163
@@ -5229,6 +5229,9 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library,
6164
gcc_cv_libc_provides_ssp,
6165
[gcc_cv_libc_provides_ssp=no
6168
+ # All versions of musl provide stack protector
6169
+ gcc_cv_libc_provides_ssp=yes;;
6170
*-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu)
6171
# glibc 2.4 and later provides __stack_chk_fail and
6172
# either __stack_chk_guard, or TLS access to stack guard canary.
6173
@@ -5255,6 +5258,7 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library,
6174
# <http://gcc.gnu.org/ml/gcc/2008-10/msg00130.html>) and for now
6175
# simply assert that glibc does provide this, which is true for all
6176
# realistically usable GNU/Hurd configurations.
6177
+ # All supported versions of musl provide it as well
6178
gcc_cv_libc_provides_ssp=yes;;
6179
*-*-darwin* | *-*-freebsd*)
6180
AC_CHECK_FUNC(__stack_chk_fail,[gcc_cv_libc_provides_ssp=yes],
6181
@@ -5328,6 +5332,9 @@ case "$target" in
6182
gcc_cv_target_dl_iterate_phdr=no
6186
+ gcc_cv_target_dl_iterate_phdr=yes
6189
GCC_TARGET_TEMPLATE([TARGET_DL_ITERATE_PHDR])
6190
if test x$gcc_cv_target_dl_iterate_phdr = xyes; then
6191
--- a/src/gcc/cp/Make-lang.in
6192
+++ b/src/gcc/cp/Make-lang.in
6193
@@ -155,7 +155,7 @@ check-c++-subtargets : check-g++-subtargets
6194
# List of targets that can use the generic check- rule and its // variant.
6195
lang_checks += check-g++
6196
lang_checks_parallelized += check-g++
6197
-# For description see comment above check_gcc_parallelize in gcc/Makefile.in.
6198
+# For description see the check_$lang_parallelize comment in gcc/Makefile.in.
6199
check_g++_parallelize = 10000
6202
@@ -221,6 +221,7 @@ c++.mostlyclean:
6204
-rm -f cp/*$(objext)
6205
-rm -f cp/*$(coverageexts)
6206
+ -rm -f xg++$(exeext) g++-cross$(exeext) cc1plus$(exeext)
6209
-rm -f cp/config.status cp/Makefile
6210
--- a/src/gcc/cppbuiltin.c
6211
+++ b/src/gcc/cppbuiltin.c
6212
@@ -62,18 +62,41 @@ parse_basever (int *major, int *minor, int *patchlevel)
6213
*patchlevel = s_patchlevel;
6216
+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]"
6217
+ to create Linaro release number YYYYMM and spin version. */
6219
+parse_linarover (int *release, int *spin)
6221
+ static int s_year = -1, s_month, s_spin;
6224
+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3)
6226
+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month);
6231
+ *release = s_year * 100 + s_month;
6237
/* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */
6239
define__GNUC__ (cpp_reader *pfile)
6241
- int major, minor, patchlevel;
6242
+ int major, minor, patchlevel, linaro_release, linaro_spin;
6244
parse_basever (&major, &minor, &patchlevel);
6245
+ parse_linarover (&linaro_release, &linaro_spin);
6246
cpp_define_formatted (pfile, "__GNUC__=%d", major);
6247
cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
6248
cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
6249
cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
6250
+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release);
6251
+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin);
6252
cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
6253
cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
6254
cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
6255
--- a/src/gcc/cprop.c
6256
+++ b/src/gcc/cprop.c
6257
@@ -285,6 +285,15 @@ cprop_constant_p (const_rtx x)
6258
return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x));
6261
+/* Determine whether the rtx X should be treated as a register that can
6262
+ be propagated. Any pseudo-register is fine. */
6265
+cprop_reg_p (const_rtx x)
6267
+ return REG_P (x) && !HARD_REGISTER_P (x);
6270
/* Scan SET present in INSN and add an entry to the hash TABLE.
6271
IMPLICIT is true if it's an implicit set, false otherwise. */
6273
@@ -295,8 +304,7 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table,
6274
rtx src = SET_SRC (set);
6275
rtx dest = SET_DEST (set);
6278
- && ! HARD_REGISTER_P (dest)
6279
+ if (cprop_reg_p (dest)
6280
&& reg_available_p (dest, insn)
6281
&& can_copy_p (GET_MODE (dest)))
6283
@@ -321,9 +329,8 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table,
6284
src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
6286
/* Record sets for constant/copy propagation. */
6288
+ if ((cprop_reg_p (src)
6290
- && ! HARD_REGISTER_P (src)
6291
&& reg_available_p (src, insn))
6292
|| cprop_constant_p (src))
6293
insert_set_in_table (dest, src, insn, table, implicit);
6294
@@ -821,15 +828,15 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn)
6298
-/* Find a set of REGNOs that are available on entry to INSN's block. Return
6299
- NULL no such set is found. */
6300
+/* Find a set of REGNOs that are available on entry to INSN's block. If found,
6301
+ SET_RET[0] will be assigned a set with a register source and SET_RET[1] a
6302
+ set with a constant source. If not found the corresponding entry is set to
6305
-static struct cprop_expr *
6306
-find_avail_set (int regno, rtx_insn *insn)
6308
+find_avail_set (int regno, rtx_insn *insn, struct cprop_expr *set_ret[2])
6310
- /* SET1 contains the last set found that can be returned to the caller for
6311
- use in a substitution. */
6312
- struct cprop_expr *set1 = 0;
6313
+ set_ret[0] = set_ret[1] = NULL;
6315
/* Loops are not possible here. To get a loop we would need two sets
6316
available at the start of the block containing INSN. i.e. we would
6317
@@ -869,8 +876,10 @@ find_avail_set (int regno, rtx_insn *insn)
6318
If the source operand changed, we may still use it for the next
6319
iteration of this loop, but we may not use it for substitutions. */
6321
- if (cprop_constant_p (src) || reg_not_set_p (src, insn))
6323
+ if (cprop_constant_p (src))
6325
+ else if (reg_not_set_p (src, insn))
6328
/* If the source of the set is anything except a register, then
6329
we have reached the end of the copy chain. */
6330
@@ -881,10 +890,6 @@ find_avail_set (int regno, rtx_insn *insn)
6331
and see if we have an available copy into SRC. */
6332
regno = REGNO (src);
6335
- /* SET1 holds the last set that was available and anticipatable at
6340
/* Subroutine of cprop_insn that tries to propagate constants into
6341
@@ -1050,40 +1055,40 @@ cprop_insn (rtx_insn *insn)
6342
int changed = 0, changed_this_round;
6346
- changed_this_round = 0;
6347
- reg_use_count = 0;
6348
- note_uses (&PATTERN (insn), find_used_regs, NULL);
6350
- /* We may win even when propagating constants into notes. */
6351
- note = find_reg_equal_equiv_note (insn);
6353
- find_used_regs (&XEXP (note, 0), NULL);
6355
- for (i = 0; i < reg_use_count; i++)
6358
- rtx reg_used = reg_use_table[i];
6359
- unsigned int regno = REGNO (reg_used);
6361
- struct cprop_expr *set;
6362
+ changed_this_round = 0;
6363
+ reg_use_count = 0;
6364
+ note_uses (&PATTERN (insn), find_used_regs, NULL);
6366
- /* If the register has already been set in this block, there's
6367
- nothing we can do. */
6368
- if (! reg_not_set_p (reg_used, insn))
6370
+ /* We may win even when propagating constants into notes. */
6371
+ note = find_reg_equal_equiv_note (insn);
6373
+ find_used_regs (&XEXP (note, 0), NULL);
6375
- /* Find an assignment that sets reg_used and is available
6376
- at the start of the block. */
6377
- set = find_avail_set (regno, insn);
6380
+ for (i = 0; i < reg_use_count; i++)
6382
+ rtx reg_used = reg_use_table[i];
6383
+ unsigned int regno = REGNO (reg_used);
6384
+ rtx src_cst = NULL, src_reg = NULL;
6385
+ struct cprop_expr *set[2];
6388
+ /* If the register has already been set in this block, there's
6389
+ nothing we can do. */
6390
+ if (! reg_not_set_p (reg_used, insn))
6393
- /* Constant propagation. */
6394
- if (cprop_constant_p (src))
6396
- if (constprop_register (reg_used, src, insn))
6397
+ /* Find an assignment that sets reg_used and is available
6398
+ at the start of the block. */
6399
+ find_avail_set (regno, insn, set);
6401
+ src_reg = set[0]->src;
6403
+ src_cst = set[1]->src;
6405
+ /* Constant propagation. */
6406
+ if (src_cst && cprop_constant_p (src_cst)
6407
+ && constprop_register (reg_used, src_cst, insn))
6409
changed_this_round = changed = 1;
6410
global_const_prop_count++;
6411
@@ -1093,18 +1098,16 @@ retry:
6412
"GLOBAL CONST-PROP: Replacing reg %d in ", regno);
6413
fprintf (dump_file, "insn %d with constant ",
6415
- print_rtl (dump_file, src);
6416
+ print_rtl (dump_file, src_cst);
6417
fprintf (dump_file, "\n");
6419
if (insn->deleted ())
6423
- else if (REG_P (src)
6424
- && REGNO (src) >= FIRST_PSEUDO_REGISTER
6425
- && REGNO (src) != regno)
6427
- if (try_replace_reg (reg_used, src, insn))
6428
+ /* Copy propagation. */
6429
+ else if (src_reg && cprop_reg_p (src_reg)
6430
+ && REGNO (src_reg) != regno
6431
+ && try_replace_reg (reg_used, src_reg, insn))
6433
changed_this_round = changed = 1;
6434
global_copy_prop_count++;
6435
@@ -1113,7 +1116,7 @@ retry:
6437
"GLOBAL COPY-PROP: Replacing reg %d in insn %d",
6438
regno, INSN_UID (insn));
6439
- fprintf (dump_file, " with reg %d\n", REGNO (src));
6440
+ fprintf (dump_file, " with reg %d\n", REGNO (src_reg));
6443
/* The original insn setting reg_used may or may not now be
6444
@@ -1123,12 +1126,10 @@ retry:
6445
and made things worse. */
6449
- /* If try_replace_reg simplified the insn, the regs found
6450
- by find_used_regs may not be valid anymore. Start over. */
6451
- if (changed_this_round)
6454
+ /* If try_replace_reg simplified the insn, the regs found by find_used_regs
6455
+ may not be valid anymore. Start over. */
6456
+ while (changed_this_round);
6458
if (changed && DEBUG_INSN_P (insn))
6460
@@ -1191,7 +1192,7 @@ do_local_cprop (rtx x, rtx_insn *insn)
6461
/* Rule out USE instructions and ASM statements as we don't want to
6462
change the hard registers mentioned. */
6464
- && (REGNO (x) >= FIRST_PSEUDO_REGISTER
6465
+ && (cprop_reg_p (x)
6466
|| (GET_CODE (PATTERN (insn)) != USE
6467
&& asm_noperands (PATTERN (insn)) < 0)))
6469
@@ -1207,7 +1208,7 @@ do_local_cprop (rtx x, rtx_insn *insn)
6471
if (cprop_constant_p (this_rtx))
6473
- if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
6474
+ if (cprop_reg_p (this_rtx)
6475
/* Don't copy propagate if it has attached REG_EQUIV note.
6476
At this point this only function parameters should have
6477
REG_EQUIV notes and if the argument slot is used somewhere
6478
@@ -1328,9 +1329,8 @@ implicit_set_cond_p (const_rtx cond)
6479
if (GET_CODE (cond) != EQ && GET_CODE (cond) != NE)
6482
- /* The first operand of COND must be a pseudo-reg. */
6483
- if (! REG_P (XEXP (cond, 0))
6484
- || HARD_REGISTER_P (XEXP (cond, 0)))
6485
+ /* The first operand of COND must be a register we can propagate. */
6486
+ if (!cprop_reg_p (XEXP (cond, 0)))
6489
/* The second operand of COND must be a suitable constant. */
6490
--- a/src/gcc/df-core.c
6491
+++ b/src/gcc/df-core.c
6492
@@ -642,7 +642,6 @@ void
6493
df_finish_pass (bool verify ATTRIBUTE_UNUSED)
6498
#ifdef ENABLE_DF_CHECKING
6500
@@ -658,21 +657,15 @@ df_finish_pass (bool verify ATTRIBUTE_UNUSED)
6501
saved_flags = df->changeable_flags;
6504
- for (i = 0; i < df->num_problems_defined; i++)
6505
+ /* We iterate over problems by index as each problem removed will
6506
+ lead to problems_in_order to be reordered. */
6507
+ for (i = 0; i < DF_LAST_PROBLEM_PLUS1; i++)
6509
- struct dataflow *dflow = df->problems_in_order[i];
6510
- struct df_problem *problem = dflow->problem;
6511
+ struct dataflow *dflow = df->problems_by_index[i];
6513
- if (dflow->optional_p)
6515
- gcc_assert (problem->remove_problem_fun);
6516
- (problem->remove_problem_fun) ();
6517
- df->problems_in_order[i] = NULL;
6518
- df->problems_by_index[problem->id] = NULL;
6521
+ if (dflow && dflow->optional_p)
6522
+ df_remove_problem (dflow);
6524
- df->num_problems_defined -= removed;
6526
/* Clear all of the flags. */
6527
df->changeable_flags = 0;
6528
--- a/src/gcc/fortran/Make-lang.in
6529
+++ b/src/gcc/fortran/Make-lang.in
6530
@@ -167,7 +167,7 @@ check-f95-subtargets : check-gfortran-subtargets
6531
check-fortran-subtargets : check-gfortran-subtargets
6532
lang_checks += check-gfortran
6533
lang_checks_parallelized += check-gfortran
6534
-# For description see comment above check_gcc_parallelize in gcc/Makefile.in.
6535
+# For description see the check_$lang_parallelize comment in gcc/Makefile.in.
6536
check_gfortran_parallelize = 10000
6538
# GFORTRAN documentation.
6539
@@ -275,7 +275,7 @@ fortran.uninstall:
6540
# We just have to delete files specific to us.
6542
fortran.mostlyclean:
6543
- -rm -f f951$(exeext)
6544
+ -rm -f gfortran$(exeext) gfortran-cross$(exeext) f951$(exeext)
6548
--- a/src/gcc/genpreds.c
6549
+++ b/src/gcc/genpreds.c
6550
@@ -640,12 +640,14 @@ struct constraint_data
6551
const char *regclass; /* for register constraints */
6552
rtx exp; /* for other constraints */
6553
unsigned int lineno; /* line of definition */
6554
- unsigned int is_register : 1;
6555
- unsigned int is_const_int : 1;
6556
- unsigned int is_const_dbl : 1;
6557
- unsigned int is_extra : 1;
6558
- unsigned int is_memory : 1;
6559
- unsigned int is_address : 1;
6560
+ unsigned int is_register : 1;
6561
+ unsigned int is_const_int : 1;
6562
+ unsigned int is_const_dbl : 1;
6563
+ unsigned int is_extra : 1;
6564
+ unsigned int is_memory : 1;
6565
+ unsigned int is_address : 1;
6566
+ unsigned int maybe_allows_reg : 1;
6567
+ unsigned int maybe_allows_mem : 1;
6570
/* Overview of all constraints beginning with a given letter. */
6571
@@ -691,6 +693,9 @@ static unsigned int satisfied_start;
6572
static unsigned int const_int_start, const_int_end;
6573
static unsigned int memory_start, memory_end;
6574
static unsigned int address_start, address_end;
6575
+static unsigned int maybe_allows_none_start, maybe_allows_none_end;
6576
+static unsigned int maybe_allows_reg_start, maybe_allows_reg_end;
6577
+static unsigned int maybe_allows_mem_start, maybe_allows_mem_end;
6579
/* Convert NAME, which contains angle brackets and/or underscores, to
6580
a string that can be used as part of a C identifier. The string
6581
@@ -711,6 +716,34 @@ mangle (const char *name)
6582
return XOBFINISH (rtl_obstack, const char *);
6585
+/* Return a bitmask, bit 1 if EXP maybe allows a REG/SUBREG, 2 if EXP
6586
+ maybe allows a MEM. Bits should be clear only when we are sure it
6587
+ will not allow a REG/SUBREG or a MEM. */
6589
+compute_maybe_allows (rtx exp)
6591
+ switch (GET_CODE (exp))
6593
+ case IF_THEN_ELSE:
6594
+ /* Conservative answer is like IOR, of the THEN and ELSE branches. */
6595
+ return compute_maybe_allows (XEXP (exp, 1))
6596
+ | compute_maybe_allows (XEXP (exp, 2));
6598
+ return compute_maybe_allows (XEXP (exp, 0))
6599
+ & compute_maybe_allows (XEXP (exp, 1));
6601
+ return compute_maybe_allows (XEXP (exp, 0))
6602
+ | compute_maybe_allows (XEXP (exp, 1));
6604
+ if (*XSTR (exp, 1) == '\0')
6605
+ return (strstr (XSTR (exp, 0), "reg") != NULL ? 1 : 0)
6606
+ | (strstr (XSTR (exp, 0), "mem") != NULL ? 2 : 0);
6613
/* Add one constraint, of any sort, to the tables. NAME is its name;
6614
REGCLASS is the register class, if any; EXP is the expression to
6615
test, if any; IS_MEMORY and IS_ADDRESS indicate memory and address
6616
@@ -866,6 +899,11 @@ add_constraint (const char *name, const char *regclass,
6617
c->is_extra = !(regclass || is_const_int || is_const_dbl);
6618
c->is_memory = is_memory;
6619
c->is_address = is_address;
6620
+ int maybe_allows = 3;
6622
+ maybe_allows = compute_maybe_allows (exp);
6623
+ c->maybe_allows_reg = (maybe_allows & 1) != 0;
6624
+ c->maybe_allows_mem = (maybe_allows & 2) != 0;
6626
c->next_this_letter = *slot;
6628
@@ -940,8 +978,30 @@ choose_enum_order (void)
6629
enum_order[next++] = c;
6632
+ maybe_allows_none_start = next;
6633
+ FOR_ALL_CONSTRAINTS (c)
6634
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6635
+ && !c->maybe_allows_reg && !c->maybe_allows_mem)
6636
+ enum_order[next++] = c;
6637
+ maybe_allows_none_end = next;
6639
+ maybe_allows_reg_start = next;
6640
+ FOR_ALL_CONSTRAINTS (c)
6641
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6642
+ && c->maybe_allows_reg && !c->maybe_allows_mem)
6643
+ enum_order[next++] = c;
6644
+ maybe_allows_reg_end = next;
6646
+ maybe_allows_mem_start = next;
6647
+ FOR_ALL_CONSTRAINTS (c)
6648
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6649
+ && !c->maybe_allows_reg && c->maybe_allows_mem)
6650
+ enum_order[next++] = c;
6651
+ maybe_allows_mem_end = next;
6653
FOR_ALL_CONSTRAINTS (c)
6654
- if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address)
6655
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6656
+ && c->maybe_allows_reg && c->maybe_allows_mem)
6657
enum_order[next++] = c;
6658
gcc_assert (next == num_constraints);
6660
@@ -1229,6 +1289,41 @@ write_range_function (const char *name, unsigned int start, unsigned int end)
6664
+/* Write a definition for insn_extra_constraint_allows_reg_mem function. */
6666
+write_allows_reg_mem_function (void)
6668
+ printf ("static inline void\n"
6669
+ "insn_extra_constraint_allows_reg_mem (enum constraint_num c,\n"
6670
+ "\t\t\t\t bool *allows_reg, bool *allows_mem)\n"
6672
+ if (maybe_allows_none_start != maybe_allows_none_end)
6673
+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n"
6675
+ enum_order[maybe_allows_none_start]->c_name,
6676
+ enum_order[maybe_allows_none_end - 1]->c_name);
6677
+ if (maybe_allows_reg_start != maybe_allows_reg_end)
6678
+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n"
6680
+ " *allows_reg = true;\n"
6683
+ enum_order[maybe_allows_reg_start]->c_name,
6684
+ enum_order[maybe_allows_reg_end - 1]->c_name);
6685
+ if (maybe_allows_mem_start != maybe_allows_mem_end)
6686
+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n"
6688
+ " *allows_mem = true;\n"
6691
+ enum_order[maybe_allows_mem_start]->c_name,
6692
+ enum_order[maybe_allows_mem_end - 1]->c_name);
6693
+ printf (" (void) c;\n"
6694
+ " *allows_reg = true;\n"
6695
+ " *allows_mem = true;\n"
6699
/* VEC is a list of key/value pairs, with the keys being lower bounds
6700
of a range. Output a decision tree that handles the keys covered by
6701
[VEC[START], VEC[END]), returning FALLBACK for keys lower then VEC[START]'s.
6702
@@ -1326,6 +1421,7 @@ write_tm_preds_h (void)
6703
memory_start, memory_end);
6704
write_range_function ("insn_extra_address_constraint",
6705
address_start, address_end);
6706
+ write_allows_reg_mem_function ();
6708
if (constraint_max_namelen > 1)
6710
--- a/src/gcc/go/Make-lang.in
6711
+++ b/src/gcc/go/Make-lang.in
6712
@@ -197,6 +197,7 @@ go.uninstall:
6714
-rm -f go/*$(objext)
6715
-rm -f go/*$(coverageexts)
6716
+ -rm -f gccgo$(exeext) gccgo-cross$(exeext) go1$(exeext)
6719
go.maintainer-clean:
6720
--- a/src/gcc/ira-costs.c
6721
+++ b/src/gcc/ira-costs.c
6722
@@ -1380,8 +1380,6 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref)
6723
rtx dest = SET_DEST (set);
6724
rtx src = SET_SRC (set);
6726
- dest = SET_DEST (set);
6727
- src = SET_SRC (set);
6728
if (GET_CODE (dest) == SUBREG
6729
&& (GET_MODE_SIZE (GET_MODE (dest))
6730
== GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))
6731
--- a/src/gcc/jit/Make-lang.in
6732
+++ b/src/gcc/jit/Make-lang.in
6733
@@ -285,6 +285,10 @@ jit.uninstall:
6734
# We just have to delete files specific to us.
6737
+ -rm -f $(LIBGCCJIT_FILENAME) $(LIBGCCJIT_SYMLINK)
6738
+ -rm -f $(LIBGCCJIT_LINKER_NAME_SYMLINK) $(FULL_DRIVER_NAME)
6739
+ -rm -f $(LIBGCCJIT_SONAME)
6740
+ -rm -f $(jit_OBJS)
6744
--- a/src/gcc/loop-invariant.c
6745
+++ b/src/gcc/loop-invariant.c
6746
@@ -740,8 +740,11 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
6747
enough to not regress 410.bwaves either (by still moving reg+reg
6749
See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */
6750
- inv->cheap_address = address_cost (SET_SRC (set), word_mode,
6751
- ADDR_SPACE_GENERIC, speed) < 3;
6752
+ if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set))))
6753
+ inv->cheap_address = address_cost (SET_SRC (set), word_mode,
6754
+ ADDR_SPACE_GENERIC, speed) < 3;
6756
+ inv->cheap_address = false;
6760
@@ -1174,6 +1177,7 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
6763
if (!inv->cheap_address
6764
+ || inv->def->n_uses == 0
6765
|| inv->def->n_addr_uses < inv->def->n_uses)
6766
(*comp_cost) += inv->cost * inv->eqno;
6768
@@ -1512,6 +1516,79 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group)
6772
+/* Whether invariant INV setting REG can be moved out of LOOP, at the end of
6773
+ the block preceding its header. */
6776
+can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg)
6779
+ unsigned int dest_regno, defs_in_loop_count = 0;
6780
+ rtx_insn *insn = inv->insn;
6781
+ basic_block bb = BLOCK_FOR_INSN (inv->insn);
6783
+ /* We ignore hard register and memory access for cost and complexity reasons.
6784
+ Hard register are few at this stage and expensive to consider as they
6785
+ require building a separate data flow. Memory access would require using
6786
+ df_simulate_* and can_move_insns_across functions and is more complex. */
6787
+ if (!REG_P (reg) || HARD_REGISTER_P (reg))
6790
+ /* Check whether the set is always executed. We could omit this condition if
6791
+ we know that the register is unused outside of the loop, but it does not
6792
+ seem worth finding out. */
6793
+ if (!inv->always_executed)
6796
+ /* Check that all uses that would be dominated by def are already dominated
6798
+ dest_regno = REGNO (reg);
6799
+ for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
6801
+ rtx_insn *use_insn;
6802
+ basic_block use_bb;
6804
+ use_insn = DF_REF_INSN (use);
6805
+ use_bb = BLOCK_FOR_INSN (use_insn);
6807
+ /* Ignore instruction considered for moving. */
6808
+ if (use_insn == insn)
6811
+ /* Don't consider uses outside loop. */
6812
+ if (!flow_bb_inside_loop_p (loop, use_bb))
6815
+ /* Don't move if a use is not dominated by def in insn. */
6816
+ if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn))
6818
+ if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb))
6822
+ /* Check for other defs. Any other def in the loop might reach a use
6823
+ currently reached by the def in insn. */
6824
+ for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def))
6826
+ basic_block def_bb = DF_REF_BB (def);
6828
+ /* Defs in exit block cannot reach a use they weren't already. */
6829
+ if (single_succ_p (def_bb))
6831
+ basic_block def_bb_succ;
6833
+ def_bb_succ = single_succ (def_bb);
6834
+ if (!flow_bb_inside_loop_p (loop, def_bb_succ))
6838
+ if (++defs_in_loop_count > 1)
6845
/* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false
6848
@@ -1545,11 +1622,8 @@ move_invariant_reg (struct loop *loop, unsigned invno)
6852
- /* Move the set out of the loop. If the set is always executed (we could
6853
- omit this condition if we know that the register is unused outside of
6854
- the loop, but it does not seem worth finding out) and it has no uses
6855
- that would not be dominated by it, we may just move it (TODO).
6856
- Otherwise we need to create a temporary register. */
6857
+ /* If possible, just move the set out of the loop. Otherwise, we
6858
+ need to create a temporary register. */
6859
set = single_set (inv->insn);
6860
reg = dest = SET_DEST (set);
6861
if (GET_CODE (reg) == SUBREG)
6862
@@ -1557,19 +1631,25 @@ move_invariant_reg (struct loop *loop, unsigned invno)
6864
regno = REGNO (reg);
6866
- reg = gen_reg_rtx_and_attrs (dest);
6867
+ if (!can_move_invariant_reg (loop, inv, reg))
6869
+ reg = gen_reg_rtx_and_attrs (dest);
6871
- /* Try replacing the destination by a new pseudoregister. */
6872
- validate_change (inv->insn, &SET_DEST (set), reg, true);
6873
+ /* Try replacing the destination by a new pseudoregister. */
6874
+ validate_change (inv->insn, &SET_DEST (set), reg, true);
6876
- /* As well as all the dominated uses. */
6877
- replace_uses (inv, reg, true);
6878
+ /* As well as all the dominated uses. */
6879
+ replace_uses (inv, reg, true);
6881
- /* And validate all the changes. */
6882
- if (!apply_change_group ())
6884
+ /* And validate all the changes. */
6885
+ if (!apply_change_group ())
6888
- emit_insn_after (gen_move_insn (dest, reg), inv->insn);
6889
+ emit_insn_after (gen_move_insn (dest, reg), inv->insn);
6891
+ else if (dump_file)
6892
+ fprintf (dump_file, "Invariant %d moved without introducing a new "
6893
+ "temporary register\n", invno);
6894
reorder_insns (inv->insn, inv->insn, BB_END (preheader));
6896
/* If there is a REG_EQUAL note on the insn we just moved, and the
6897
--- a/src/gcc/lra-constraints.c
6898
+++ b/src/gcc/lra-constraints.c
6899
@@ -1656,8 +1656,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass,
6903
- // ??? Is this assert right
6904
- // lra_assert (hard_reg_set_subset_p (set, reg_class_contents[rclass]));
6905
+ lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set));
6906
COPY_HARD_REG_SET (temp, set);
6907
AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs);
6908
return (hard_reg_set_subset_p
6909
--- a/src/gcc/objc/Make-lang.in
6910
+++ b/src/gcc/objc/Make-lang.in
6911
@@ -114,6 +114,7 @@ objc.uninstall:
6913
-rm -f objc/*$(objext) objc/xforward objc/fflags
6914
-rm -f objc/*$(coverageexts)
6915
+ -rm -f cc1obj$(exeext)
6916
objc.clean: objc.mostlyclean
6917
-rm -rf objc-headers
6919
--- a/src/gcc/objcp/Make-lang.in
6920
+++ b/src/gcc/objcp/Make-lang.in
6921
@@ -142,6 +142,7 @@ obj-c++.uninstall:
6922
obj-c++.mostlyclean:
6923
-rm -f objcp/*$(objext)
6924
-rm -f objcp/*$(coverageexts)
6925
+ -rm -f cc1objplus$(exeext)
6926
obj-c++.clean: obj-c++.mostlyclean
6928
-rm -f objcp/config.status objcp/Makefile
6929
--- a/src/gcc/optabs.c
6930
+++ b/src/gcc/optabs.c
6931
@@ -6544,18 +6544,28 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
6933
struct expand_operand ops[2];
6934
rtx rtx_op0, rtx_op1;
6935
+ machine_mode m0, m1;
6936
enum rtx_code rcode = get_rtx_code (tcode, unsignedp);
6938
gcc_assert (TREE_CODE_CLASS (tcode) == tcc_comparison);
6940
- /* Expand operands. */
6941
+ /* Expand operands. For vector types with scalar modes, e.g. where int64x1_t
6942
+ has mode DImode, this can produce a constant RTX of mode VOIDmode; in such
6943
+ cases, use the original mode. */
6944
rtx_op0 = expand_expr (t_op0, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op0)),
6946
+ m0 = GET_MODE (rtx_op0);
6947
+ if (m0 == VOIDmode)
6948
+ m0 = TYPE_MODE (TREE_TYPE (t_op0));
6950
rtx_op1 = expand_expr (t_op1, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op1)),
6952
+ m1 = GET_MODE (rtx_op1);
6953
+ if (m1 == VOIDmode)
6954
+ m1 = TYPE_MODE (TREE_TYPE (t_op1));
6956
- create_input_operand (&ops[0], rtx_op0, GET_MODE (rtx_op0));
6957
- create_input_operand (&ops[1], rtx_op1, GET_MODE (rtx_op1));
6958
+ create_input_operand (&ops[0], rtx_op0, m0);
6959
+ create_input_operand (&ops[1], rtx_op1, m1);
6960
if (!maybe_legitimize_operands (icode, 4, 2, ops))
6962
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
6963
--- a/src/gcc/params.def
6964
+++ b/src/gcc/params.def
6965
@@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH,
6966
"Maximum depth of search in the dominator tree for expressions to hoist",
6970
+/* When synthesizing expnonentiation by a real constant operations using square
6971
+ roots, this controls how deep sqrt chains we are willing to generate. */
6972
+DEFPARAM(PARAM_MAX_POW_SQRT_DEPTH,
6973
+ "max-pow-sqrt-depth",
6974
+ "Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant",
6977
/* This parameter limits the number of insns in a loop that will be unrolled,
6978
and by how much the loop is unrolled.
6980
--- a/src/gcc/rtlanal.c
6981
+++ b/src/gcc/rtlanal.c
6982
@@ -104,7 +104,10 @@ generic_subrtx_iterator <T>::add_single_to_queue (array_type &array,
6985
gcc_checking_assert (i == LOCAL_ELEMS);
6986
- vec_safe_grow (array.heap, i + 1);
6987
+ /* A previous iteration might also have moved from the stack to the
6988
+ heap, in which case the heap array will already be big enough. */
6989
+ if (vec_safe_length (array.heap) <= i)
6990
+ vec_safe_grow (array.heap, i + 1);
6991
base = array.heap->address ();
6992
memcpy (base, array.stack, sizeof (array.stack));
6993
base[LOCAL_ELEMS] = x;
6994
--- a/src/gcc/simplify-rtx.c
6995
+++ b/src/gcc/simplify-rtx.c
6996
@@ -1171,7 +1171,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
6997
= (float_truncate:SF foo:DF).
6999
(float_truncate:DF (float_extend:XF foo:SF))
7000
- = (float_extend:SF foo:DF). */
7001
+ = (float_extend:DF foo:SF). */
7002
if ((GET_CODE (op) == FLOAT_TRUNCATE
7003
&& flag_unsafe_math_optimizations)
7004
|| GET_CODE (op) == FLOAT_EXTEND)
7005
@@ -1183,14 +1183,14 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
7006
XEXP (op, 0), mode);
7008
/* (float_truncate (float x)) is (float x) */
7009
- if (GET_CODE (op) == FLOAT
7010
+ if ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT)
7011
&& (flag_unsafe_math_optimizations
7012
|| (SCALAR_FLOAT_MODE_P (GET_MODE (op))
7013
&& ((unsigned)significand_size (GET_MODE (op))
7014
>= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))
7015
- num_sign_bit_copies (XEXP (op, 0),
7016
GET_MODE (XEXP (op, 0))))))))
7017
- return simplify_gen_unary (FLOAT, mode,
7018
+ return simplify_gen_unary (GET_CODE (op), mode,
7020
GET_MODE (XEXP (op, 0)));
7022
@@ -1221,7 +1221,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
7023
rounding can't happen.
7025
if (GET_CODE (op) == FLOAT_EXTEND
7026
- || (GET_CODE (op) == FLOAT
7027
+ || ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT)
7028
&& SCALAR_FLOAT_MODE_P (GET_MODE (op))
7029
&& ((unsigned)significand_size (GET_MODE (op))
7030
>= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))
7031
--- a/src/gcc/stmt.c
7032
+++ b/src/gcc/stmt.c
7033
@@ -342,13 +342,7 @@ parse_output_constraint (const char **constraint_p, int operand_num,
7034
else if (insn_extra_memory_constraint (cn))
7038
- /* Otherwise we can't assume anything about the nature of
7039
- the constraint except that it isn't purely registers.
7040
- Treat it like "g" and hope for the best. */
7041
- *allows_reg = true;
7042
- *allows_mem = true;
7044
+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem);
7048
@@ -465,13 +459,7 @@ parse_input_constraint (const char **constraint_p, int input_num,
7049
else if (insn_extra_memory_constraint (cn))
7053
- /* Otherwise we can't assume anything about the nature of
7054
- the constraint except that it isn't purely registers.
7055
- Treat it like "g" and hope for the best. */
7056
- *allows_reg = true;
7057
- *allows_mem = true;
7059
+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem);
7063
--- a/src/gcc/target.def
7064
+++ b/src/gcc/target.def
7065
@@ -1975,7 +1975,7 @@ merging.",
7068
"If defined, this target hook points to an array of @samp{struct\n\
7069
-attribute_spec} (defined in @file{tree.h}) specifying the machine\n\
7070
+attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\
7071
specific attributes for this target and some of the restrictions on the\n\
7072
entities to which these attributes are applied and the arguments they\n\
7075
+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr65648.c
7077
+/* PR target/65648 */
7079
+int a = 0, *b = 0, c = 0;
7082
+static long long f = 0;
7084
+unsigned char j = 0;
7086
+__attribute__((noinline, noclone)) void
7087
+foo (int x, int *y)
7089
+ asm volatile ("" : : "r" (x), "r" (y) : "memory");
7092
+__attribute__((noinline, noclone)) void
7093
+bar (const char *x, long long y)
7095
+ asm volatile ("" : : "r" (x), "r" (&y) : "memory");
7097
+ __builtin_abort ();
7105
+ j = (!a) - (c <= e);
7112
+++ b/src/gcc/testsuite/gcc.dg/loop-8.c
7114
+/* { dg-do compile } */
7115
+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
7122
+ for (i = 0; i < 100; i++)
7133
+/* Load of 42 is moved out of the loop, introducing a new pseudo register. */
7134
+/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */
7135
+/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */
7136
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
7139
+++ b/src/gcc/testsuite/gcc.dg/loop-9.c
7141
+/* { dg-do compile } */
7142
+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
7148
+ for (i = 0; i < 100; i++)
7152
+/* Load of x is moved out of the loop. */
7153
+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */
7154
+/* { dg-final { scan-rtl-dump "without introducing a new temporary register" "loop2_invariant" } } */
7155
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
7158
+++ b/src/gcc/testsuite/gcc.dg/loop-invariant.c
7160
+/* { dg-do compile { target x86_64-*-* } } */
7161
+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */
7162
+/* NOTE: The target list above could be extended to other targets that have
7163
+ conditional moves, but don't have zero registers. */
7173
+ enum test_type type;
7178
+ struct type_node *referring;
7183
+ struct test_node *next;
7186
+int iterate (struct test_node *, unsigned, struct test_ref **);
7189
+loop_invar (struct test_node *node)
7191
+ struct test_ref *ref;
7193
+ for (unsigned i = 0; iterate (node, i, &ref); i++)
7194
+ if (loop_invar ((ref->referring && ref->referring->type == TYPE0)
7195
+ ? ((struct test_node *) (ref->referring)) : 0))
7201
+/* { dg-final { scan-rtl-dump "Decided to move invariant" "loop2_invariant" } } */
7202
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
7204
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-1.c
7206
+/* { dg-do run } */
7207
+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */
7209
+#define EXPN (-6 * (0.5*0.5*0.5*0.5))
7211
+#include "pow-sqrt.x"
7213
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-2.c
7215
+/* { dg-do run } */
7216
+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */
7218
+#define EXPN (-5.875)
7219
+#include "pow-sqrt.x"
7221
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-3.c
7223
+/* { dg-do run } */
7224
+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=3" } */
7226
+#define EXPN (1.25)
7227
+#include "pow-sqrt.x"
7229
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt.x
7232
+extern void abort (void);
7235
+__attribute__((noinline)) double
7236
+real_pow (double x, double pow_exp)
7238
+ return __builtin_pow (x, pow_exp);
7241
+#define EPS (0.000000000000000000001)
7243
+#define SYNTH_POW(X, Y) __builtin_pow (X, Y)
7244
+volatile double arg;
7249
+ double i_arg = 0.1;
7251
+ for (arg = i_arg; arg < 100.0; arg += 1.0)
7253
+ double synth_res = SYNTH_POW (arg, EXPN);
7254
+ double real_res = real_pow (arg, EXPN);
7256
+ if (__builtin_abs (SYNTH_POW (arg, EXPN) - real_pow (arg, EXPN)) > EPS)
7262
+++ b/src/gcc/testsuite/gcc.dg/torture/pr66076.c
7264
+/* { dg-do compile } */
7265
+/* { dg-options "" } */
7266
+/* { dg-options "-mno-prefer-avx128 -march=bdver4" { target i?86-*-* x86_64-*-* } } */
7269
+f0a (char *result, char *arg1, char *arg4, char temp_6)
7272
+ for (idx = 0; idx < 416; idx += 1)
7273
+ result[idx] = (arg1[idx] + arg4[idx]) * temp_6;
7276
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c
7278
+/* { dg-do compile } */
7279
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
7281
+void foo (double *p)
7284
+ for (i = -20000; i < 200000; i+= 40)
7329
+/* We should groups address type IV uses. */
7330
+/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */
7331
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
7332
--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
7333
+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
7336
vf2_t vf2 = (vf2_t){ 17.f, 18.f };
7337
vi4_t vi4 = (vi4_t){ 0xdeadbabe, 0xbabecafe, 0xcafebeef, 0xbeefdead };
7338
+vlf1_t vlf1 = (vlf1_t) { 17.0 };
7340
union int128_t qword;
7342
int *int_ptr = (int *)0xabcdef0123456789ULL;
7343
@@ -41,4 +43,5 @@ FUNC_VAL_CHECK (11, long double, 98765432123456789.987654321L, Q0, flat)
7344
FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64)
7345
FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128)
7346
FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat)
7347
+FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat)
7349
--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
7350
+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
7351
@@ -10,6 +10,9 @@ typedef float vf4_t __attribute__((vector_size (16)));
7352
/* 128-bit vector of 4 ints. */
7353
typedef int vi4_t __attribute__((vector_size (16)));
7355
+/* 128-bit vector of 1 quad precision float. */
7356
+typedef long double vlf1_t __attribute__((vector_size (16)));
7358
/* signed quad-word (in an union for the convenience of initialization). */
7361
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
7362
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
7363
@@ -27,14 +27,26 @@ load_lib gcc-dg.exp
7366
load_lib c-torture.exp
7367
-load_lib target-supports.exp
7368
-load_lib torture-options.exp
7372
-if {[istarget arm*-*-*]
7373
- && ![check_effective_target_arm_neon_ok]} then {
7375
+# The default action for a test is 'run'. Save current default.
7376
+global dg-do-what-default
7377
+set save-dg-do-what-default ${dg-do-what-default}
7379
+# For ARM, make sure that we have a target compatible with NEON, and do
7380
+# not attempt to run execution tests if the hardware doesn't support it.
7381
+if {[istarget arm*-*-*]} then {
7382
+ if {![check_effective_target_arm_neon_ok]} then {
7385
+ if {![is-effective-target arm_neon_hw]} then {
7386
+ set dg-do-what-default compile
7388
+ set dg-do-what-default run
7391
+ set dg-do-what-default run
7395
@@ -44,22 +56,10 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
7396
set additional_flags [add_options_for_arm_neon ""]
7399
-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
7400
- # If we're only testing specific files and this isn't one of them, skip it.
7401
- if ![runtest_file_p $runtests $src] then {
7405
- # runtest_file_p is already run above, and the code below can run
7406
- # runtest_file_p again, make sure everything for this test is
7407
- # performed if the above runtest_file_p decided this runtest
7408
- # instance should execute the test
7409
- gcc_parallel_test_enable 0
7410
- c-torture-execute $src $additional_flags
7411
- gcc-dg-runtest $src "" $additional_flags
7412
- gcc_parallel_test_enable 1
7414
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
7415
+ "" ${additional_flags}
7418
+set dg-do-what-default ${save-dg-do-what-default}
7422
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovn.c
7424
+#include <arm_neon.h>
7425
+#include "arm-neon-ref.h"
7426
+#include "compute-ref-data.h"
7428
+/* Expected values of cumulative_saturation flag. */
7429
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
7430
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7431
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
7432
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0;
7433
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0;
7434
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0;
7436
+/* Expected results. */
7437
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12,
7438
+ 0x12, 0x12, 0x12, 0x12 };
7439
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 };
7440
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 };
7441
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82,
7442
+ 0x82, 0x82, 0x82, 0x82 };
7443
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 };
7444
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 };
7446
+/* Expected values of cumulative_saturation flag when saturation occurs. */
7447
+int VECT_VAR(expected_cumulative_sat1,int,8,8) = 1;
7448
+int VECT_VAR(expected_cumulative_sat1,int,16,4) = 1;
7449
+int VECT_VAR(expected_cumulative_sat1,int,32,2) = 1;
7450
+int VECT_VAR(expected_cumulative_sat1,uint,8,8) = 1;
7451
+int VECT_VAR(expected_cumulative_sat1,uint,16,4) = 1;
7452
+int VECT_VAR(expected_cumulative_sat1,uint,32,2) = 1;
7454
+/* Expected results when saturation occurs. */
7455
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
7456
+ 0x7f, 0x7f, 0x7f, 0x7f };
7457
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7458
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7459
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
7460
+ 0xff, 0xff, 0xff, 0xff };
7461
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
7462
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xffffffff, 0xffffffff };
7464
+#define INSN_NAME vqmovn
7465
+#define TEST_MSG "VQMOVN"
7467
+#define FNNAME1(NAME) void exec_ ## NAME (void)
7468
+#define FNNAME(NAME) FNNAME1(NAME)
7472
+ /* Basic test: y=OP(x), then store the result. */
7473
+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7474
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7475
+ VECT_VAR(vector_res, T1, W, N) = \
7476
+ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \
7477
+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \
7478
+ VECT_VAR(vector_res, T1, W, N)); \
7479
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7481
+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7482
+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
7484
+ /* No need for 64 bits variants. */
7485
+ DECL_VARIABLE(vector, int, 16, 8);
7486
+ DECL_VARIABLE(vector, int, 32, 4);
7487
+ DECL_VARIABLE(vector, int, 64, 2);
7488
+ DECL_VARIABLE(vector, uint, 16, 8);
7489
+ DECL_VARIABLE(vector, uint, 32, 4);
7490
+ DECL_VARIABLE(vector, uint, 64, 2);
7492
+ DECL_VARIABLE(vector_res, int, 8, 8);
7493
+ DECL_VARIABLE(vector_res, int, 16, 4);
7494
+ DECL_VARIABLE(vector_res, int, 32, 2);
7495
+ DECL_VARIABLE(vector_res, uint, 8, 8);
7496
+ DECL_VARIABLE(vector_res, uint, 16, 4);
7497
+ DECL_VARIABLE(vector_res, uint, 32, 2);
7501
+ /* Fill input vector with arbitrary values. */
7502
+ VDUP(vector, q, int, s, 16, 8, 0x12);
7503
+ VDUP(vector, q, int, s, 32, 4, 0x1278);
7504
+ VDUP(vector, q, int, s, 64, 2, 0x12345678);
7505
+ VDUP(vector, q, uint, u, 16, 8, 0x82);
7506
+ VDUP(vector, q, uint, u, 32, 4, 0x8765);
7507
+ VDUP(vector, q, uint, u, 64, 2, 0x87654321);
7509
+ /* Apply a unary operator named INSN_NAME. */
7511
+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat, CMT);
7512
+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat, CMT);
7513
+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat, CMT);
7514
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT);
7515
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT);
7516
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT);
7518
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
7519
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
7520
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
7521
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
7522
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
7523
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
7526
+ /* Fill input vector with arbitrary values which cause cumulative
7528
+ VDUP(vector, q, int, s, 16, 8, 0x1234);
7529
+ VDUP(vector, q, int, s, 32, 4, 0x12345678);
7530
+ VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL);
7531
+ VDUP(vector, q, uint, u, 16, 8, 0x8234);
7532
+ VDUP(vector, q, uint, u, 32, 4, 0x87654321);
7533
+ VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL);
7535
+ /* Apply a unary operator named INSN_NAME. */
7537
+#define CMT " (with saturation)"
7538
+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat1, CMT);
7539
+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat1, CMT);
7540
+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat1, CMT);
7541
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat1, CMT);
7542
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat1, CMT);
7543
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat1, CMT);
7545
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected1, CMT);
7546
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected1, CMT);
7547
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected1, CMT);
7548
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected1, CMT);
7549
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected1, CMT);
7550
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected1, CMT);
7559
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovun.c
7561
+#include <arm_neon.h>
7562
+#include "arm-neon-ref.h"
7563
+#include "compute-ref-data.h"
7565
+/* Expected values of cumulative_saturation flag. */
7566
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0;
7567
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0;
7568
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0;
7570
+/* Expected results. */
7571
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34,
7572
+ 0x34, 0x34, 0x34, 0x34 };
7573
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 };
7574
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 };
7576
+/* Expected values of cumulative_saturation flag with negative input. */
7577
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 1;
7578
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 1;
7579
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 1;
7581
+/* Expected results with negative input. */
7582
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
7583
+ 0x0, 0x0, 0x0, 0x0 };
7584
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7585
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
7587
+#define INSN_NAME vqmovun
7588
+#define TEST_MSG "VQMOVUN"
7590
+#define FNNAME1(NAME) void exec_ ## NAME (void)
7591
+#define FNNAME(NAME) FNNAME1(NAME)
7595
+ /* Basic test: y=OP(x), then store the result. */
7596
+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7597
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7598
+ VECT_VAR(vector_res, T1, W, N) = \
7599
+ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \
7600
+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \
7601
+ VECT_VAR(vector_res, T1, W, N)); \
7602
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7604
+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7605
+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
7607
+ DECL_VARIABLE(vector, int, 16, 8);
7608
+ DECL_VARIABLE(vector, int, 32, 4);
7609
+ DECL_VARIABLE(vector, int, 64, 2);
7611
+ DECL_VARIABLE(vector_res, uint, 8, 8);
7612
+ DECL_VARIABLE(vector_res, uint, 16, 4);
7613
+ DECL_VARIABLE(vector_res, uint, 32, 2);
7617
+ /* Fill input vector with arbitrary values. */
7618
+ VDUP(vector, q, int, s, 16, 8, 0x34);
7619
+ VDUP(vector, q, int, s, 32, 4, 0x5678);
7620
+ VDUP(vector, q, int, s, 64, 2, 0x12345678);
7622
+ /* Apply a unary operator named INSN_NAME. */
7624
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT);
7625
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT);
7626
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT);
7628
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
7629
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
7630
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
7632
+ /* Fill input vector with negative values. */
7633
+ VDUP(vector, q, int, s, 16, 8, 0x8234);
7634
+ VDUP(vector, q, int, s, 32, 4, 0x87654321);
7635
+ VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL);
7637
+ /* Apply a unary operator named INSN_NAME. */
7639
+#define CMT " (negative input)"
7640
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat_neg, CMT);
7641
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat_neg, CMT);
7642
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat_neg, CMT);
7644
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
7645
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
7646
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
7655
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh.c
7657
+#include <arm_neon.h>
7658
+#include "arm-neon-ref.h"
7659
+#include "compute-ref-data.h"
7661
+/* Expected values of cumulative_saturation flag. */
7662
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7663
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
7664
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
7665
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
7667
+/* Expected results. */
7668
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 };
7669
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
7670
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
7671
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7673
+/* Expected values of cumulative_saturation flag when multiplication
7675
+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1;
7676
+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1;
7677
+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1;
7678
+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1;
7680
+/* Expected results when multiplication saturates. */
7681
+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7682
+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7683
+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7684
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7685
+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7686
+ 0x7fffffff, 0x7fffffff };
7688
+/* Expected values of cumulative_saturation flag when rounding
7689
+ should not cause saturation. */
7690
+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0;
7691
+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0;
7692
+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0;
7693
+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0;
7695
+/* Expected results when rounding should not cause saturation. */
7696
+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7697
+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7698
+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7699
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7700
+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7701
+ 0x7fffffff, 0x7fffffff };
7703
+#define INSN vqrdmulh
7704
+#define TEST_MSG "VQRDMULH"
7706
+#define FNNAME1(NAME) void exec_ ## NAME (void)
7707
+#define FNNAME(NAME) FNNAME1(NAME)
7711
+ /* vector_res = vqrdmulh(vector,vector2), then store the result. */
7712
+#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7713
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7714
+ VECT_VAR(vector_res, T1, W, N) = \
7715
+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
7716
+ VECT_VAR(vector2, T1, W, N)); \
7717
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
7718
+ VECT_VAR(vector_res, T1, W, N)); \
7719
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7721
+ /* Two auxliary macros are necessary to expand INSN */
7722
+#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7723
+ TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7725
+#define TEST_VQRDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7726
+ TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7729
+ DECL_VARIABLE(vector, int, 16, 4);
7730
+ DECL_VARIABLE(vector, int, 32, 2);
7731
+ DECL_VARIABLE(vector, int, 16, 8);
7732
+ DECL_VARIABLE(vector, int, 32, 4);
7734
+ DECL_VARIABLE(vector_res, int, 16, 4);
7735
+ DECL_VARIABLE(vector_res, int, 32, 2);
7736
+ DECL_VARIABLE(vector_res, int, 16, 8);
7737
+ DECL_VARIABLE(vector_res, int, 32, 4);
7739
+ DECL_VARIABLE(vector2, int, 16, 4);
7740
+ DECL_VARIABLE(vector2, int, 32, 2);
7741
+ DECL_VARIABLE(vector2, int, 16, 8);
7742
+ DECL_VARIABLE(vector2, int, 32, 4);
7746
+ VLOAD(vector, buffer, , int, s, 16, 4);
7747
+ VLOAD(vector, buffer, , int, s, 32, 2);
7748
+ VLOAD(vector, buffer, q, int, s, 16, 8);
7749
+ VLOAD(vector, buffer, q, int, s, 32, 4);
7751
+ /* Initialize vector2. */
7752
+ VDUP(vector2, , int, s, 16, 4, 0x5555);
7753
+ VDUP(vector2, , int, s, 32, 2, 0xBB);
7754
+ VDUP(vector2, q, int, s, 16, 8, 0x33);
7755
+ VDUP(vector2, q, int, s, 32, 4, 0x22);
7758
+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat, CMT);
7759
+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat, CMT);
7760
+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat, CMT);
7761
+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat, CMT);
7763
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
7764
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
7765
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
7766
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
7768
+ /* Now use input values such that the multiplication causes
7770
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
7771
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7772
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7773
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7774
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7775
+ VDUP(vector2, , int, s, 16, 4, 0x8000);
7776
+ VDUP(vector2, , int, s, 32, 2, 0x80000000);
7777
+ VDUP(vector2, q, int, s, 16, 8, 0x8000);
7778
+ VDUP(vector2, q, int, s, 32, 4, 0x80000000);
7780
+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
7781
+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL);
7782
+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL);
7783
+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
7785
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
7786
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
7787
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
7788
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
7790
+ /* Use input values where rounding produces a result equal to the
7791
+ saturation value, but does not set the saturation flag. */
7792
+#define TEST_MSG_ROUND " (check rounding)"
7793
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7794
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7795
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7796
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7797
+ VDUP(vector2, , int, s, 16, 4, 0x8001);
7798
+ VDUP(vector2, , int, s, 32, 2, 0x80000001);
7799
+ VDUP(vector2, q, int, s, 16, 8, 0x8001);
7800
+ VDUP(vector2, q, int, s, 32, 4, 0x80000001);
7802
+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_round, TEST_MSG_ROUND);
7803
+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_round, TEST_MSG_ROUND);
7804
+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_round, TEST_MSG_ROUND);
7805
+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_round, TEST_MSG_ROUND);
7807
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
7808
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
7809
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
7810
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
7819
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_lane.c
7821
+#include <arm_neon.h>
7822
+#include "arm-neon-ref.h"
7823
+#include "compute-ref-data.h"
7825
+/* Expected values of cumulative_saturation flag. */
7826
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7827
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
7828
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
7829
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
7831
+/* Expected results. */
7832
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7833
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
7834
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
7835
+ 0x0, 0x0, 0x0, 0x0 };
7836
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7838
+/* Expected values of cumulative_saturation flag when multiplication
7840
+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1;
7841
+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1;
7842
+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1;
7843
+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1;
7845
+/* Expected results when multiplication saturates. */
7846
+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7847
+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7848
+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7849
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7850
+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7851
+ 0x7fffffff, 0x7fffffff };
7853
+/* Expected values of cumulative_saturation flag when rounding
7854
+ should not cause saturation. */
7855
+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0;
7856
+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0;
7857
+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0;
7858
+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0;
7860
+/* Expected results when rounding should not cause saturation. */
7861
+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7862
+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7863
+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7864
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7865
+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7866
+ 0x7fffffff, 0x7fffffff };
7868
+#define INSN vqrdmulh
7869
+#define TEST_MSG "VQRDMULH_LANE"
7871
+#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void)
7872
+#define FNNAME(NAME) FNNAME1(NAME)
7876
+ /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */
7877
+#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
7878
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7879
+ VECT_VAR(vector_res, T1, W, N) = \
7880
+ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \
7881
+ VECT_VAR(vector2, T1, W, N2), \
7883
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
7884
+ VECT_VAR(vector_res, T1, W, N)); \
7885
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7887
+ /* Two auxliary macros are necessary to expand INSN */
7888
+#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
7889
+ TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT)
7891
+#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
7892
+ TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT)
7895
+ DECL_VARIABLE(vector, int, 16, 4);
7896
+ DECL_VARIABLE(vector, int, 32, 2);
7897
+ DECL_VARIABLE(vector, int, 16, 8);
7898
+ DECL_VARIABLE(vector, int, 32, 4);
7900
+ DECL_VARIABLE(vector_res, int, 16, 4);
7901
+ DECL_VARIABLE(vector_res, int, 32, 2);
7902
+ DECL_VARIABLE(vector_res, int, 16, 8);
7903
+ DECL_VARIABLE(vector_res, int, 32, 4);
7905
+ /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with
7906
+ the same number of elements, so we need only one variable of each
7908
+ DECL_VARIABLE(vector2, int, 16, 4);
7909
+ DECL_VARIABLE(vector2, int, 32, 2);
7913
+ VLOAD(vector, buffer, , int, s, 16, 4);
7914
+ VLOAD(vector, buffer, , int, s, 32, 2);
7916
+ VLOAD(vector, buffer, q, int, s, 16, 8);
7917
+ VLOAD(vector, buffer, q, int, s, 32, 4);
7919
+ /* Initialize vector2. */
7920
+ VDUP(vector2, , int, s, 16, 4, 0x55);
7921
+ VDUP(vector2, , int, s, 32, 2, 0xBB);
7923
+ /* Choose lane arbitrarily. */
7925
+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT);
7926
+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT);
7927
+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT);
7928
+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT);
7930
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
7931
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
7932
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
7933
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
7935
+ /* Now use input values such that the multiplication causes
7937
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
7938
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7939
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7940
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7941
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7942
+ VDUP(vector2, , int, s, 16, 4, 0x8000);
7943
+ VDUP(vector2, , int, s, 32, 2, 0x80000000);
7945
+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul,
7947
+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul,
7949
+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul,
7951
+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul,
7954
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
7955
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
7956
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
7957
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
7959
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7960
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7961
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7962
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7963
+ VDUP(vector2, , int, s, 16, 4, 0x8001);
7964
+ VDUP(vector2, , int, s, 32, 2, 0x80000001);
7966
+ /* Use input values where rounding produces a result equal to the
7967
+ saturation value, but does not set the saturation flag. */
7968
+#define TEST_MSG_ROUND " (check rounding)"
7969
+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_round,
7971
+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_round,
7973
+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round,
7975
+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round,
7978
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
7979
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
7980
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
7981
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
7986
+ exec_vqrdmulh_lane ();
7991
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_n.c
7993
+#include <arm_neon.h>
7994
+#include "arm-neon-ref.h"
7995
+#include "compute-ref-data.h"
7997
+/* Expected values of cumulative_saturation flag. */
7998
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7999
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
8000
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
8001
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
8003
+/* Expected results. */
8004
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd };
8005
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
8006
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5,
8007
+ 0x5, 0x4, 0x4, 0x4 };
8008
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe,
8009
+ 0xfffffffe, 0xfffffffe };
8011
+/* Expected values of cumulative_saturation flag when multiplication
8013
+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1;
8014
+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1;
8015
+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1;
8016
+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1;
8018
+/* Expected results when multiplication saturates. */
8019
+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8020
+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
8021
+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
8022
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8023
+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
8024
+ 0x7fffffff, 0x7fffffff };
8026
+/* Expected values of cumulative_saturation flag when rounding
8027
+ should not cause saturation. */
8028
+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0;
8029
+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0;
8030
+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0;
8031
+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0;
8033
+/* Expected results when rounding should not cause saturation. */
8034
+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8035
+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
8036
+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
8037
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8038
+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
8039
+ 0x7fffffff, 0x7fffffff };
8041
+#define INSN vqrdmulh
8042
+#define TEST_MSG "VQRDMULH_N"
8044
+#define FNNAME1(NAME) void exec_ ## NAME ## _n (void)
8045
+#define FNNAME(NAME) FNNAME1(NAME)
8051
+ /* vector_res = vqrdmulh_n(vector,val), then store the result. */
8052
+#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
8053
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
8054
+ VECT_VAR(vector_res, T1, W, N) = \
8055
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
8057
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
8058
+ VECT_VAR(vector_res, T1, W, N)); \
8059
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8061
+ /* Two auxliary macros are necessary to expand INSN */
8062
+#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
8063
+ TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
8065
+#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
8066
+ TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
8069
+ DECL_VARIABLE(vector, int, 16, 4);
8070
+ DECL_VARIABLE(vector, int, 32, 2);
8071
+ DECL_VARIABLE(vector, int, 16, 8);
8072
+ DECL_VARIABLE(vector, int, 32, 4);
8074
+ DECL_VARIABLE(vector_res, int, 16, 4);
8075
+ DECL_VARIABLE(vector_res, int, 32, 2);
8076
+ DECL_VARIABLE(vector_res, int, 16, 8);
8077
+ DECL_VARIABLE(vector_res, int, 32, 4);
8081
+ VLOAD(vector, buffer, , int, s, 16, 4);
8082
+ VLOAD(vector, buffer, , int, s, 32, 2);
8083
+ VLOAD(vector, buffer, q, int, s, 16, 8);
8084
+ VLOAD(vector, buffer, q, int, s, 32, 4);
8086
+ /* Choose multiplier arbitrarily. */
8088
+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233, expected_cumulative_sat, CMT);
8089
+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678, expected_cumulative_sat, CMT);
8090
+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12, expected_cumulative_sat, CMT);
8091
+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456, expected_cumulative_sat, CMT);
8093
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
8094
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
8095
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
8096
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
8098
+ /* Now use input values such that the multiplication causes
8100
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
8101
+ VDUP(vector, , int, s, 16, 4, 0x8000);
8102
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
8103
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
8104
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
8106
+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat_mul,
8108
+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat_mul,
8110
+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat_mul,
8112
+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat_mul,
8115
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
8116
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
8117
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
8118
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
8120
+ /* Use input values where rounding produces a result equal to the
8121
+ saturation value, but does not set the saturation flag. */
8122
+#define TEST_MSG_ROUND " (check rounding)"
8123
+ VDUP(vector, , int, s, 16, 4, 0x8000);
8124
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
8125
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
8126
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
8128
+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001, expected_cumulative_sat_round,
8130
+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001, expected_cumulative_sat_round,
8132
+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001, expected_cumulative_sat_round,
8134
+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001, expected_cumulative_sat_round,
8137
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
8138
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
8139
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
8140
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
8145
+ exec_vqrdmulh_n ();
8149
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshl.c
8151
+#include <arm_neon.h>
8152
+#include "arm-neon-ref.h"
8153
+#include "compute-ref-data.h"
8155
+/* Expected values of cumulative_saturation flag with input=0. */
8156
+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0;
8157
+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0;
8158
+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0;
8159
+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0;
8160
+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0;
8161
+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0;
8162
+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0;
8163
+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0;
8164
+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0;
8165
+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0;
8166
+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0;
8167
+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0;
8168
+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0;
8169
+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0;
8170
+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0;
8171
+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0;
8173
+/* Expected results with input=0. */
8174
+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8175
+ 0x0, 0x0, 0x0, 0x0 };
8176
+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8177
+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 };
8178
+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 };
8179
+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8180
+ 0x0, 0x0, 0x0, 0x0 };
8181
+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8182
+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 };
8183
+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 };
8184
+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8185
+ 0x0, 0x0, 0x0, 0x0,
8186
+ 0x0, 0x0, 0x0, 0x0,
8187
+ 0x0, 0x0, 0x0, 0x0 };
8188
+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8189
+ 0x0, 0x0, 0x0, 0x0 };
8190
+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8191
+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 };
8192
+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8193
+ 0x0, 0x0, 0x0, 0x0,
8194
+ 0x0, 0x0, 0x0, 0x0,
8195
+ 0x0, 0x0, 0x0, 0x0 };
8196
+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8197
+ 0x0, 0x0, 0x0, 0x0 };
8198
+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8199
+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 };
8201
+/* Expected values of cumulative_saturation flag with input=0 and
8202
+ negative shift amount. */
8203
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0;
8204
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0;
8205
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0;
8206
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0;
8207
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0;
8208
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0;
8209
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0;
8210
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0;
8211
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0;
8212
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0;
8213
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0;
8214
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0;
8215
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0;
8216
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0;
8217
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0;
8218
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0;
8220
+/* Expected results with input=0 and negative shift amount. */
8221
+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8222
+ 0x0, 0x0, 0x0, 0x0 };
8223
+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8224
+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 };
8225
+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 };
8226
+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8227
+ 0x0, 0x0, 0x0, 0x0 };
8228
+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8229
+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 };
8230
+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 };
8231
+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8232
+ 0x0, 0x0, 0x0, 0x0,
8233
+ 0x0, 0x0, 0x0, 0x0,
8234
+ 0x0, 0x0, 0x0, 0x0 };
8235
+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8236
+ 0x0, 0x0, 0x0, 0x0 };
8237
+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8238
+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 };
8239
+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8240
+ 0x0, 0x0, 0x0, 0x0,
8241
+ 0x0, 0x0, 0x0, 0x0,
8242
+ 0x0, 0x0, 0x0, 0x0 };
8243
+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8244
+ 0x0, 0x0, 0x0, 0x0 };
8245
+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8246
+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 };
8248
+/* Expected values of cumulative_saturation flag. */
8249
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
8250
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
8251
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
8252
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
8253
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
8254
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
8255
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
8256
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1;
8257
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1;
8258
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1;
8259
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
8260
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1;
8261
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
8262
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
8263
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
8264
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
8266
+/* Expected results. */
8267
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
8268
+ 0xe8, 0xea, 0xec, 0xee };
8269
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
8270
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
8271
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 };
8272
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
8273
+ 0xff, 0xff, 0xff, 0xff };
8274
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
8275
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
8276
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
8277
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
8278
+ 0x80, 0x80, 0x80, 0x80,
8279
+ 0x80, 0x80, 0x80, 0x80,
8280
+ 0x80, 0x80, 0x80, 0x80 };
8281
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
8282
+ 0x8000, 0x8000, 0x8000, 0x8000 };
8283
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000,
8284
+ 0x80000000, 0x80000000 };
8285
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 };
8286
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
8287
+ 0xff, 0xff, 0xff, 0xff,
8288
+ 0xff, 0xff, 0xff, 0xff,
8289
+ 0xff, 0xff, 0xff, 0xff };
8290
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
8291
+ 0xffff, 0xffff, 0xffff, 0xffff };
8292
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
8293
+ 0xffffffff, 0xffffffff };
8294
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
8295
+ 0xffffffffffffffff };
8297
+/* Expected values of cumulative_saturation flag with negative shift
8299
+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0;
8300
+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0;
8301
+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0;
8302
+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0;
8303
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0;
8304
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0;
8305
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0;
8306
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0;
8307
+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0;
8308
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
8309
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
8310
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0;
8311
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0;
8312
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0;
8313
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0;
8314
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0;
8316
+/* Expected results with negative shift amount. */
8317
+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd,
8318
+ 0xfd, 0xfd, 0xfe, 0xfe };
8319
+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd };
8320
+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
8321
+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff };
8322
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d,
8323
+ 0x3d, 0x3d, 0x3e, 0x3e };
8324
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd };
8325
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe };
8326
+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff };
8327
+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8328
+ 0x0, 0x0, 0x0, 0x0,
8329
+ 0x0, 0x0, 0x0, 0x0,
8330
+ 0x0, 0x0, 0x0, 0x0 };
8331
+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8332
+ 0x0, 0x0, 0x0, 0x0 };
8333
+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8334
+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0x0, 0x0 };
8335
+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2,
8336
+ 0x2, 0x2, 0x2, 0x2,
8337
+ 0x2, 0x2, 0x2, 0x2,
8338
+ 0x2, 0x2, 0x2, 0x2 };
8339
+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20,
8340
+ 0x20, 0x20, 0x20, 0x20 };
8341
+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x80000, 0x80000,
8342
+ 0x80000, 0x80000 };
8343
+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 };
8345
+/* Expected values of cumulative_saturation flag with input=max and
8347
+int VECT_VAR(expected_cumulative_sat_minus1,int,8,8) = 0;
8348
+int VECT_VAR(expected_cumulative_sat_minus1,int,16,4) = 0;
8349
+int VECT_VAR(expected_cumulative_sat_minus1,int,32,2) = 0;
8350
+int VECT_VAR(expected_cumulative_sat_minus1,int,64,1) = 0;
8351
+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,8) = 0;
8352
+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,4) = 0;
8353
+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,2) = 0;
8354
+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,1) = 0;
8355
+int VECT_VAR(expected_cumulative_sat_minus1,int,8,16) = 0;
8356
+int VECT_VAR(expected_cumulative_sat_minus1,int,16,8) = 0;
8357
+int VECT_VAR(expected_cumulative_sat_minus1,int,32,4) = 0;
8358
+int VECT_VAR(expected_cumulative_sat_minus1,int,64,2) = 0;
8359
+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,16) = 0;
8360
+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,8) = 0;
8361
+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,4) = 0;
8362
+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,2) = 0;
8364
+/* Expected results with input=max and shift by -1. */
8365
+VECT_VAR_DECL(expected_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40,
8366
+ 0x40, 0x40, 0x40, 0x40 };
8367
+VECT_VAR_DECL(expected_minus1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
8368
+VECT_VAR_DECL(expected_minus1,int,32,2) [] = { 0x40000000, 0x40000000 };
8369
+VECT_VAR_DECL(expected_minus1,int,64,1) [] = { 0x4000000000000000 };
8370
+VECT_VAR_DECL(expected_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
8371
+ 0x80, 0x80, 0x80, 0x80 };
8372
+VECT_VAR_DECL(expected_minus1,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 };
8373
+VECT_VAR_DECL(expected_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 };
8374
+VECT_VAR_DECL(expected_minus1,uint,64,1) [] = { 0x8000000000000000 };
8375
+VECT_VAR_DECL(expected_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40,
8376
+ 0x40, 0x40, 0x40, 0x40,
8377
+ 0x40, 0x40, 0x40, 0x40,
8378
+ 0x40, 0x40, 0x40, 0x40 };
8379
+VECT_VAR_DECL(expected_minus1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000,
8380
+ 0x4000, 0x4000, 0x4000, 0x4000 };
8381
+VECT_VAR_DECL(expected_minus1,int,32,4) [] = { 0x40000000, 0x40000000,
8382
+ 0x40000000, 0x40000000 };
8383
+VECT_VAR_DECL(expected_minus1,int,64,2) [] = { 0x4000000000000000,
8384
+ 0x4000000000000000 };
8385
+VECT_VAR_DECL(expected_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
8386
+ 0x80, 0x80, 0x80, 0x80,
8387
+ 0x80, 0x80, 0x80, 0x80,
8388
+ 0x80, 0x80, 0x80, 0x80 };
8389
+VECT_VAR_DECL(expected_minus1,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
8390
+ 0x8000, 0x8000, 0x8000, 0x8000 };
8391
+VECT_VAR_DECL(expected_minus1,uint,32,4) [] = { 0x80000000, 0x80000000,
8392
+ 0x80000000, 0x80000000 };
8393
+VECT_VAR_DECL(expected_minus1,uint,64,2) [] = { 0x8000000000000000,
8394
+ 0x8000000000000000 };
8396
+/* Expected values of cumulative_saturation flag with input=max and
8398
+int VECT_VAR(expected_cumulative_sat_minus3,int,8,8) = 0;
8399
+int VECT_VAR(expected_cumulative_sat_minus3,int,16,4) = 0;
8400
+int VECT_VAR(expected_cumulative_sat_minus3,int,32,2) = 0;
8401
+int VECT_VAR(expected_cumulative_sat_minus3,int,64,1) = 0;
8402
+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,8) = 0;
8403
+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,4) = 0;
8404
+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,2) = 0;
8405
+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,1) = 0;
8406
+int VECT_VAR(expected_cumulative_sat_minus3,int,8,16) = 0;
8407
+int VECT_VAR(expected_cumulative_sat_minus3,int,16,8) = 0;
8408
+int VECT_VAR(expected_cumulative_sat_minus3,int,32,4) = 0;
8409
+int VECT_VAR(expected_cumulative_sat_minus3,int,64,2) = 0;
8410
+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,16) = 0;
8411
+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,8) = 0;
8412
+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,4) = 0;
8413
+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,2) = 0;
8415
+/* Expected results with input=max and shift by -3. */
8416
+VECT_VAR_DECL(expected_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10,
8417
+ 0x10, 0x10, 0x10, 0x10 };
8418
+VECT_VAR_DECL(expected_minus3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 };
8419
+VECT_VAR_DECL(expected_minus3,int,32,2) [] = { 0x10000000, 0x10000000 };
8420
+VECT_VAR_DECL(expected_minus3,int,64,1) [] = { 0x1000000000000000 };
8421
+VECT_VAR_DECL(expected_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20,
8422
+ 0x20, 0x20, 0x20, 0x20 };
8423
+VECT_VAR_DECL(expected_minus3,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 };
8424
+VECT_VAR_DECL(expected_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 };
8425
+VECT_VAR_DECL(expected_minus3,uint,64,1) [] = { 0x2000000000000000 };
8426
+VECT_VAR_DECL(expected_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10,
8427
+ 0x10, 0x10, 0x10, 0x10,
8428
+ 0x10, 0x10, 0x10, 0x10,
8429
+ 0x10, 0x10, 0x10, 0x10 };
8430
+VECT_VAR_DECL(expected_minus3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000,
8431
+ 0x1000, 0x1000, 0x1000, 0x1000 };
8432
+VECT_VAR_DECL(expected_minus3,int,32,4) [] = { 0x10000000, 0x10000000,
8433
+ 0x10000000, 0x10000000 };
8434
+VECT_VAR_DECL(expected_minus3,int,64,2) [] = { 0x1000000000000000,
8435
+ 0x1000000000000000 };
8436
+VECT_VAR_DECL(expected_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20,
8437
+ 0x20, 0x20, 0x20, 0x20,
8438
+ 0x20, 0x20, 0x20, 0x20,
8439
+ 0x20, 0x20, 0x20, 0x20 };
8440
+VECT_VAR_DECL(expected_minus3,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000,
8441
+ 0x2000, 0x2000, 0x2000, 0x2000 };
8442
+VECT_VAR_DECL(expected_minus3,uint,32,4) [] = { 0x20000000, 0x20000000,
8443
+ 0x20000000, 0x20000000 };
8444
+VECT_VAR_DECL(expected_minus3,uint,64,2) [] = { 0x2000000000000000,
8445
+ 0x2000000000000000 };
8447
+/* Expected values of cumulative_saturation flag with input=max and
8448
+ large shift amount. */
8449
+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,8) = 1;
8450
+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,4) = 1;
8451
+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,2) = 1;
8452
+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,1) = 1;
8453
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,8) = 1;
8454
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,4) = 1;
8455
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,2) = 1;
8456
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,1) = 1;
8457
+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,16) = 1;
8458
+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,8) = 1;
8459
+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,4) = 1;
8460
+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,2) = 1;
8461
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,16) = 1;
8462
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,8) = 1;
8463
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,4) = 1;
8464
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,2) = 1;
8466
+/* Expected results with input=max and large shift amount. */
8467
+VECT_VAR_DECL(expected_large_sh,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
8468
+ 0x7f, 0x7f, 0x7f, 0x7f };
8469
+VECT_VAR_DECL(expected_large_sh,int,16,4) [] = { 0x7fff, 0x7fff,
8471
+VECT_VAR_DECL(expected_large_sh,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
8472
+VECT_VAR_DECL(expected_large_sh,int,64,1) [] = { 0x7fffffffffffffff };
8473
+VECT_VAR_DECL(expected_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
8474
+ 0xff, 0xff, 0xff, 0xff };
8475
+VECT_VAR_DECL(expected_large_sh,uint,16,4) [] = { 0xffff, 0xffff,
8477
+VECT_VAR_DECL(expected_large_sh,uint,32,2) [] = { 0xffffffff, 0xffffffff };
8478
+VECT_VAR_DECL(expected_large_sh,uint,64,1) [] = { 0xffffffffffffffff };
8479
+VECT_VAR_DECL(expected_large_sh,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
8480
+ 0x7f, 0x7f, 0x7f, 0x7f,
8481
+ 0x7f, 0x7f, 0x7f, 0x7f,
8482
+ 0x7f, 0x7f, 0x7f, 0x7f };
8483
+VECT_VAR_DECL(expected_large_sh,int,16,8) [] = { 0x7fff, 0x7fff,
8487
+VECT_VAR_DECL(expected_large_sh,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
8488
+ 0x7fffffff, 0x7fffffff };
8489
+VECT_VAR_DECL(expected_large_sh,int,64,2) [] = { 0x7fffffffffffffff,
8490
+ 0x7fffffffffffffff };
8491
+VECT_VAR_DECL(expected_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
8492
+ 0xff, 0xff, 0xff, 0xff,
8493
+ 0xff, 0xff, 0xff, 0xff,
8494
+ 0xff, 0xff, 0xff, 0xff };
8495
+VECT_VAR_DECL(expected_large_sh,uint,16,8) [] = { 0xffff, 0xffff,
8499
+VECT_VAR_DECL(expected_large_sh,uint,32,4) [] = { 0xffffffff, 0xffffffff,
8500
+ 0xffffffff, 0xffffffff };
8501
+VECT_VAR_DECL(expected_large_sh,uint,64,2) [] = { 0xffffffffffffffff,
8502
+ 0xffffffffffffffff };
8504
+/* Expected values of cumulative_saturation flag with negative input and
8505
+ large shift amount. */
8506
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,8) = 1;
8507
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,4) = 1;
8508
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,2) = 1;
8509
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,1) = 1;
8510
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,8) = 1;
8511
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,4) = 1;
8512
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,2) = 1;
8513
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,1) = 1;
8514
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,16) = 1;
8515
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,8) = 1;
8516
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,4) = 1;
8517
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,2) = 1;
8518
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,16) = 1;
8519
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,8) = 1;
8520
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,4) = 1;
8521
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,2) = 1;
8523
+/* Expected results with negative input and large shift amount. */
8524
+VECT_VAR_DECL(expected_neg_large_sh,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
8525
+ 0x80, 0x80, 0x80, 0x80 };
8526
+VECT_VAR_DECL(expected_neg_large_sh,int,16,4) [] = { 0x8000, 0x8000,
8528
+VECT_VAR_DECL(expected_neg_large_sh,int,32,2) [] = { 0x80000000, 0x80000000 };
8529
+VECT_VAR_DECL(expected_neg_large_sh,int,64,1) [] = { 0x8000000000000000 };
8530
+VECT_VAR_DECL(expected_neg_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
8531
+ 0xff, 0xff, 0xff, 0xff };
8532
+VECT_VAR_DECL(expected_neg_large_sh,uint,16,4) [] = { 0xffff, 0xffff,
8534
+VECT_VAR_DECL(expected_neg_large_sh,uint,32,2) [] = { 0xffffffff,
8536
+VECT_VAR_DECL(expected_neg_large_sh,uint,64,1) [] = { 0xffffffffffffffff };
8537
+VECT_VAR_DECL(expected_neg_large_sh,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
8538
+ 0x80, 0x80, 0x80, 0x80,
8539
+ 0x80, 0x80, 0x80, 0x80,
8540
+ 0x80, 0x80, 0x80, 0x80 };
8541
+VECT_VAR_DECL(expected_neg_large_sh,int,16,8) [] = { 0x8000, 0x8000,
8545
+VECT_VAR_DECL(expected_neg_large_sh,int,32,4) [] = { 0x80000000, 0x80000000,
8546
+ 0x80000000, 0x80000000 };
8547
+VECT_VAR_DECL(expected_neg_large_sh,int,64,2) [] = { 0x8000000000000000,
8548
+ 0x8000000000000000 };
8549
+VECT_VAR_DECL(expected_neg_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
8550
+ 0xff, 0xff, 0xff, 0xff,
8551
+ 0xff, 0xff, 0xff, 0xff,
8552
+ 0xff, 0xff, 0xff, 0xff };
8553
+VECT_VAR_DECL(expected_neg_large_sh,uint,16,8) [] = { 0xffff, 0xffff,
8557
+VECT_VAR_DECL(expected_neg_large_sh,uint,32,4) [] = { 0xffffffff,
8561
+VECT_VAR_DECL(expected_neg_large_sh,uint,64,2) [] = { 0xffffffffffffffff,
8562
+ 0xffffffffffffffff };
8564
+/* Expected values of cumulative_saturation flag with max/min input and
8565
+ large negative shift amount. */
8566
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,8) = 0;
8567
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,4) = 0;
8568
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,2) = 0;
8569
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,1) = 0;
8570
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,8) = 0;
8571
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,4) = 0;
8572
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,2) = 0;
8573
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,1) = 0;
8574
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,16) = 0;
8575
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,8) = 0;
8576
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,4) = 0;
8577
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,2) = 0;
8578
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,16) = 0;
8579
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,8) = 0;
8580
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,4) = 0;
8581
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,2) = 0;
8583
+/* Expected results with max/min input and large negative shift amount. */
8584
+VECT_VAR_DECL(expected_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8585
+ 0x0, 0x0, 0x0, 0x0 };
8586
+VECT_VAR_DECL(expected_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8587
+VECT_VAR_DECL(expected_large_neg_sh,int,32,2) [] = { 0x0, 0x0 };
8588
+VECT_VAR_DECL(expected_large_neg_sh,int,64,1) [] = { 0x0 };
8589
+VECT_VAR_DECL(expected_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8590
+ 0x0, 0x0, 0x0, 0x0 };
8591
+VECT_VAR_DECL(expected_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8592
+VECT_VAR_DECL(expected_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 };
8593
+VECT_VAR_DECL(expected_large_neg_sh,uint,64,1) [] = { 0x0 };
8594
+VECT_VAR_DECL(expected_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8595
+ 0x0, 0x0, 0x0, 0x0,
8596
+ 0x0, 0x0, 0x0, 0x0,
8597
+ 0x0, 0x0, 0x0, 0x0 };
8598
+VECT_VAR_DECL(expected_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8599
+ 0x0, 0x0, 0x0, 0x0 };
8600
+VECT_VAR_DECL(expected_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8601
+VECT_VAR_DECL(expected_large_neg_sh,int,64,2) [] = { 0x0, 0x0 };
8602
+VECT_VAR_DECL(expected_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8603
+ 0x0, 0x0, 0x0, 0x0,
8604
+ 0x0, 0x0, 0x0, 0x0,
8605
+ 0x0, 0x0, 0x0, 0x0 };
8606
+VECT_VAR_DECL(expected_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8607
+ 0x0, 0x0, 0x0, 0x0 };
8608
+VECT_VAR_DECL(expected_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8609
+VECT_VAR_DECL(expected_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 };
8611
+/* Expected values of cumulative_saturation flag with input=0 and
8612
+ large negative shift amount. */
8613
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,8) = 0;
8614
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,4) = 0;
8615
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,2) = 0;
8616
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,1) = 0;
8617
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,8) = 0;
8618
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,4) = 0;
8619
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,2) = 0;
8620
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,1) = 0;
8621
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,16) = 0;
8622
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,8) = 0;
8623
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,4) = 0;
8624
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,2) = 0;
8625
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,16) = 0;
8626
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,8) = 0;
8627
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,4) = 0;
8628
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,2) = 0;
8630
+/* Expected results with input=0 and large negative shift amount. */
8631
+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8632
+ 0x0, 0x0, 0x0, 0x0 };
8633
+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8634
+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,2) [] = { 0x0, 0x0 };
8635
+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,1) [] = { 0x0 };
8636
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8637
+ 0x0, 0x0, 0x0, 0x0 };
8638
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8639
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 };
8640
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,1) [] = { 0x0 };
8641
+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8642
+ 0x0, 0x0, 0x0, 0x0,
8643
+ 0x0, 0x0, 0x0, 0x0,
8644
+ 0x0, 0x0, 0x0, 0x0 };
8645
+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8646
+ 0x0, 0x0, 0x0, 0x0 };
8647
+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8648
+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,2) [] = { 0x0, 0x0 };
8649
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8650
+ 0x0, 0x0, 0x0, 0x0,
8651
+ 0x0, 0x0, 0x0, 0x0,
8652
+ 0x0, 0x0, 0x0, 0x0 };
8653
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8654
+ 0x0, 0x0, 0x0, 0x0 };
8655
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8656
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 };
8658
+#define INSN vqrshl
8659
+#define TEST_MSG "VQRSHL/VQRSHLQ"
8661
+#define FNNAME1(NAME) void exec_ ## NAME (void)
8662
+#define FNNAME(NAME) FNNAME1(NAME)
8666
+ /* Basic test: v3=vqrshl(v1,v2), then store the result. */
8667
+#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
8668
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
8669
+ VECT_VAR(vector_res, T1, W, N) = \
8670
+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
8671
+ VECT_VAR(vector_shift, T3, W, N)); \
8672
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
8673
+ VECT_VAR(vector_res, T1, W, N)); \
8674
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8676
+ /* Two auxliary macros are necessary to expand INSN */
8677
+#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
8678
+ TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8680
+#define TEST_VQRSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
8681
+ TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8683
+ DECL_VARIABLE_ALL_VARIANTS(vector);
8684
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
8686
+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
8690
+ /* Fill input vector with 0, to check saturation on limits. */
8691
+ VDUP(vector, , int, s, 8, 8, 0);
8692
+ VDUP(vector, , int, s, 16, 4, 0);
8693
+ VDUP(vector, , int, s, 32, 2, 0);
8694
+ VDUP(vector, , int, s, 64, 1, 0);
8695
+ VDUP(vector, , uint, u, 8, 8, 0);
8696
+ VDUP(vector, , uint, u, 16, 4, 0);
8697
+ VDUP(vector, , uint, u, 32, 2, 0);
8698
+ VDUP(vector, , uint, u, 64, 1, 0);
8699
+ VDUP(vector, q, int, s, 8, 16, 0);
8700
+ VDUP(vector, q, int, s, 16, 8, 0);
8701
+ VDUP(vector, q, int, s, 32, 4, 0);
8702
+ VDUP(vector, q, int, s, 64, 2, 0);
8703
+ VDUP(vector, q, uint, u, 8, 16, 0);
8704
+ VDUP(vector, q, uint, u, 16, 8, 0);
8705
+ VDUP(vector, q, uint, u, 32, 4, 0);
8706
+ VDUP(vector, q, uint, u, 64, 2, 0);
8708
+ /* Choose init value arbitrarily, will be used as shift amount */
8709
+ /* Use values equal to or one-less-than the type width to check
8710
+ behaviour on limits. */
8711
+ VDUP(vector_shift, , int, s, 8, 8, 7);
8712
+ VDUP(vector_shift, , int, s, 16, 4, 15);
8713
+ VDUP(vector_shift, , int, s, 32, 2, 31);
8714
+ VDUP(vector_shift, , int, s, 64, 1, 63);
8715
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
8716
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
8717
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
8718
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
8720
+#define CMT " (with input = 0)"
8721
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT);
8722
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT);
8723
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT);
8724
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT);
8725
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT);
8726
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT);
8727
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT);
8728
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT);
8729
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT);
8730
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT);
8731
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT);
8732
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT);
8733
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT);
8734
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT);
8735
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT);
8736
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT);
8738
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT);
8739
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT);
8740
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT);
8741
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT);
8742
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT);
8743
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT);
8744
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT);
8745
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT);
8746
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT);
8747
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT);
8748
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT);
8749
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT);
8750
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT);
8751
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT);
8752
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT);
8753
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT);
8756
+ /* Use negative shift amounts. */
8757
+ VDUP(vector_shift, , int, s, 8, 8, -1);
8758
+ VDUP(vector_shift, , int, s, 16, 4, -2);
8759
+ VDUP(vector_shift, , int, s, 32, 2, -3);
8760
+ VDUP(vector_shift, , int, s, 64, 1, -4);
8761
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
8762
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
8763
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
8764
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
8767
+#define CMT " (input 0 and negative shift amount)"
8768
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT);
8769
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT);
8770
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT);
8771
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT);
8772
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT);
8773
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT);
8774
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT);
8775
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT);
8776
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT);
8777
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT);
8778
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT);
8779
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT);
8780
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT);
8781
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT);
8782
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT);
8783
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT);
8785
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT);
8786
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT);
8787
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT);
8788
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT);
8789
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT);
8790
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT);
8791
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT);
8792
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT);
8793
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT);
8794
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT);
8795
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT);
8796
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT);
8797
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT);
8798
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT);
8799
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT);
8800
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT);
8803
+ /* Test again, with predefined input values. */
8804
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
8806
+ /* Choose init value arbitrarily, will be used as shift amount. */
8807
+ VDUP(vector_shift, , int, s, 8, 8, 1);
8808
+ VDUP(vector_shift, , int, s, 16, 4, 3);
8809
+ VDUP(vector_shift, , int, s, 32, 2, 8);
8810
+ VDUP(vector_shift, , int, s, 64, 1, 3);
8811
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
8812
+ VDUP(vector_shift, q, int, s, 16, 8, 12);
8813
+ VDUP(vector_shift, q, int, s, 32, 4, 31);
8814
+ VDUP(vector_shift, q, int, s, 64, 2, 63);
8818
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT);
8819
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT);
8820
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT);
8821
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT);
8822
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT);
8823
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT);
8824
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT);
8825
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT);
8826
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT);
8827
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT);
8828
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT);
8829
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT);
8830
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT);
8831
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT);
8832
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT);
8833
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT);
8835
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
8836
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
8837
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
8838
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
8839
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
8840
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
8841
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
8842
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
8843
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
8844
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
8845
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
8846
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
8847
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
8848
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
8849
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
8850
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
8853
+ /* Use negative shift amounts. */
8854
+ VDUP(vector_shift, , int, s, 8, 8, -2);
8855
+ VDUP(vector_shift, , int, s, 16, 4, -2);
8856
+ VDUP(vector_shift, , int, s, 32, 2, -3);
8857
+ VDUP(vector_shift, , int, s, 64, 1, -4);
8858
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
8859
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
8860
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
8861
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
8864
+#define CMT " (negative shift amount)"
8865
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT);
8866
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT);
8867
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT);
8868
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT);
8869
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT);
8870
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT);
8871
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT);
8872
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT);
8873
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT);
8874
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT);
8875
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT);
8876
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT);
8877
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT);
8878
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT);
8879
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT);
8880
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT);
8882
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT);
8883
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT);
8884
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT);
8885
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT);
8886
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
8887
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
8888
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
8889
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
8890
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT);
8891
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT);
8892
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT);
8893
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT);
8894
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
8895
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
8896
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
8897
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
8900
+ /* Fill input vector with max value, to check saturation on
8902
+ VDUP(vector, , int, s, 8, 8, 0x7F);
8903
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
8904
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
8905
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
8906
+ VDUP(vector, , uint, u, 8, 8, 0xFF);
8907
+ VDUP(vector, , uint, u, 16, 4, 0xFFFF);
8908
+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
8909
+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
8910
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
8911
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
8912
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
8913
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
8914
+ VDUP(vector, q, uint, u, 8, 16, 0xFF);
8915
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
8916
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
8917
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
8919
+ /* Use -1 shift amount to check cumulative saturation with
8921
+ VDUP(vector_shift, , int, s, 8, 8, -1);
8922
+ VDUP(vector_shift, , int, s, 16, 4, -1);
8923
+ VDUP(vector_shift, , int, s, 32, 2, -1);
8924
+ VDUP(vector_shift, , int, s, 64, 1, -1);
8925
+ VDUP(vector_shift, q, int, s, 8, 16, -1);
8926
+ VDUP(vector_shift, q, int, s, 16, 8, -1);
8927
+ VDUP(vector_shift, q, int, s, 32, 4, -1);
8928
+ VDUP(vector_shift, q, int, s, 64, 2, -1);
8931
+#define CMT " (checking cumulative saturation: shift by -1)"
8932
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus1, CMT);
8933
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus1, CMT);
8934
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus1, CMT);
8935
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus1, CMT);
8936
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus1, CMT);
8937
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus1, CMT);
8938
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus1, CMT);
8939
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus1, CMT);
8940
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus1, CMT);
8941
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus1, CMT);
8942
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus1, CMT);
8943
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus1, CMT);
8944
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus1, CMT);
8945
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus1, CMT);
8946
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus1, CMT);
8947
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus1, CMT);
8949
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus1, CMT);
8950
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus1, CMT);
8951
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus1, CMT);
8952
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus1, CMT);
8953
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus1, CMT);
8954
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus1, CMT);
8955
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus1, CMT);
8956
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus1, CMT);
8957
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus1, CMT);
8958
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus1, CMT);
8959
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus1, CMT);
8960
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus1, CMT);
8961
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus1, CMT);
8962
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus1, CMT);
8963
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus1, CMT);
8964
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus1, CMT);
8967
+ /* Use -3 shift amount to check cumulative saturation with
8969
+ VDUP(vector_shift, , int, s, 8, 8, -3);
8970
+ VDUP(vector_shift, , int, s, 16, 4, -3);
8971
+ VDUP(vector_shift, , int, s, 32, 2, -3);
8972
+ VDUP(vector_shift, , int, s, 64, 1, -3);
8973
+ VDUP(vector_shift, q, int, s, 8, 16, -3);
8974
+ VDUP(vector_shift, q, int, s, 16, 8, -3);
8975
+ VDUP(vector_shift, q, int, s, 32, 4, -3);
8976
+ VDUP(vector_shift, q, int, s, 64, 2, -3);
8979
+#define CMT " (checking cumulative saturation: shift by -3)"
8980
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus3, CMT);
8981
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus3, CMT);
8982
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus3, CMT);
8983
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus3, CMT);
8984
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus3, CMT);
8985
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus3, CMT);
8986
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus3, CMT);
8987
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus3, CMT);
8988
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus3, CMT);
8989
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus3, CMT);
8990
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus3, CMT);
8991
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus3, CMT);
8992
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus3, CMT);
8993
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus3, CMT);
8994
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus3, CMT);
8995
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus3, CMT);
8997
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus3, CMT);
8998
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus3, CMT);
8999
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus3, CMT);
9000
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus3, CMT);
9001
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus3, CMT);
9002
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus3, CMT);
9003
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus3, CMT);
9004
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus3, CMT);
9005
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus3, CMT);
9006
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus3, CMT);
9007
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus3, CMT);
9008
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus3, CMT);
9009
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus3, CMT);
9010
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus3, CMT);
9011
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus3, CMT);
9012
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus3, CMT);
9015
+ /* Use large shift amount. */
9016
+ VDUP(vector_shift, , int, s, 8, 8, 10);
9017
+ VDUP(vector_shift, , int, s, 16, 4, 20);
9018
+ VDUP(vector_shift, , int, s, 32, 2, 40);
9019
+ VDUP(vector_shift, , int, s, 64, 1, 70);
9020
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
9021
+ VDUP(vector_shift, q, int, s, 16, 8, 20);
9022
+ VDUP(vector_shift, q, int, s, 32, 4, 40);
9023
+ VDUP(vector_shift, q, int, s, 64, 2, 70);
9026
+#define CMT " (checking cumulative saturation: large shift amount)"
9027
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_sh, CMT);
9028
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_sh, CMT);
9029
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_sh, CMT);
9030
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_sh, CMT);
9031
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_sh, CMT);
9032
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_sh, CMT);
9033
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_sh, CMT);
9034
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_sh, CMT);
9035
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_sh, CMT);
9036
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_sh, CMT);
9037
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_sh, CMT);
9038
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_sh, CMT);
9039
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_sh, CMT);
9040
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_sh, CMT);
9041
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_sh, CMT);
9042
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_sh, CMT);
9044
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_sh, CMT);
9045
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_sh, CMT);
9046
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_sh, CMT);
9047
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_sh, CMT);
9048
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_sh, CMT);
9049
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_sh, CMT);
9050
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_sh, CMT);
9051
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_sh, CMT);
9052
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_sh, CMT);
9053
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_sh, CMT);
9054
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_sh, CMT);
9055
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_sh, CMT);
9056
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_sh, CMT);
9057
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_sh, CMT);
9058
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_sh, CMT);
9059
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_sh, CMT);
9062
+ /* Fill input vector with negative values, to check saturation on
9064
+ VDUP(vector, , int, s, 8, 8, 0x80);
9065
+ VDUP(vector, , int, s, 16, 4, 0x8000);
9066
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
9067
+ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL);
9068
+ VDUP(vector, q, int, s, 8, 16, 0x80);
9069
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
9070
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
9071
+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
9073
+ /* Use large shift amount. */
9074
+ VDUP(vector_shift, , int, s, 8, 8, 10);
9075
+ VDUP(vector_shift, , int, s, 16, 4, 20);
9076
+ VDUP(vector_shift, , int, s, 32, 2, 40);
9077
+ VDUP(vector_shift, , int, s, 64, 1, 70);
9078
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
9079
+ VDUP(vector_shift, q, int, s, 16, 8, 20);
9080
+ VDUP(vector_shift, q, int, s, 32, 4, 40);
9081
+ VDUP(vector_shift, q, int, s, 64, 2, 70);
9084
+#define CMT " (checking cumulative saturation: large shift amount with negative input)"
9085
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large_sh, CMT);
9086
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large_sh, CMT);
9087
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large_sh, CMT);
9088
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large_sh, CMT);
9089
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large_sh, CMT);
9090
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large_sh, CMT);
9091
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large_sh, CMT);
9092
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large_sh, CMT);
9093
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large_sh, CMT);
9094
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large_sh, CMT);
9095
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large_sh, CMT);
9096
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large_sh, CMT);
9097
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large_sh, CMT);
9098
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large_sh, CMT);
9099
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large_sh, CMT);
9100
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large_sh, CMT);
9102
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large_sh, CMT);
9103
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large_sh, CMT);
9104
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large_sh, CMT);
9105
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large_sh, CMT);
9106
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large_sh, CMT);
9107
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large_sh, CMT);
9108
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large_sh, CMT);
9109
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large_sh, CMT);
9110
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large_sh, CMT);
9111
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large_sh, CMT);
9112
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large_sh, CMT);
9113
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large_sh, CMT);
9114
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large_sh, CMT);
9115
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large_sh, CMT);
9116
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large_sh, CMT);
9117
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large_sh, CMT);
9120
+ /* Fill input vector with negative and positive values, to check
9121
+ * saturation on limits */
9122
+ VDUP(vector, , int, s, 8, 8, 0x7F);
9123
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
9124
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
9125
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
9126
+ VDUP(vector, q, int, s, 8, 16, 0x80);
9127
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
9128
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
9129
+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
9131
+ /* Use large negative shift amount */
9132
+ VDUP(vector_shift, , int, s, 8, 8, -10);
9133
+ VDUP(vector_shift, , int, s, 16, 4, -20);
9134
+ VDUP(vector_shift, , int, s, 32, 2, -40);
9135
+ VDUP(vector_shift, , int, s, 64, 1, -70);
9136
+ VDUP(vector_shift, q, int, s, 8, 16, -10);
9137
+ VDUP(vector_shift, q, int, s, 16, 8, -20);
9138
+ VDUP(vector_shift, q, int, s, 32, 4, -40);
9139
+ VDUP(vector_shift, q, int, s, 64, 2, -70);
9142
+#define CMT " (checking cumulative saturation: large negative shift amount)"
9143
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9144
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9145
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9146
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9147
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9148
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9149
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9150
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9151
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9152
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9153
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9154
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9155
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9156
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9157
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9158
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9160
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9161
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9162
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9163
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9164
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9165
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9166
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9167
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9168
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9169
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9170
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9171
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9172
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9173
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9174
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9175
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9178
+ /* Fill input vector with 0, to check saturation in case of large
9180
+ VDUP(vector, , int, s, 8, 8, 0);
9181
+ VDUP(vector, , int, s, 16, 4, 0);
9182
+ VDUP(vector, , int, s, 32, 2, 0);
9183
+ VDUP(vector, , int, s, 64, 1, 0);
9184
+ VDUP(vector, q, int, s, 8, 16, 0);
9185
+ VDUP(vector, q, int, s, 16, 8, 0);
9186
+ VDUP(vector, q, int, s, 32, 4, 0);
9187
+ VDUP(vector, q, int, s, 64, 2, 0);
9189
+ /* Use large shift amount */
9190
+ VDUP(vector_shift, , int, s, 8, 8, -10);
9191
+ VDUP(vector_shift, , int, s, 16, 4, -20);
9192
+ VDUP(vector_shift, , int, s, 32, 2, -40);
9193
+ VDUP(vector_shift, , int, s, 64, 1, -70);
9194
+ VDUP(vector_shift, q, int, s, 8, 16, -10);
9195
+ VDUP(vector_shift, q, int, s, 16, 8, -20);
9196
+ VDUP(vector_shift, q, int, s, 32, 4, -40);
9197
+ VDUP(vector_shift, q, int, s, 64, 2, -70);
9200
+#define CMT " (checking cumulative saturation: large negative shift amount with 0 input)"
9201
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9202
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9203
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9204
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9205
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9206
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9207
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9208
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9209
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9210
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9211
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9212
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9213
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9214
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9215
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9216
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9218
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9219
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9220
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9221
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9222
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9223
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9224
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9225
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9226
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9227
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9228
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9229
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9230
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9231
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9232
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9233
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9242
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_n.c
9244
+#include <arm_neon.h>
9245
+#include "arm-neon-ref.h"
9246
+#include "compute-ref-data.h"
9248
+/* Expected values of cumulative_saturation flag. */
9249
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
9250
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
9251
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
9252
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
9253
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
9254
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
9256
+/* Expected results. */
9257
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa,
9258
+ 0xfa, 0xfb, 0xfb, 0xfc };
9259
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa };
9260
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
9261
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9262
+ 0xff, 0xff, 0xff, 0xff };
9263
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9264
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9266
+/* Expected values of cumulative_saturation flag with shift by 3. */
9267
+int VECT_VAR(expected_cumulative_sat_sh3,int,16,8) = 1;
9268
+int VECT_VAR(expected_cumulative_sat_sh3,int,32,4) = 1;
9269
+int VECT_VAR(expected_cumulative_sat_sh3,int,64,2) = 1;
9270
+int VECT_VAR(expected_cumulative_sat_sh3,uint,16,8) = 1;
9271
+int VECT_VAR(expected_cumulative_sat_sh3,uint,32,4) = 1;
9272
+int VECT_VAR(expected_cumulative_sat_sh3,uint,64,2) = 1;
9274
+/* Expected results with shift by 3. */
9275
+VECT_VAR_DECL(expected_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9276
+ 0x7f, 0x7f, 0x7f, 0x7f };
9277
+VECT_VAR_DECL(expected_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
9278
+VECT_VAR_DECL(expected_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
9279
+VECT_VAR_DECL(expected_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9280
+ 0xff, 0xff, 0xff, 0xff };
9281
+VECT_VAR_DECL(expected_sh3,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9282
+VECT_VAR_DECL(expected_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9284
+/* Expected values of cumulative_saturation flag with shift by max
9286
+int VECT_VAR(expected_cumulative_sat_shmax,int,16,8) = 1;
9287
+int VECT_VAR(expected_cumulative_sat_shmax,int,32,4) = 1;
9288
+int VECT_VAR(expected_cumulative_sat_shmax,int,64,2) = 1;
9289
+int VECT_VAR(expected_cumulative_sat_shmax,uint,16,8) = 1;
9290
+int VECT_VAR(expected_cumulative_sat_shmax,uint,32,4) = 1;
9291
+int VECT_VAR(expected_cumulative_sat_shmax,uint,64,2) = 1;
9293
+/* Expected results with shift by max amount. */
9294
+VECT_VAR_DECL(expected_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9295
+ 0x7f, 0x7f, 0x7f, 0x7f };
9296
+VECT_VAR_DECL(expected_shmax,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
9297
+VECT_VAR_DECL(expected_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
9298
+VECT_VAR_DECL(expected_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9299
+ 0xff, 0xff, 0xff, 0xff };
9300
+VECT_VAR_DECL(expected_shmax,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9301
+VECT_VAR_DECL(expected_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9303
+#define INSN vqrshrn_n
9304
+#define TEST_MSG "VQRSHRN_N"
9306
+#define FNNAME1(NAME) void exec_ ## NAME (void)
9307
+#define FNNAME(NAME) FNNAME1(NAME)
9311
+ /* Basic test: y=vqrshrn_n(x,v), then store the result. */
9312
+#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9313
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \
9314
+ VECT_VAR(vector_res, T1, W2, N) = \
9315
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
9317
+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \
9318
+ VECT_VAR(vector_res, T1, W2, N)); \
9319
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
9321
+ /* Two auxliary macros are necessary to expand INSN */
9322
+#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9323
+ TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9325
+#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9326
+ TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9329
+ /* vector is twice as large as vector_res. */
9330
+ DECL_VARIABLE(vector, int, 16, 8);
9331
+ DECL_VARIABLE(vector, int, 32, 4);
9332
+ DECL_VARIABLE(vector, int, 64, 2);
9333
+ DECL_VARIABLE(vector, uint, 16, 8);
9334
+ DECL_VARIABLE(vector, uint, 32, 4);
9335
+ DECL_VARIABLE(vector, uint, 64, 2);
9337
+ DECL_VARIABLE(vector_res, int, 8, 8);
9338
+ DECL_VARIABLE(vector_res, int, 16, 4);
9339
+ DECL_VARIABLE(vector_res, int, 32, 2);
9340
+ DECL_VARIABLE(vector_res, uint, 8, 8);
9341
+ DECL_VARIABLE(vector_res, uint, 16, 4);
9342
+ DECL_VARIABLE(vector_res, uint, 32, 2);
9346
+ VLOAD(vector, buffer, q, int, s, 16, 8);
9347
+ VLOAD(vector, buffer, q, int, s, 32, 4);
9348
+ VLOAD(vector, buffer, q, int, s, 64, 2);
9349
+ VLOAD(vector, buffer, q, uint, u, 16, 8);
9350
+ VLOAD(vector, buffer, q, uint, u, 32, 4);
9351
+ VLOAD(vector, buffer, q, uint, u, 64, 2);
9353
+ /* Choose shift amount arbitrarily. */
9355
+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT);
9356
+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT);
9357
+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT);
9358
+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT);
9359
+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT);
9360
+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT);
9362
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
9363
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
9364
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
9365
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
9366
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
9367
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
9370
+ /* Another set of tests, shifting max value by 3. */
9371
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
9372
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
9373
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
9374
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
9375
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
9376
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
9379
+#define CMT " (check saturation: shift by 3)"
9380
+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT);
9381
+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT);
9382
+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT);
9383
+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT);
9384
+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT);
9385
+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT);
9387
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh3, CMT);
9388
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh3, CMT);
9389
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh3, CMT);
9390
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh3, CMT);
9391
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh3, CMT);
9392
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh3, CMT);
9395
+ /* Shift by max amount. */
9397
+#define CMT " (check saturation: shift by max)"
9398
+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT);
9399
+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT);
9400
+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT);
9401
+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT);
9402
+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT);
9403
+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT);
9405
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_shmax, CMT);
9406
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_shmax, CMT);
9407
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_shmax, CMT);
9408
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_shmax, CMT);
9409
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_shmax, CMT);
9410
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_shmax, CMT);
9415
+ exec_vqrshrn_n ();
9419
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_n.c
9421
+#include <arm_neon.h>
9422
+#include "arm-neon-ref.h"
9423
+#include "compute-ref-data.h"
9425
+/* Expected values of cumulative_saturation flag with negative unput. */
9426
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
9427
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
9428
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1;
9430
+/* Expected results with negative input. */
9431
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9432
+ 0x0, 0x0, 0x0, 0x0 };
9433
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9434
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
9436
+/* Expected values of cumulative_saturation flag with max input value
9438
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1;
9439
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1;
9440
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1;
9442
+/* Expected results with max input value shifted by 1. */
9443
+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9444
+ 0xff, 0xff, 0xff, 0xff };
9445
+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff,
9447
+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9448
+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 };
9450
+/* Expected values of cumulative_saturation flag with max input value
9451
+ shifted by max amount. */
9452
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0;
9453
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0;
9454
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0;
9456
+/* Expected results with max input value shifted by max amount. */
9457
+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
9458
+ 0x80, 0x80, 0x80, 0x80 };
9459
+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x8000, 0x8000,
9461
+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x80000000, 0x80000000 };
9463
+/* Expected values of cumulative_saturation flag with min input value
9464
+ shifted by max amount. */
9465
+int VECT_VAR(expected_cumulative_sat_min_shmax,int,16,8) = 1;
9466
+int VECT_VAR(expected_cumulative_sat_min_shmax,int,32,4) = 1;
9467
+int VECT_VAR(expected_cumulative_sat_min_shmax,int,64,2) = 1;
9469
+/* Expected results with min input value shifted by max amount. */
9470
+VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9471
+ 0x0, 0x0, 0x0, 0x0 };
9472
+VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9473
+VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x0, 0x0 };
9475
+/* Expected values of cumulative_saturation flag with inputs in usual
9477
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
9478
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
9479
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
9481
+/* Expected results with inputs in usual range. */
9482
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49,
9483
+ 0x49, 0x49, 0x49, 0x49 };
9484
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9485
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf };
9487
+#define INSN vqrshrun_n
9488
+#define TEST_MSG "VQRSHRUN_N"
9490
+#define FNNAME1(NAME) void exec_ ## NAME (void)
9491
+#define FNNAME(NAME) FNNAME1(NAME)
9495
+ /* Basic test: y=vqrshrun_n(x,v), then store the result. */
9496
+#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9497
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \
9498
+ VECT_VAR(vector_res, uint, W2, N) = \
9499
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
9501
+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \
9502
+ VECT_VAR(vector_res, uint, W2, N)); \
9503
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
9505
+ /* Two auxliary macros are necessary to expand INSN */
9506
+#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9507
+ TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9509
+#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9510
+ TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9513
+ /* vector is twice as large as vector_res. */
9514
+ DECL_VARIABLE(vector, int, 16, 8);
9515
+ DECL_VARIABLE(vector, int, 32, 4);
9516
+ DECL_VARIABLE(vector, int, 64, 2);
9518
+ DECL_VARIABLE(vector_res, uint, 8, 8);
9519
+ DECL_VARIABLE(vector_res, uint, 16, 4);
9520
+ DECL_VARIABLE(vector_res, uint, 32, 2);
9524
+ /* Fill input vector with negative values, to check saturation on
9526
+ VDUP(vector, q, int, s, 16, 8, -2);
9527
+ VDUP(vector, q, int, s, 32, 4, -3);
9528
+ VDUP(vector, q, int, s, 64, 2, -4);
9530
+ /* Choose shift amount arbitrarily. */
9531
+#define CMT " (negative input)"
9532
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT);
9533
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT);
9534
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT);
9536
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
9537
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
9538
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
9541
+ /* Fill input vector with max value, to check saturation on
9543
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
9544
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
9545
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
9549
+#define CMT " (check cumulative saturation: shift by 1)"
9550
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT);
9551
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT);
9552
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT);
9554
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT);
9555
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT);
9556
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT);
9559
+ /* shift by max. */
9561
+#define CMT " (check cumulative saturation: shift by max, positive input)"
9562
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
9563
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
9564
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
9566
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT);
9567
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT);
9568
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT);
9571
+ /* Fill input vector with min value, to check saturation on limits. */
9572
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
9573
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
9574
+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
9576
+ /* shift by max */
9578
+#define CMT " (check cumulative saturation: shift by max, negative input)"
9579
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_min_shmax, CMT);
9580
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_min_shmax, CMT);
9581
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_min_shmax, CMT);
9583
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT);
9584
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT);
9585
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT);
9588
+ /* Fill input vector with positive values, to check normal case. */
9589
+ VDUP(vector, q, int, s, 16, 8, 0x1234);
9590
+ VDUP(vector, q, int, s, 32, 4, 0x87654321);
9591
+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF);
9593
+ /* shift arbitrary amount. */
9596
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT);
9597
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT);
9598
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT);
9600
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
9601
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
9602
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
9607
+ exec_vqrshrun_n ();
9611
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl.c
9613
+#include <arm_neon.h>
9614
+#include "arm-neon-ref.h"
9615
+#include "compute-ref-data.h"
9617
+/* Expected values of cumulative_saturation flag with input=0. */
9618
+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0;
9619
+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0;
9620
+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0;
9621
+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0;
9622
+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0;
9623
+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0;
9624
+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0;
9625
+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0;
9626
+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0;
9627
+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0;
9628
+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0;
9629
+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0;
9630
+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0;
9631
+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0;
9632
+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0;
9633
+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0;
9635
+/* Expected results with input=0. */
9636
+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9637
+ 0x0, 0x0, 0x0, 0x0 };
9638
+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9639
+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 };
9640
+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 };
9641
+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9642
+ 0x0, 0x0, 0x0, 0x0 };
9643
+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9644
+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 };
9645
+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 };
9646
+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9647
+ 0x0, 0x0, 0x0, 0x0,
9648
+ 0x0, 0x0, 0x0, 0x0,
9649
+ 0x0, 0x0, 0x0, 0x0 };
9650
+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9651
+ 0x0, 0x0, 0x0, 0x0 };
9652
+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9653
+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 };
9654
+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9655
+ 0x0, 0x0, 0x0, 0x0,
9656
+ 0x0, 0x0, 0x0, 0x0,
9657
+ 0x0, 0x0, 0x0, 0x0 };
9658
+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9659
+ 0x0, 0x0, 0x0, 0x0 };
9660
+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9661
+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 };
9663
+/* Expected values of cumulative_saturation flag with input=0 and
9664
+ negative shift amount. */
9665
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0;
9666
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0;
9667
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0;
9668
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0;
9669
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0;
9670
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0;
9671
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0;
9672
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0;
9673
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0;
9674
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0;
9675
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0;
9676
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0;
9677
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0;
9678
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0;
9679
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0;
9680
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0;
9682
+/* Expected results with input=0 and negative shift amount. */
9683
+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9684
+ 0x0, 0x0, 0x0, 0x0 };
9685
+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9686
+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 };
9687
+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 };
9688
+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9689
+ 0x0, 0x0, 0x0, 0x0 };
9690
+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9691
+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 };
9692
+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 };
9693
+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9694
+ 0x0, 0x0, 0x0, 0x0,
9695
+ 0x0, 0x0, 0x0, 0x0,
9696
+ 0x0, 0x0, 0x0, 0x0 };
9697
+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9698
+ 0x0, 0x0, 0x0, 0x0 };
9699
+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9700
+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 };
9701
+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9702
+ 0x0, 0x0, 0x0, 0x0,
9703
+ 0x0, 0x0, 0x0, 0x0,
9704
+ 0x0, 0x0, 0x0, 0x0 };
9705
+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9706
+ 0x0, 0x0, 0x0, 0x0 };
9707
+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9708
+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 };
9710
+/* Expected values of cumulative_saturation flag. */
9711
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
9712
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
9713
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
9714
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
9715
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
9716
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
9717
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
9718
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0;
9719
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1;
9720
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1;
9721
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
9722
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1;
9723
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
9724
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
9725
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
9726
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
9728
+/* Expected results. */
9729
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
9730
+ 0xe8, 0xea, 0xec, 0xee };
9731
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
9732
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
9733
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe };
9734
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9735
+ 0xff, 0xff, 0xff, 0xff };
9736
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9737
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9738
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe };
9739
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
9740
+ 0x80, 0x80, 0x80, 0x80,
9741
+ 0x80, 0x80, 0x80, 0x80,
9742
+ 0x80, 0x80, 0x80, 0x80 };
9743
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
9744
+ 0x8000, 0x8000, 0x8000, 0x8000 };
9745
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000,
9746
+ 0x80000000, 0x80000000 };
9747
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000,
9748
+ 0x8000000000000000 };
9749
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9750
+ 0xff, 0xff, 0xff, 0xff,
9751
+ 0xff, 0xff, 0xff, 0xff,
9752
+ 0xff, 0xff, 0xff, 0xff };
9753
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
9754
+ 0xffff, 0xffff, 0xffff, 0xffff };
9755
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
9756
+ 0xffffffff, 0xffffffff };
9757
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
9758
+ 0xffffffffffffffff };
9760
+/* Expected values of cumulative_sat_saturation flag with negative shift
9762
+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0;
9763
+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0;
9764
+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0;
9765
+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0;
9766
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0;
9767
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0;
9768
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0;
9769
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0;
9770
+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0;
9771
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
9772
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
9773
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0;
9774
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0;
9775
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0;
9776
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0;
9777
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0;
9779
+/* Expected results with negative shift amount. */
9780
+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
9781
+ 0xfa, 0xfa, 0xfb, 0xfb };
9782
+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc };
9783
+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
9784
+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff };
9785
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
9786
+ 0x7a, 0x7a, 0x7b, 0x7b };
9787
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc };
9788
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe };
9789
+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff };
9790
+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9791
+ 0xff, 0xff, 0xff, 0xff,
9792
+ 0xff, 0xff, 0xff, 0xff,
9793
+ 0xff, 0xff, 0xff, 0xff };
9794
+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
9795
+ 0xffff, 0xffff, 0xffff, 0xffff };
9796
+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff,
9797
+ 0xffffffff, 0xffffffff };
9798
+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff,
9799
+ 0xffffffffffffffff };
9800
+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1,
9801
+ 0x1, 0x1, 0x1, 0x1,
9802
+ 0x1, 0x1, 0x1, 0x1,
9803
+ 0x1, 0x1, 0x1, 0x1 };
9804
+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f,
9805
+ 0x1f, 0x1f, 0x1f, 0x1f };
9806
+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff,
9807
+ 0x7ffff, 0x7ffff };
9808
+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff };
9810
+/* Expected values of cumulative_sat_saturation flag with negative
9811
+ input and large shift amount. */
9812
+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1;
9813
+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1;
9814
+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1;
9815
+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1;
9816
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1;
9817
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1;
9818
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1;
9819
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1;
9820
+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1;
9821
+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1;
9822
+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1;
9823
+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1;
9824
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1;
9825
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1;
9826
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1;
9827
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1;
9829
+/* Expected results with negative input and large shift amount. */
9830
+VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
9831
+ 0x80, 0x80, 0x80, 0x80 };
9832
+VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000,
9834
+VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 };
9835
+VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 };
9836
+VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9837
+ 0xff, 0xff, 0xff, 0xff };
9838
+VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff,
9840
+VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9841
+VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff };
9842
+VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
9843
+ 0x80, 0x80, 0x80, 0x80,
9844
+ 0x80, 0x80, 0x80, 0x80,
9845
+ 0x80, 0x80, 0x80, 0x80 };
9846
+VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000,
9850
+VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000,
9851
+ 0x80000000, 0x80000000 };
9852
+VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000,
9853
+ 0x8000000000000000 };
9854
+VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9855
+ 0xff, 0xff, 0xff, 0xff,
9856
+ 0xff, 0xff, 0xff, 0xff,
9857
+ 0xff, 0xff, 0xff, 0xff };
9858
+VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff,
9862
+VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff,
9863
+ 0xffffffff, 0xffffffff };
9864
+VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff,
9865
+ 0xffffffffffffffff };
9867
+/* Expected values of cumulative_sat_saturation flag with max input
9868
+ and shift by -1. */
9869
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0;
9870
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0;
9871
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0;
9872
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0;
9873
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0;
9874
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0;
9875
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0;
9876
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0;
9877
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0;
9878
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0;
9879
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0;
9880
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0;
9881
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0;
9882
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0;
9883
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0;
9884
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0;
9886
+/* Expected results with max input and shift by -1. */
9887
+VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f,
9888
+ 0x3f, 0x3f, 0x3f, 0x3f };
9889
+VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff,
9891
+VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff };
9892
+VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff };
9893
+VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9894
+ 0x7f, 0x7f, 0x7f, 0x7f };
9895
+VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff,
9897
+VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff };
9898
+VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff };
9899
+VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f,
9900
+ 0x3f, 0x3f, 0x3f, 0x3f,
9901
+ 0x3f, 0x3f, 0x3f, 0x3f,
9902
+ 0x3f, 0x3f, 0x3f, 0x3f };
9903
+VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff,
9907
+VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff,
9908
+ 0x3fffffff, 0x3fffffff };
9909
+VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff,
9910
+ 0x3fffffffffffffff };
9911
+VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9912
+ 0x7f, 0x7f, 0x7f, 0x7f,
9913
+ 0x7f, 0x7f, 0x7f, 0x7f,
9914
+ 0x7f, 0x7f, 0x7f, 0x7f };
9915
+VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff,
9919
+VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff,
9920
+ 0x7fffffff, 0x7fffffff };
9921
+VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff,
9922
+ 0x7fffffffffffffff };
9924
+/* Expected values of cumulative_sat_saturation flag with max input
9925
+ and large shift amount. */
9926
+int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1;
9927
+int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1;
9928
+int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1;
9929
+int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1;
9930
+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1;
9931
+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1;
9932
+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1;
9933
+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1;
9934
+int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1;
9935
+int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1;
9936
+int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1;
9937
+int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1;
9938
+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1;
9939
+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1;
9940
+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1;
9941
+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1;
9943
+/* Expected results with max input and large shift amount. */
9944
+VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9945
+ 0x7f, 0x7f, 0x7f, 0x7f };
9946
+VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff,
9948
+VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
9949
+VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff };
9950
+VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9951
+ 0xff, 0xff, 0xff, 0xff };
9952
+VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff,
9954
+VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9955
+VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff };
9956
+VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9957
+ 0x7f, 0x7f, 0x7f, 0x7f,
9958
+ 0x7f, 0x7f, 0x7f, 0x7f,
9959
+ 0x7f, 0x7f, 0x7f, 0x7f };
9960
+VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff,
9964
+VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
9965
+ 0x7fffffff, 0x7fffffff };
9966
+VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff,
9967
+ 0x7fffffffffffffff };
9968
+VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9969
+ 0xff, 0xff, 0xff, 0xff,
9970
+ 0xff, 0xff, 0xff, 0xff,
9971
+ 0xff, 0xff, 0xff, 0xff };
9972
+VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff,
9976
+VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff,
9977
+ 0xffffffff, 0xffffffff };
9978
+VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff,
9979
+ 0xffffffffffffffff };
9981
+/* Expected values of cumulative_sat_saturation flag with saturation
9982
+ on 64-bits values. */
9983
+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1;
9984
+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1;
9986
+/* Expected results with saturation on 64-bits values.. */
9987
+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 };
9988
+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff,
9989
+ 0x7fffffffffffffff };
9992
+#define TEST_MSG "VQSHL/VQSHLQ"
9994
+#define FNNAME1(NAME) void exec_ ## NAME (void)
9995
+#define FNNAME(NAME) FNNAME1(NAME)
9999
+ /* Basic test: v3=vqshl(v1,v2), then store the result. */
10000
+#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10001
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
10002
+ VECT_VAR(vector_res, T1, W, N) = \
10003
+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
10004
+ VECT_VAR(vector_shift, T3, W, N)); \
10005
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
10006
+ VECT_VAR(vector_res, T1, W, N)); \
10007
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10009
+ /* Two auxliary macros are necessary to expand INSN */
10010
+#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10011
+ TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10013
+#define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10014
+ TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10017
+ DECL_VARIABLE_ALL_VARIANTS(vector);
10018
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
10020
+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
10022
+ clean_results ();
10024
+ /* Fill input vector with 0, to check saturation on limits. */
10025
+ VDUP(vector, , int, s, 8, 8, 0);
10026
+ VDUP(vector, , int, s, 16, 4, 0);
10027
+ VDUP(vector, , int, s, 32, 2, 0);
10028
+ VDUP(vector, , int, s, 64, 1, 0);
10029
+ VDUP(vector, , uint, u, 8, 8, 0);
10030
+ VDUP(vector, , uint, u, 16, 4, 0);
10031
+ VDUP(vector, , uint, u, 32, 2, 0);
10032
+ VDUP(vector, , uint, u, 64, 1, 0);
10033
+ VDUP(vector, q, int, s, 8, 16, 0);
10034
+ VDUP(vector, q, int, s, 16, 8, 0);
10035
+ VDUP(vector, q, int, s, 32, 4, 0);
10036
+ VDUP(vector, q, int, s, 64, 2, 0);
10037
+ VDUP(vector, q, uint, u, 8, 16, 0);
10038
+ VDUP(vector, q, uint, u, 16, 8, 0);
10039
+ VDUP(vector, q, uint, u, 32, 4, 0);
10040
+ VDUP(vector, q, uint, u, 64, 2, 0);
10042
+ /* Choose init value arbitrarily, will be used as shift amount */
10043
+ /* Use values equal or one-less-than the type width to check
10044
+ behaviour on limits. */
10046
+ /* 64-bits vectors first. */
10047
+ /* Shift 8-bits lanes by 7... */
10048
+ VDUP(vector_shift, , int, s, 8, 8, 7);
10049
+ /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */
10050
+ VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6);
10051
+ VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8);
10052
+ VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9);
10054
+ /* Shift 16-bits lanes by 15... */
10055
+ VDUP(vector_shift, , int, s, 16, 4, 15);
10056
+ /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17). */
10057
+ VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14);
10058
+ VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16);
10059
+ VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17);
10061
+ /* Shift 32-bits lanes by 31... */
10062
+ VDUP(vector_shift, , int, s, 32, 2, 31);
10063
+ /* ... except lane 1 (by 30). */
10064
+ VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30);
10066
+ /* Shift 64 bits lane by 63. */
10067
+ VDUP(vector_shift, , int, s, 64, 1, 63);
10069
+ /* 128-bits vectors. */
10070
+ /* Shift 8-bits lanes by 8. */
10071
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
10072
+ /* Shift 16-bits lanes by 16. */
10073
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
10074
+ /* Shift 32-bits lanes by 32... */
10075
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10076
+ /* ... except lane 1 (by 33). */
10077
+ VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33);
10079
+ /* Shift 64-bits lanes by 64... */
10080
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10081
+ /* ... except lane 1 (by 62). */
10082
+ VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62);
10084
+#define CMT " (with input = 0)"
10085
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT);
10086
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT);
10087
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT);
10088
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT);
10089
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT);
10090
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT);
10091
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT);
10092
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT);
10093
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT);
10094
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT);
10095
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT);
10096
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT);
10097
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT);
10098
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT);
10099
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT);
10100
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT);
10102
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT);
10103
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT);
10104
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT);
10105
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT);
10106
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT);
10107
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT);
10108
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT);
10109
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT);
10110
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT);
10111
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT);
10112
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT);
10113
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT);
10114
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT);
10115
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT);
10116
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT);
10117
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT);
10120
+ /* Use negative shift amounts */
10121
+ VDUP(vector_shift, , int, s, 8, 8, -1);
10122
+ VDUP(vector_shift, , int, s, 16, 4, -2);
10123
+ VDUP(vector_shift, , int, s, 32, 2, -3);
10124
+ VDUP(vector_shift, , int, s, 64, 1, -4);
10125
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
10126
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
10127
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
10128
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
10131
+#define CMT " (input 0 and negative shift amount)"
10132
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT);
10133
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT);
10134
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT);
10135
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT);
10136
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT);
10137
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT);
10138
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT);
10139
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT);
10140
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT);
10141
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT);
10142
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT);
10143
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT);
10144
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT);
10145
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT);
10146
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT);
10147
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT);
10149
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT);
10150
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT);
10151
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT);
10152
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT);
10153
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT);
10154
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT);
10155
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT);
10156
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT);
10157
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT);
10158
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT);
10159
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT);
10160
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT);
10161
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT);
10162
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT);
10163
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT);
10164
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT);
10166
+ /* Test again, with predefined input values. */
10167
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
10169
+ /* Choose init value arbitrarily, will be used as shift amount. */
10170
+ VDUP(vector_shift, , int, s, 8, 8, 1);
10171
+ VDUP(vector_shift, , int, s, 16, 4, 3);
10172
+ VDUP(vector_shift, , int, s, 32, 2, 8);
10173
+ VDUP(vector_shift, , int, s, 64, 1, -3);
10174
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
10175
+ VDUP(vector_shift, q, int, s, 16, 8, 12);
10176
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10177
+ VDUP(vector_shift, q, int, s, 64, 2, 63);
10181
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT);
10182
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT);
10183
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT);
10184
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT);
10185
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT);
10186
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT);
10187
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT);
10188
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT);
10189
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT);
10190
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT);
10191
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT);
10192
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT);
10193
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT);
10194
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT);
10195
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT);
10196
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT);
10198
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
10199
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
10200
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
10201
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
10202
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
10203
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
10204
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
10205
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
10206
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
10207
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
10208
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
10209
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
10210
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
10211
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
10212
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
10213
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
10216
+ /* Use negative shift amounts */
10217
+ VDUP(vector_shift, , int, s, 8, 8, -1);
10218
+ VDUP(vector_shift, , int, s, 16, 4, -2);
10219
+ VDUP(vector_shift, , int, s, 32, 2, -3);
10220
+ VDUP(vector_shift, , int, s, 64, 1, -4);
10221
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
10222
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
10223
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
10224
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
10227
+#define CMT " (negative shift amount)"
10228
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT);
10229
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT);
10230
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT);
10231
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT);
10232
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT);
10233
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT);
10234
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT);
10235
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT);
10236
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT);
10237
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT);
10238
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT);
10239
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT);
10240
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT);
10241
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT);
10242
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT);
10243
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT);
10245
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT);
10246
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT);
10247
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT);
10248
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT);
10249
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
10250
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
10251
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
10252
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
10253
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT);
10254
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT);
10255
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT);
10256
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT);
10257
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
10258
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
10259
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
10260
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
10263
+ /* Use large shift amounts. */
10264
+ VDUP(vector_shift, , int, s, 8, 8, 8);
10265
+ VDUP(vector_shift, , int, s, 16, 4, 16);
10266
+ VDUP(vector_shift, , int, s, 32, 2, 32);
10267
+ VDUP(vector_shift, , int, s, 64, 1, 64);
10268
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
10269
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
10270
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10271
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10274
+#define CMT " (large shift amount, negative input)"
10275
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT);
10276
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT);
10277
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT);
10278
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT);
10279
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT);
10280
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT);
10281
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT);
10282
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT);
10283
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT);
10284
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT);
10285
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT);
10286
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT);
10287
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT);
10288
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT);
10289
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT);
10290
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large, CMT);
10292
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT);
10293
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT);
10294
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT);
10295
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT);
10296
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT);
10297
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT);
10298
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT);
10299
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT);
10300
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT);
10301
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT);
10302
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT);
10303
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT);
10304
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT);
10305
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT);
10306
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT);
10307
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT);
10310
+ /* Fill input vector with max value, to check saturation on limits */
10311
+ VDUP(vector, , int, s, 8, 8, 0x7F);
10312
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
10313
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
10314
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
10315
+ VDUP(vector, , uint, u, 8, 8, 0xFF);
10316
+ VDUP(vector, , uint, u, 16, 4, 0xFFFF);
10317
+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
10318
+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
10319
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
10320
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
10321
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
10322
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
10323
+ VDUP(vector, q, uint, u, 8, 16, 0xFF);
10324
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
10325
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
10326
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
10328
+ /* Shift by -1 */
10329
+ VDUP(vector_shift, , int, s, 8, 8, -1);
10330
+ VDUP(vector_shift, , int, s, 16, 4, -1);
10331
+ VDUP(vector_shift, , int, s, 32, 2, -1);
10332
+ VDUP(vector_shift, , int, s, 64, 1, -1);
10333
+ VDUP(vector_shift, q, int, s, 8, 16, -1);
10334
+ VDUP(vector_shift, q, int, s, 16, 8, -1);
10335
+ VDUP(vector_shift, q, int, s, 32, 4, -1);
10336
+ VDUP(vector_shift, q, int, s, 64, 2, -1);
10339
+#define CMT " (max input, shift by -1)"
10340
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT);
10341
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT);
10342
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT);
10343
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT);
10344
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT);
10345
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT);
10346
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT);
10347
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT);
10348
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT);
10349
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT);
10350
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT);
10351
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_minus1, CMT);
10352
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT);
10353
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT);
10354
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT);
10355
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT);
10357
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT);
10358
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT);
10359
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT);
10360
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT);
10361
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT);
10362
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT);
10363
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT);
10364
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT);
10365
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT);
10366
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT);
10367
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT);
10368
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT);
10369
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT);
10370
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT);
10371
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT);
10372
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT);
10375
+ /* Use large shift amounts */
10376
+ VDUP(vector_shift, , int, s, 8, 8, 8);
10377
+ VDUP(vector_shift, , int, s, 16, 4, 16);
10378
+ VDUP(vector_shift, , int, s, 32, 2, 32);
10379
+ VDUP(vector_shift, , int, s, 64, 1, 64);
10380
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
10381
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
10382
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10383
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10386
+#define CMT " (max input, large shift amount)"
10387
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT);
10388
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT);
10389
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT);
10390
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT);
10391
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT);
10392
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT);
10393
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT);
10394
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT);
10395
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT);
10396
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT);
10397
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT);
10398
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT);
10399
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT);
10400
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT);
10401
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT);
10402
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT);
10404
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT);
10405
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT);
10406
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT);
10407
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT);
10408
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT);
10409
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT);
10410
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT);
10411
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT);
10412
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT);
10413
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT);
10414
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT);
10415
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT);
10416
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT);
10417
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT);
10418
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT);
10419
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT);
10422
+ /* Check 64 bits saturation. */
10423
+ VDUP(vector, , int, s, 64, 1, -10);
10424
+ VDUP(vector_shift, , int, s, 64, 1, 64);
10425
+ VDUP(vector, q, int, s, 64, 2, 10);
10426
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10429
+#define CMT " (check saturation on 64 bits)"
10430
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT);
10431
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT);
10433
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT);
10434
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT);
10442
--- a/src//dev/null
10443
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl_n.c
10445
+#include <arm_neon.h>
10446
+#include "arm-neon-ref.h"
10447
+#include "compute-ref-data.h"
10449
+/* Expected values of cumulative_saturation flag. */
10450
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
10451
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
10452
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
10453
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
10454
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
10455
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
10456
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
10457
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1;
10458
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
10459
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
10460
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
10461
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
10462
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
10463
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
10464
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
10465
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
10467
+/* Expected results. */
10468
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc,
10469
+ 0xd0, 0xd4, 0xd8, 0xdc };
10470
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 };
10471
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 };
10472
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 };
10473
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10474
+ 0xff, 0xff, 0xff, 0xff };
10475
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10476
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10477
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
10478
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc,
10479
+ 0xd0, 0xd4, 0xd8, 0xdc,
10480
+ 0xe0, 0xe4, 0xe8, 0xec,
10481
+ 0xf0, 0xf4, 0xf8, 0xfc };
10482
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6,
10483
+ 0xffe8, 0xffea, 0xffec, 0xffee };
10484
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2,
10485
+ 0xffffffe4, 0xffffffe6 };
10486
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 };
10487
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
10488
+ 0xff, 0xff, 0xff, 0xff,
10489
+ 0xff, 0xff, 0xff, 0xff,
10490
+ 0xff, 0xff, 0xff, 0xff };
10491
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
10492
+ 0xffff, 0xffff, 0xffff, 0xffff };
10493
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
10494
+ 0xffffffff, 0xffffffff };
10495
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
10496
+ 0xffffffffffffffff };
10498
+/* Expected values of cumulative_saturation flag with max positive input. */
10499
+int VECT_VAR(expected_cumulative_sat_max,int,8,8) = 1;
10500
+int VECT_VAR(expected_cumulative_sat_max,int,16,4) = 1;
10501
+int VECT_VAR(expected_cumulative_sat_max,int,32,2) = 1;
10502
+int VECT_VAR(expected_cumulative_sat_max,int,64,1) = 1;
10503
+int VECT_VAR(expected_cumulative_sat_max,uint,8,8) = 1;
10504
+int VECT_VAR(expected_cumulative_sat_max,uint,16,4) = 1;
10505
+int VECT_VAR(expected_cumulative_sat_max,uint,32,2) = 1;
10506
+int VECT_VAR(expected_cumulative_sat_max,uint,64,1) = 1;
10507
+int VECT_VAR(expected_cumulative_sat_max,int,8,16) = 1;
10508
+int VECT_VAR(expected_cumulative_sat_max,int,16,8) = 1;
10509
+int VECT_VAR(expected_cumulative_sat_max,int,32,4) = 1;
10510
+int VECT_VAR(expected_cumulative_sat_max,int,64,2) = 1;
10511
+int VECT_VAR(expected_cumulative_sat_max,uint,8,16) = 1;
10512
+int VECT_VAR(expected_cumulative_sat_max,uint,16,8) = 1;
10513
+int VECT_VAR(expected_cumulative_sat_max,uint,32,4) = 1;
10514
+int VECT_VAR(expected_cumulative_sat_max,uint,64,2) = 1;
10516
+/* Expected results with max positive input. */
10517
+VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
10518
+ 0x7f, 0x7f, 0x7f, 0x7f };
10519
+VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
10520
+VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
10521
+VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff };
10522
+VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10523
+ 0xff, 0xff, 0xff, 0xff };
10524
+VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10525
+VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10526
+VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff };
10527
+VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
10528
+ 0x7f, 0x7f, 0x7f, 0x7f,
10529
+ 0x7f, 0x7f, 0x7f, 0x7f,
10530
+ 0x7f, 0x7f, 0x7f, 0x7f };
10531
+VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
10532
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
10533
+VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
10534
+ 0x7fffffff, 0x7fffffff };
10535
+VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff,
10536
+ 0x7fffffffffffffff };
10537
+VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
10538
+ 0xff, 0xff, 0xff, 0xff,
10539
+ 0xff, 0xff, 0xff, 0xff,
10540
+ 0xff, 0xff, 0xff, 0xff };
10541
+VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
10542
+ 0xffff, 0xffff, 0xffff, 0xffff };
10543
+VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff,
10544
+ 0xffffffff, 0xffffffff };
10545
+VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff,
10546
+ 0xffffffffffffffff };
10548
+#define INSN vqshl
10549
+#define TEST_MSG "VQSHL_N/VQSHLQ_N"
10551
+#define FNNAME1(NAME) void exec_ ## NAME ##_n (void)
10552
+#define FNNAME(NAME) FNNAME1(NAME)
10556
+ /* Basic test: v2=vqshl_n(v1,v), then store the result. */
10557
+#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10558
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
10559
+ VECT_VAR(vector_res, T1, W, N) = \
10560
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
10562
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
10563
+ VECT_VAR(vector_res, T1, W, N)); \
10564
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10566
+ /* Two auxliary macros are necessary to expand INSN */
10567
+#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10568
+ TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10570
+#define TEST_VQSHL_N(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10571
+ TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10573
+ DECL_VARIABLE_ALL_VARIANTS(vector);
10574
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
10576
+ clean_results ();
10578
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
10580
+ /* Choose shift amount arbitrarily. */
10582
+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat, CMT);
10583
+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat, CMT);
10584
+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat, CMT);
10585
+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat, CMT);
10586
+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat, CMT);
10587
+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat, CMT);
10588
+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat, CMT);
10589
+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat, CMT);
10591
+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat, CMT);
10592
+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat, CMT);
10593
+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat, CMT);
10594
+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat, CMT);
10595
+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat, CMT);
10596
+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat, CMT);
10597
+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat, CMT);
10598
+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat, CMT);
10600
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
10601
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
10602
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
10603
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
10604
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
10605
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
10606
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
10607
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
10608
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
10609
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
10610
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
10611
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
10612
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
10613
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
10614
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
10615
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
10618
+ /* Fill input vector with max value, to check saturation on limits. */
10619
+ VDUP(vector, , int, s, 8, 8, 0x7F);
10620
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
10621
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
10622
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
10623
+ VDUP(vector, , uint, u, 8, 8, 0xFF);
10624
+ VDUP(vector, , uint, u, 16, 4, 0xFFFF);
10625
+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
10626
+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
10627
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
10628
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
10629
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
10630
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
10631
+ VDUP(vector, q, uint, u, 8, 16, 0xFF);
10632
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
10633
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
10634
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
10637
+#define CMT " (with max input)"
10638
+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat_max, CMT);
10639
+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat_max, CMT);
10640
+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat_max, CMT);
10641
+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat_max, CMT);
10642
+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat_max, CMT);
10643
+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat_max, CMT);
10644
+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat_max, CMT);
10645
+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat_max, CMT);
10647
+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat_max, CMT);
10648
+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat_max, CMT);
10649
+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat_max, CMT);
10650
+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat_max, CMT);
10651
+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat_max, CMT);
10652
+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat_max, CMT);
10653
+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat_max, CMT);
10654
+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat_max, CMT);
10656
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT);
10657
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT);
10658
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT);
10659
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT);
10660
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT);
10661
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT);
10662
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT);
10663
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT);
10664
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT);
10665
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT);
10666
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT);
10667
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT);
10668
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT);
10669
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT);
10670
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT);
10671
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT);
10679
--- a/src//dev/null
10680
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshlu_n.c
10682
+#include <arm_neon.h>
10683
+#include "arm-neon-ref.h"
10684
+#include "compute-ref-data.h"
10686
+/* Expected values of cumulative_saturation flag with negative
10688
+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 1;
10689
+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 1;
10690
+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 1;
10691
+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 1;
10692
+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 1;
10693
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1;
10694
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1;
10695
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1;
10697
+/* Expected results with negative input. */
10698
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
10699
+ 0x0, 0x0, 0x0, 0x0 };
10700
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
10701
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
10702
+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 };
10703
+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
10704
+ 0x0, 0x0, 0x0, 0x0,
10705
+ 0x0, 0x0, 0x0, 0x0,
10706
+ 0x0, 0x0, 0x0, 0x0 };
10707
+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
10708
+ 0x0, 0x0, 0x0, 0x0 };
10709
+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
10710
+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 };
10712
+/* Expected values of cumulative_saturation flag with shift by 1. */
10713
+int VECT_VAR(expected_cumulative_sat_sh1,int,8,8) = 0;
10714
+int VECT_VAR(expected_cumulative_sat_sh1,int,16,4) = 0;
10715
+int VECT_VAR(expected_cumulative_sat_sh1,int,32,2) = 0;
10716
+int VECT_VAR(expected_cumulative_sat_sh1,int,64,1) = 0;
10717
+int VECT_VAR(expected_cumulative_sat_sh1,int,8,16) = 0;
10718
+int VECT_VAR(expected_cumulative_sat_sh1,int,16,8) = 0;
10719
+int VECT_VAR(expected_cumulative_sat_sh1,int,32,4) = 0;
10720
+int VECT_VAR(expected_cumulative_sat_sh1,int,64,2) = 0;
10722
+/* Expected results with shift by 1. */
10723
+VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
10724
+ 0xfe, 0xfe, 0xfe, 0xfe };
10725
+VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe };
10726
+VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe };
10727
+VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe };
10728
+VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
10729
+ 0xfe, 0xfe, 0xfe, 0xfe,
10730
+ 0xfe, 0xfe, 0xfe, 0xfe,
10731
+ 0xfe, 0xfe, 0xfe, 0xfe };
10732
+VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe,
10733
+ 0xfffe, 0xfffe, 0xfffe, 0xfffe };
10734
+VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe,
10735
+ 0xfffffffe, 0xfffffffe };
10736
+VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe,
10737
+ 0xfffffffffffffffe };
10739
+/* Expected values of cumulative_saturation flag with shift by 2. */
10740
+int VECT_VAR(expected_cumulative_sat_sh2,int,8,8) = 1;
10741
+int VECT_VAR(expected_cumulative_sat_sh2,int,16,4) = 1;
10742
+int VECT_VAR(expected_cumulative_sat_sh2,int,32,2) = 1;
10743
+int VECT_VAR(expected_cumulative_sat_sh2,int,64,1) = 1;
10744
+int VECT_VAR(expected_cumulative_sat_sh2,int,8,16) = 1;
10745
+int VECT_VAR(expected_cumulative_sat_sh2,int,16,8) = 1;
10746
+int VECT_VAR(expected_cumulative_sat_sh2,int,32,4) = 1;
10747
+int VECT_VAR(expected_cumulative_sat_sh2,int,64,2) = 1;
10749
+/* Expected results with shift by 2. */
10750
+VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10751
+ 0xff, 0xff, 0xff, 0xff };
10752
+VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10753
+VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10754
+VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff };
10755
+VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
10756
+ 0xff, 0xff, 0xff, 0xff,
10757
+ 0xff, 0xff, 0xff, 0xff,
10758
+ 0xff, 0xff, 0xff, 0xff };
10759
+VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
10760
+ 0xffff, 0xffff, 0xffff, 0xffff };
10761
+VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
10762
+ 0xffffffff, 0xffffffff };
10763
+VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff,
10764
+ 0xffffffffffffffff };
10766
+/* Expected values of cumulative_saturation flag. */
10767
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
10768
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
10769
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
10770
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
10771
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
10772
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
10773
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
10774
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
10776
+/* Expected results. */
10777
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
10778
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 };
10779
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 };
10780
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 };
10781
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0,
10782
+ 0xa0, 0xa0, 0xa0, 0xa0,
10783
+ 0xa0, 0xa0, 0xa0, 0xa0,
10784
+ 0xa0, 0xa0, 0xa0, 0xa0 };
10785
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180,
10786
+ 0x180, 0x180, 0x180, 0x180 };
10787
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 };
10788
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 };
10791
+#define INSN vqshlu
10792
+#define TEST_MSG "VQSHLU_N/VQSHLUQ_N"
10794
+#define FNNAME1(NAME) void exec_ ## NAME ## _n(void)
10795
+#define FNNAME(NAME) FNNAME1(NAME)
10799
+ /* Basic test: v2=vqshlu_n(v1,v), then store the result. */
10800
+#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10801
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \
10802
+ VECT_VAR(vector_res, T3, W, N) = \
10803
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
10805
+ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \
10806
+ VECT_VAR(vector_res, T3, W, N)); \
10807
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10809
+ /* Two auxliary macros are necessary to expand INSN */
10810
+#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10811
+ TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
10813
+#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10814
+ TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
10817
+ DECL_VARIABLE_ALL_VARIANTS(vector);
10818
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
10820
+ clean_results ();
10822
+ /* Fill input vector with negative values, to check saturation on
10824
+ VDUP(vector, , int, s, 8, 8, -1);
10825
+ VDUP(vector, , int, s, 16, 4, -2);
10826
+ VDUP(vector, , int, s, 32, 2, -3);
10827
+ VDUP(vector, , int, s, 64, 1, -4);
10828
+ VDUP(vector, q, int, s, 8, 16, -1);
10829
+ VDUP(vector, q, int, s, 16, 8, -2);
10830
+ VDUP(vector, q, int, s, 32, 4, -3);
10831
+ VDUP(vector, q, int, s, 64, 2, -4);
10833
+ /* Choose shift amount arbitrarily. */
10834
+#define CMT " (negative input)"
10835
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_neg, CMT);
10836
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_neg, CMT);
10837
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_neg, CMT);
10838
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_neg, CMT);
10839
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_neg, CMT);
10840
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_neg, CMT);
10841
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_neg, CMT);
10842
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_neg, CMT);
10844
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
10845
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
10846
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
10847
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
10848
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
10849
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
10850
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
10851
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
10854
+ /* Fill input vector with max value, to check saturation on
10856
+ VDUP(vector, , int, s, 8, 8, 0x7F);
10857
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
10858
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
10859
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
10860
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
10861
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
10862
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
10863
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL);
10865
+ /* shift by 1. */
10867
+#define CMT " (shift by 1)"
10868
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat_sh1, CMT);
10869
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_sh1, CMT);
10870
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_sh1, CMT);
10871
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, expected_cumulative_sat_sh1, CMT);
10872
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, expected_cumulative_sat_sh1, CMT);
10873
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_sh1, CMT);
10874
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_sh1, CMT);
10875
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, expected_cumulative_sat_sh1, CMT);
10877
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT);
10878
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT);
10879
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT);
10880
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT);
10881
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT);
10882
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT);
10883
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT);
10884
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT);
10886
+ /* shift by 2 to force saturation. */
10888
+#define CMT " (shift by 2)"
10889
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_sh2, CMT);
10890
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat_sh2, CMT);
10891
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, expected_cumulative_sat_sh2, CMT);
10892
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_sh2, CMT);
10893
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_sh2, CMT);
10894
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, expected_cumulative_sat_sh2, CMT);
10895
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, expected_cumulative_sat_sh2, CMT);
10896
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_sh2, CMT);
10898
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT);
10899
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT);
10900
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT);
10901
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT);
10902
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT);
10903
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT);
10904
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT);
10905
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT);
10908
+ /* Fill input vector with positive values, to check normal case. */
10909
+ VDUP(vector, , int, s, 8, 8, 1);
10910
+ VDUP(vector, , int, s, 16, 4, 2);
10911
+ VDUP(vector, , int, s, 32, 2, 3);
10912
+ VDUP(vector, , int, s, 64, 1, 4);
10913
+ VDUP(vector, q, int, s, 8, 16, 5);
10914
+ VDUP(vector, q, int, s, 16, 8, 6);
10915
+ VDUP(vector, q, int, s, 32, 4, 7);
10916
+ VDUP(vector, q, int, s, 64, 2, 8);
10918
+ /* Arbitrary shift amount. */
10921
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat, CMT);
10922
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat, CMT);
10923
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, expected_cumulative_sat, CMT);
10924
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, expected_cumulative_sat, CMT);
10925
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, expected_cumulative_sat, CMT);
10926
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, expected_cumulative_sat, CMT);
10927
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, expected_cumulative_sat, CMT);
10928
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, expected_cumulative_sat, CMT);
10930
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
10931
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
10932
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
10933
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
10934
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
10935
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
10936
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
10937
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
10942
+ exec_vqshlu_n ();
10945
--- a/src//dev/null
10946
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_n.c
10948
+#include <arm_neon.h>
10949
+#include "arm-neon-ref.h"
10950
+#include "compute-ref-data.h"
10952
+/* Expected values of cumulative_saturation flag. */
10953
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
10954
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
10955
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
10956
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
10957
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
10958
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
10960
+/* Expected results. */
10961
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
10962
+ 0xfa, 0xfa, 0xfb, 0xfb };
10963
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 };
10964
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
10965
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10966
+ 0xff, 0xff, 0xff, 0xff };
10967
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10968
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10970
+/* Expected values of cumulative_saturation flag with max input value
10972
+int VECT_VAR(expected_cumulative_sat_max_sh3,int,16,8) = 1;
10973
+int VECT_VAR(expected_cumulative_sat_max_sh3,int,32,4) = 1;
10974
+int VECT_VAR(expected_cumulative_sat_max_sh3,int,64,2) = 1;
10975
+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,16,8) = 1;
10976
+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,32,4) = 1;
10977
+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,64,2) = 1;
10979
+/* Expected results with max input value shifted by 3. */
10980
+VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
10981
+ 0x7f, 0x7f, 0x7f, 0x7f };
10982
+VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
10983
+VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
10984
+VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10985
+ 0xff, 0xff, 0xff, 0xff };
10986
+VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0xffff, 0xffff,
10987
+ 0xffff, 0xffff };
10988
+VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10990
+/* Expected values of cumulative_saturation flag with max input value
10991
+ shifted by type size. */
10992
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0;
10993
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0;
10994
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0;
10995
+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,16,8) = 0;
10996
+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,32,4) = 0;
10997
+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,64,2) = 0;
10999
+/* Expected results with max input value shifted by type size. */
11000
+VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
11001
+ 0x7f, 0x7f, 0x7f, 0x7f };
11002
+VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x7fff, 0x7fff,
11003
+ 0x7fff, 0x7fff };
11004
+VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
11005
+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
11006
+ 0xff, 0xff, 0xff, 0xff };
11007
+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0xffff, 0xffff,
11008
+ 0xffff, 0xffff };
11009
+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff };
11011
+#define INSN vqshrn_n
11012
+#define TEST_MSG "VQSHRN_N"
11014
+#define FNNAME1(NAME) void exec_ ## NAME (void)
11015
+#define FNNAME(NAME) FNNAME1(NAME)
11019
+ /* Basic test: y=vqshrn_n(x,v), then store the result. */
11020
+#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11021
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \
11022
+ VECT_VAR(vector_res, T1, W2, N) = \
11023
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
11025
+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \
11026
+ VECT_VAR(vector_res, T1, W2, N)); \
11027
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
11029
+ /* Two auxiliary macros are necessary to expand INSN */
11030
+#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11031
+ TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11033
+#define TEST_VQSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11034
+ TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11037
+ /* vector is twice as large as vector_res. */
11038
+ DECL_VARIABLE(vector, int, 16, 8);
11039
+ DECL_VARIABLE(vector, int, 32, 4);
11040
+ DECL_VARIABLE(vector, int, 64, 2);
11041
+ DECL_VARIABLE(vector, uint, 16, 8);
11042
+ DECL_VARIABLE(vector, uint, 32, 4);
11043
+ DECL_VARIABLE(vector, uint, 64, 2);
11045
+ DECL_VARIABLE(vector_res, int, 8, 8);
11046
+ DECL_VARIABLE(vector_res, int, 16, 4);
11047
+ DECL_VARIABLE(vector_res, int, 32, 2);
11048
+ DECL_VARIABLE(vector_res, uint, 8, 8);
11049
+ DECL_VARIABLE(vector_res, uint, 16, 4);
11050
+ DECL_VARIABLE(vector_res, uint, 32, 2);
11052
+ clean_results ();
11054
+ VLOAD(vector, buffer, q, int, s, 16, 8);
11055
+ VLOAD(vector, buffer, q, int, s, 32, 4);
11056
+ VLOAD(vector, buffer, q, int, s, 64, 2);
11057
+ VLOAD(vector, buffer, q, uint, u, 16, 8);
11058
+ VLOAD(vector, buffer, q, uint, u, 32, 4);
11059
+ VLOAD(vector, buffer, q, uint, u, 64, 2);
11061
+ /* Choose shift amount arbitrarily. */
11063
+ TEST_VQSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT);
11064
+ TEST_VQSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT);
11065
+ TEST_VQSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT);
11066
+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT);
11067
+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT);
11068
+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT);
11070
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
11071
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
11072
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
11073
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
11074
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
11075
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
11078
+ /* Use max possible value as input. */
11079
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
11080
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
11081
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
11082
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
11083
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
11084
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
11087
+#define CMT " (check saturation: shift by 3)"
11088
+ TEST_VQSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT);
11089
+ TEST_VQSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT);
11090
+ TEST_VQSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT);
11091
+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT);
11092
+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT);
11093
+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT);
11095
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT);
11096
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT);
11097
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT);
11098
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh3, CMT);
11099
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT);
11100
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT);
11104
+#define CMT " (check saturation: shift by max)"
11105
+ TEST_VQSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
11106
+ TEST_VQSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
11107
+ TEST_VQSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
11108
+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
11109
+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
11110
+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
11112
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT);
11113
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT);
11114
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT);
11115
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT);
11116
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT);
11117
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT);
11122
+ exec_vqshrn_n ();
11125
--- a/src//dev/null
11126
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c
11128
+#include <arm_neon.h>
11129
+#include "arm-neon-ref.h"
11130
+#include "compute-ref-data.h"
11132
+/* Expected values of cumulative_saturation flag with negative input. */
11133
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1;
11134
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1;
11135
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1;
11137
+/* Expected results with negative input. */
11138
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
11139
+ 0x0, 0x0, 0x0, 0x0 };
11140
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
11141
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
11143
+/* Expected values of cumulative_saturation flag with max input value
11145
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1;
11146
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1;
11147
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1;
11149
+/* Expected results with max input value shifted by 1. */
11150
+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
11151
+ 0xff, 0xff, 0xff, 0xff };
11152
+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff,
11153
+ 0xffff, 0xffff };
11154
+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff };
11155
+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 };
11157
+/* Expected values of cumulative_saturation flag. */
11158
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
11159
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
11160
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
11162
+/* Expected results. */
11163
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48,
11164
+ 0x48, 0x48, 0x48, 0x48 };
11165
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
11166
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe };
11169
+#define INSN vqshrun_n
11170
+#define TEST_MSG "VQSHRUN_N"
11172
+#define FNNAME1(NAME) void exec_ ## NAME (void)
11173
+#define FNNAME(NAME) FNNAME1(NAME)
11177
+ /* Basic test: y=vqshrun_n(x,v), then store the result. */
11178
+#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11179
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \
11180
+ VECT_VAR(vector_res, uint, W2, N) = \
11181
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
11183
+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \
11184
+ VECT_VAR(vector_res, uint, W2, N)); \
11185
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
11187
+ /* Two auxiliary macros are necessary to expand INSN */
11188
+#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11189
+ TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11191
+#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11192
+ TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11195
+ /* vector is twice as large as vector_res. */
11196
+ DECL_VARIABLE(vector, int, 16, 8);
11197
+ DECL_VARIABLE(vector, int, 32, 4);
11198
+ DECL_VARIABLE(vector, int, 64, 2);
11200
+ DECL_VARIABLE(vector_res, uint, 8, 8);
11201
+ DECL_VARIABLE(vector_res, uint, 16, 4);
11202
+ DECL_VARIABLE(vector_res, uint, 32, 2);
11204
+ clean_results ();
11206
+ /* Fill input vector with negative values, to check saturation on
11208
+ VDUP(vector, q, int, s, 16, 8, -2);
11209
+ VDUP(vector, q, int, s, 32, 4, -3);
11210
+ VDUP(vector, q, int, s, 64, 2, -4);
11212
+ /* Choose shift amount arbitrarily. */
11213
+#define CMT " (negative input)"
11214
+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT);
11215
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT);
11216
+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT);
11218
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
11219
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
11220
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
11223
+ /* Fill input vector with max value, to check saturation on
11225
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
11226
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
11227
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
11230
+#define CMT " (check cumulative saturation)"
11231
+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT);
11232
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT);
11233
+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT);
11235
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT);
11236
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT);
11237
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT);
11240
+ /* Fill input vector with positive values, to check normal case. */
11241
+ VDUP(vector, q, int, s, 16, 8, 0x1234);
11242
+ VDUP(vector, q, int, s, 32, 4, 0x87654321);
11243
+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF);
11247
+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT);
11248
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT);
11249
+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT);
11251
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
11252
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
11253
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
11258
+ exec_vqshrun_n ();
11261
--- a/src//dev/null
11262
+++ b/src/gcc/testsuite/gcc.target/aarch64/c-output-template-4.c
11264
+/* { dg-do compile } */
11265
+/* { dg-options "-O0" } */
11270
+ __asm__ ("@ %c0" : : "S" (&test + 4));
11273
+/* { dg-final { scan-assembler "@ test\\+4" } } */
11274
--- a/src//dev/null
11275
+++ b/src/gcc/testsuite/gcc.target/aarch64/pow-sqrt-synth-1.c
11277
+/* { dg-do compile } */
11278
+/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */
11284
+ return __builtin_pow (a, -5.875);
11290
+ return __builtin_pow (a, 0.75f);
11296
+ return __builtin_pow (a, 1.0 + 0.00390625);
11302
+ return __builtin_pow (a, -1.25) + __builtin_pow (a, 5.75) - __builtin_pow (a, 3.375);
11307
+vecfoo (double *a)
11309
+ for (int i = 0; i < N; i++)
11310
+ a[i] = __builtin_pow (a[i], 1.25);
11313
+/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */
11314
+/* { dg-final { cleanup-tree-dump "sincos" } } */
11315
\ No newline at end of file
11316
--- a/src//dev/null
11317
+++ b/src/gcc/testsuite/gcc.target/aarch64/pr65491_1.c
11319
+/* { dg-do compile } */
11320
+/* { dg-options "-O2" } */
11322
+typedef long double a __attribute__((vector_size (16)));
11325
+sum (a first, a second)
11327
+ return first + second;
11330
--- a/src/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c
11331
+++ b/src/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c
11332
@@ -235,8 +235,8 @@ test_vrshl_u64 (uint64x1_t a, int64x1_t b)
11333
return vrshl_u64 (a, b);
11336
-/* For int64x1_t, sshr...#63 is output instead of the equivalent cmlt...#0. */
11337
-/* { dg-final { scan-assembler-times "\\tsshr\\td\[0-9\]+" 2 } } */
11338
+/* For int64x1_t, sshr...#63 is equivalent to cmlt...#0. */
11339
+/* { dg-final { scan-assembler-times "\\t(?:sshr|cmlt)\\td\[0-9\]+" 2 } } */
11342
test_vshr_n_s64 (int64x1_t a)
11343
--- a/src//dev/null
11344
+++ b/src/gcc/testsuite/gcc.target/aarch64/unsigned-float.c
11346
+/* { dg-do compile } */
11347
+/* { dg-options "-O1" } */
11349
+#include <stdint.h>
11354
+ return (double)(float)x;
11360
+ return (float)(double)x;
11363
+/* { dg-final { scan-assembler-not "fcvt" } } */
11364
--- a/src//dev/null
11365
+++ b/src/gcc/testsuite/gcc.target/aarch64/vec_init_1.c
11367
+/* { dg-do run } */
11368
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-inline" } */
11370
+extern void abort (void);
11372
+typedef float float16x4_t __attribute__ ((vector_size ((16))));
11380
+ return (float16x4_t) { 0, 0, a, b };
11384
+main (int argc, char **argv)
11388
+ float16x4_t vec = make_vector ();
11389
+ if (vec[0] != 0 || vec[1] != 0 || vec[2] != a || vec[3] != b)
11394
+/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
11395
+/* What we want to check, is that make_vector does not stp the whole vector
11396
+ to the stack. Unfortunately here we scan the body of main() too, which may
11397
+ be a bit fragile - the test is currently passing only because of the option
11398
+ -fomit-frame-pointer which avoids use of stp in the prologue to main(). */
11399
+/* { dg-final { scan-assembler-not "stp\\t" } } */
11400
+/* { dg-final { cleanup-saved-temps } } */
11401
--- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
11402
+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
11403
@@ -54,11 +54,11 @@ test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \
11407
-/* Tests of vld2_dup and vld2q_dup. */
11408
+/* Tests of vld2_lane and vld2q_lane. */
11409
VARIANTS (TESTMETH, 2)
11410
-/* Tests of vld3_dup and vld3q_dup. */
11411
+/* Tests of vld3_lane and vld3q_lane. */
11412
VARIANTS (TESTMETH, 3)
11413
-/* Tests of vld4_dup and vld4q_dup. */
11414
+/* Tests of vld4_lane and vld4q_lane. */
11415
VARIANTS (TESTMETH, 4)
11417
#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
11418
--- a/src//dev/null
11419
+++ b/src/gcc/testsuite/gcc.target/aarch64/vstN_lane_1.c
11421
+/* { dg-do run } */
11422
+/* { dg-options "-O3 -fno-inline" } */
11424
+#include <arm_neon.h>
11426
+extern void abort (void);
11428
+#define VARIANTS(VARIANT, STRUCT) \
11429
+VARIANT (uint8, , 8, _u8, 6, STRUCT) \
11430
+VARIANT (uint16, , 4, _u16, 3, STRUCT) \
11431
+VARIANT (uint32, , 2, _u32, 1, STRUCT) \
11432
+VARIANT (uint64, , 1, _u64, 0, STRUCT) \
11433
+VARIANT (int8, , 8, _s8, 5, STRUCT) \
11434
+VARIANT (int16, , 4, _s16, 2, STRUCT) \
11435
+VARIANT (int32, , 2, _s32, 0, STRUCT) \
11436
+VARIANT (int64, , 1, _s64, 0, STRUCT) \
11437
+VARIANT (poly8, , 8, _p8, 7, STRUCT) \
11438
+VARIANT (poly16, , 4, _p16, 1, STRUCT) \
11439
+VARIANT (float32, , 2, _f32, 1, STRUCT) \
11440
+VARIANT (float64, , 1, _f64, 0, STRUCT) \
11441
+VARIANT (uint8, q, 16, _u8, 14, STRUCT) \
11442
+VARIANT (uint16, q, 8, _u16, 4, STRUCT) \
11443
+VARIANT (uint32, q, 4, _u32, 3, STRUCT) \
11444
+VARIANT (uint64, q, 2, _u64, 0, STRUCT) \
11445
+VARIANT (int8, q, 16, _s8, 13, STRUCT) \
11446
+VARIANT (int16, q, 8, _s16, 6, STRUCT) \
11447
+VARIANT (int32, q, 4, _s32, 2, STRUCT) \
11448
+VARIANT (int64, q, 2, _s64, 1, STRUCT) \
11449
+VARIANT (poly8, q, 16, _p8, 12, STRUCT) \
11450
+VARIANT (poly16, q, 8, _p16, 5, STRUCT) \
11451
+VARIANT (float32, q, 4, _f32, 1, STRUCT)\
11452
+VARIANT (float64, q, 2, _f64, 0, STRUCT)
11454
+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
11456
+test_vst##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data) \
11458
+ BASE##x##ELTS##x##STRUCT##_t vectors; \
11459
+ for (int i = 0; i < STRUCT; i++, data += ELTS) \
11460
+ vectors.val[i] = vld1##Q##SUFFIX (data); \
11461
+ BASE##_t temp[STRUCT]; \
11462
+ vst##STRUCT##Q##_lane##SUFFIX (temp, vectors, LANE); \
11463
+ for (int i = 0; i < STRUCT; i++) \
11465
+ if (temp[i] != vget##Q##_lane##SUFFIX (vectors.val[i], LANE)) \
11471
+/* Tests of vst2_lane and vst2q_lane. */
11472
+VARIANTS (TESTMETH, 2)
11473
+/* Tests of vst3_lane and vst3q_lane. */
11474
+VARIANTS (TESTMETH, 3)
11475
+/* Tests of vst4_lane and vst4q_lane. */
11476
+VARIANTS (TESTMETH, 4)
11478
+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
11479
+ if (test_vst##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data)) \
11483
+main (int argc, char **argv)
11485
+ /* Original data for all vector formats. */
11486
+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
11487
+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
11488
+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
11489
+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
11491
+ VARIANTS (CHECK, 2);
11492
+ VARIANTS (CHECK, 3);
11493
+ VARIANTS (CHECK, 4);
11496
--- a/src//dev/null
11497
+++ b/src/gcc/testsuite/gcc.target/arm/bics_1.c
11499
+/* { dg-do run } */
11500
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11501
+/* { dg-require-effective-target arm32 } */
11503
+extern void abort (void);
11506
+bics_si_test1 (int a, int b, int c)
11510
+ /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11514
+ return b + d + c;
11518
+bics_si_test2 (int a, int b, int c)
11520
+ int d = a & ~(b << 3);
11522
+ /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl \#3" 1 } } */
11526
+ return b + d + c;
11534
+ x = bics_si_test1 (29, ~4, 5);
11535
+ if (x != ((29 & 4) + ~4 + 5))
11538
+ x = bics_si_test1 (5, ~2, 20);
11542
+ x = bics_si_test2 (35, ~4, 5);
11543
+ if (x != ((35 & ~(~4 << 3)) + ~4 + 5))
11546
+ x = bics_si_test2 (96, ~2, 20);
11552
+/* { dg-final { cleanup-saved-temps } } */
11553
--- a/src//dev/null
11554
+++ b/src/gcc/testsuite/gcc.target/arm/bics_2.c
11556
+/* { dg-do run } */
11557
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11558
+/* { dg-require-effective-target arm32 } */
11560
+extern void abort (void);
11563
+bics_si_test1 (int a, int b, int c)
11567
+ /* { dg-final { scan-assembler-not "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" } } */
11568
+ /* { dg-final { scan-assembler-times "bic\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11572
+ return b + d + c;
11576
+bics_si_test2 (int a, int b, int c)
11578
+ int d = a & ~(b << 3);
11580
+ /* { dg-final { scan-assembler-not "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl \#3" } } */
11581
+ /* { dg-final { scan-assembler "bic\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl \#3" } } */
11585
+ return b + d + c;
11593
+ x = bics_si_test1 (29, ~4, 5);
11594
+ if (x != ((29 & 4) + ~4 + 5))
11597
+ x = bics_si_test1 (5, ~2, 20);
11601
+ x = bics_si_test2 (35, ~4, 5);
11602
+ if (x != ((35 & ~(~4 << 3)) + ~4 + 5))
11605
+ x = bics_si_test2 (96, ~2, 20);
11612
+/* { dg-final { cleanup-saved-temps } } */
11613
--- a/src//dev/null
11614
+++ b/src/gcc/testsuite/gcc.target/arm/bics_3.c
11616
+/* { dg-do run } */
11617
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11618
+/* { dg-require-effective-target arm32 } */
11620
+extern void abort (void);
11623
+bics_si_test (int a, int b)
11632
+bics_si_test2 (int a, int b)
11634
+ if (a & ~ (b << 2))
11646
+ if (bics_si_test (a, b))
11648
+ if (bics_si_test2 (c, b))
11653
+/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11654
+/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl #2" 1 } } */
11656
+/* { dg-final { cleanup-saved-temps } } */
11657
--- a/src//dev/null
11658
+++ b/src/gcc/testsuite/gcc.target/arm/bics_4.c
11660
+/* { dg-do run } */
11661
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11662
+/* { dg-require-effective-target arm32 } */
11664
+extern void abort (void);
11667
+bics_si_test1 (int a, int b, int c)
11669
+ if ((a & b) == a)
11676
+bics_si_test2 (int a, int b, int c)
11678
+ if ((a & b) == b)
11688
+ x = bics_si_test1 (0xf00d, 0xf11f, 0);
11692
+ x = bics_si_test1 (0xf11f, 0xf00d, 0);
11696
+ x = bics_si_test2 (0xf00d, 0xf11f, 0);
11700
+ x = bics_si_test2 (0xf11f, 0xf00d, 0);
11707
+/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11708
+/* { dg-final { cleanup-saved-temps } } */
11709
--- a/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c
11710
+++ b/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c
11711
@@ -58,18 +58,18 @@ GEN_COND_TESTS(vceq)
11712
/* { dg-final { scan-assembler-times "vcge\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
11713
/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11714
/* { dg-final { scan-assembler-times "vcge\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
11715
-/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11716
-/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11717
-/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11718
-/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11719
-/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11720
-/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11721
-/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11722
-/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11723
-/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11724
-/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11725
-/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11726
-/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11727
+/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11728
+/* { dg-final { scan-assembler "vclt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11729
+/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11730
+/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11731
+/* { dg-final { scan-assembler "vclt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11732
+/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11733
+/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11734
+/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11735
+/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11736
+/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11737
+/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11738
+/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11739
/* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
11740
/* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
11741
/* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
11742
--- a/src//dev/null
11743
+++ b/src/gcc/testsuite/gcc.target/arm/pr26702.c
11745
+/* { dg-do compile { target arm*-*-eabi* } } */
11746
+/* { dg-final { scan-assembler "\\.size\[\\t \]+static_foo, 4" } } */
11748
+static int static_foo;
11749
--- a/src/gcc/testsuite/gcc.target/arm/pr42172-1.c
11750
+++ b/src/gcc/testsuite/gcc.target/arm/pr42172-1.c
11751
@@ -16,4 +16,4 @@ void init_A (struct A *this)
11755
-/* { dg-final { scan-assembler-times "ldr" 1 } } */
11756
+/* { dg-final { scan-assembler-times "str" 1 } } */
11757
--- a/src//dev/null
11758
+++ b/src/gcc/testsuite/gcc.target/arm/pr64208.c
11760
+/* { dg-do compile } */
11761
+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-mcpu=*" } { "-mcpu=iwmmxt" } } */
11762
+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-mabi=*" } { "-mabi=iwmmxt" } } */
11763
+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-march=*" } { "-march=iwmmxt" } } */
11764
+/* { dg-skip-if "Test is specific to ARM mode" { arm*-*-* } { "-mthumb" } { "" } } */
11765
+/* { dg-require-effective-target arm32 } */
11766
+/* { dg-require-effective-target arm_iwmmxt_ok } */
11767
+/* { dg-options "-O1 -mcpu=iwmmxt" } */
11769
+long long x6(void);
11770
+void x7(long long, long long);
11771
+void x8(long long);
11777
+ long long *x3 = x1;
11779
+ long long x4 = x0, x5 = x6();
11785
--- a/src//dev/null
11786
+++ b/src/gcc/testsuite/gcc.target/arm/pr64616.c
11788
+/* { dg-do compile } */
11789
+/* { dg-options "-O2 -fdump-rtl-cprop2" } */
11792
+unsigned int glob;
11797
+ while (f (glob));
11801
+/* { dg-final { scan-rtl-dump "GLOBAL COPY-PROP" "cprop2" } } */
11802
+/* { dg-final { cleanup-rtl-dump "cprop2" } } */
11803
--- a/src//dev/null
11804
+++ b/src/gcc/testsuite/gcc.target/arm/pr64818.c
11806
+/* { dg-do compile } */
11807
+/* { dg-options "-O1" } */
11810
+extern int foo1 (void);
11820
+ register int a asm ("r0") = 5;
11821
+ register char *b asm ("r1") = temp;
11822
+ register int c asm ("r2") = len;
11823
+ asm volatile ("mov %[r0], %[r0]\n mov %[r1], %[r1]\n mov %[r2], %[r2]\n"
11825
+ : [r0]"r"(a), [r1]"r"(b), [r2]"r"(c));
11827
+ for (i = 0; i < len; i++)
11829
+ if (temp[i] == 10)
11835
+/* { dg-final { scan-assembler "\[\\t \]+mov\ r1,\ r1" } } */
11836
--- a/src/gcc/testsuite/gcc.target/arm/pr65067.c
11837
+++ b/src/gcc/testsuite/gcc.target/arm/pr65067.c
11839
/* { dg-do compile } */
11840
+/* { dg-require-effective-target arm_thumb2_ok } */
11841
/* { dg-options "-mthumb -mcpu=cortex-m3 -O2" } */
11844
--- a/src//dev/null
11845
+++ b/src/gcc/testsuite/gcc.target/arm/pr65710.c
11847
+/* { dg-do compile } */
11848
+/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } {"-mfloat-abi=soft" } } */
11849
+/* { dg-options "-mthumb -O2 -mfloat-abi=soft -w" } */
11850
+/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
11859
+enum { no_op, duplicate, pop_failure_jump, dummy_failure_jump };
11862
+ unsigned pointer;
11863
+} byte_fail_stack_elt_t;
11865
+typedef struct { unsigned avail; } byte_fail_stack_type;
11868
+ byte_fail_stack_elt_t word;
11870
+ unsigned match_null_string_p : 2;
11871
+ unsigned is_active : 1;
11872
+ unsigned ever_matched_something : 1;
11874
+} byte_register_info_type;
11882
+byte_re_match_2_internal_size2(const int p2, int p3, const int p4) {
11885
+ char k, l, m, n = h;
11886
+ byte_fail_stack_type o;
11887
+ byte_fail_stack_elt_t *q;
11888
+ unsigned int s = (unsigned int)h;
11890
+ char **v, *w, **x, **y, **t1;
11891
+ byte_register_info_type *z, *t2 = __builtin_alloca(s);
11892
+ x = __builtin_alloca(s);
11893
+ y = __builtin_alloca(s);
11894
+ z = __builtin_alloca(sizeof(byte_register_info_type));
11895
+ k = p4 + byte_re_match_2_internal_size2;
11899
+ if (h == h->used) {
11903
+ for (; i < s; i++)
11912
+ switch (*h->buffer++) {
11914
+ while (m && n ?: *g)
11916
+ y[*h->buffer] = z[*h->buffer].bits.match_null_string_p ? w == &a ?: w : w;
11920
+ while (r && z[r].bits.is_active)
11928
+ case dummy_failure_jump:
11931
+ if (z[*h->buffer].bits.ever_matched_something) {
11933
+ z[*h->buffer].bits.ever_matched_something = r = *h->buffer;
11934
+ for (; r + *(h->buffer + 1); r++) {
11940
+ case duplicate: {
11941
+ char *t3 = p2 + p3;
11945
+ if ((p3 ?: p4) == k)
11947
+ case pop_failure_jump:
11949
+ t2[c].word = q[o.avail];
11951
+ q = t4 = __builtin_allocamemcpy(t4 ?: (p <<= 1));
11958
+ for (; t5 >= t; t5--)
11959
+ v[t5] = q[--o.avail].pointer;
11960
+ switch (*h->buffer)
11961
+ case pop_failure_jump:
11967
--- a/src//dev/null
11968
+++ b/src/gcc/testsuite/gcc.target/arm/pr65729.c
11970
+/* { dg-do compile } */
11971
+/* { dg-require-effective-target arm_hard_vfp_ok } */
11972
+/* { dg-options "-O2 -march=armv7-a -mfloat-abi=hard -mfpu=vfpv3-d16" } */
11977
+ asm volatile ("" : "+gw" (x));
11980
--- a/src//dev/null
11981
+++ b/src/gcc/testsuite/gcc.target/arm/pr65924.c
11983
+/* { dg-do compile } */
11984
+/* { dg-require-effective-target arm_thumb2_ok } */
11985
+/* { dg-options "-O2 -mthumb" } */
11992
--- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
11993
+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
11994
@@ -27,9 +27,22 @@ load_lib gcc-dg.exp
11998
+# If the target hardware supports NEON, the default action is "run", otherwise
12000
+global dg-do-what-default
12001
+set save-dg-do-what-default ${dg-do-what-default}
12002
+if {![check_effective_target_arm_neon_ok]} then {
12004
+} elseif {[is-effective-target arm_neon_hw]} then {
12005
+ set dg-do-what-default run
12007
+ set dg-do-what-default compile
12011
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
12015
+set dg-do-what-default ${save-dg-do-what-default}
12017
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
12018
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
12020
/* Test the `vextQf32' ARM Neon intrinsic. */
12022
-/* { dg-do run } */
12023
-/* { dg-require-effective-target arm_neon_ok } */
12024
/* { dg-options "-save-temps -O3 -fno-inline" } */
12025
/* { dg-add-options arm_neon } */
12027
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
12028
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
12030
/* Test the `vextQp16' ARM Neon intrinsic. */
12032
-/* { dg-do run } */
12033
-/* { dg-require-effective-target arm_neon_ok } */
12034
/* { dg-options "-save-temps -O3 -fno-inline" } */
12035
/* { dg-add-options arm_neon } */
12037
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
12038
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
12040
/* Test the `vextQp64' ARM Neon intrinsic. */
12042
-/* { dg-do run } */
12043
/* { dg-require-effective-target arm_crypto_ok } */
12044
/* { dg-options "-save-temps -O3 -fno-inline" } */
12045
/* { dg-add-options arm_crypto } */
12046
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
12047
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
12049
/* Test the `vextQp8' ARM Neon intrinsic. */
12051
-/* { dg-do run } */
12052
-/* { dg-require-effective-target arm_neon_ok } */
12053
/* { dg-options "-save-temps -O3 -fno-inline" } */
12054
/* { dg-add-options arm_neon } */
12056
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
12057
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
12059
/* Test the `vextQs16' ARM Neon intrinsic. */
12061
-/* { dg-do run } */
12062
-/* { dg-require-effective-target arm_neon_ok } */
12063
/* { dg-options "-save-temps -O3 -fno-inline" } */
12064
/* { dg-add-options arm_neon } */
12066
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
12067
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
12069
/* Test the `vextQs32' ARM Neon intrinsic. */
12071
-/* { dg-do run } */
12072
-/* { dg-require-effective-target arm_neon_ok } */
12073
/* { dg-options "-save-temps -O3 -fno-inline" } */
12074
/* { dg-add-options arm_neon } */
12076
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
12077
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
12079
/* Test the `vextQs64' ARM Neon intrinsic. */
12081
-/* { dg-do run } */
12082
-/* { dg-require-effective-target arm_neon_ok } */
12083
/* { dg-options "-save-temps -O3 -fno-inline" } */
12084
/* { dg-add-options arm_neon } */
12086
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
12087
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
12089
/* Test the `vextQs8' ARM Neon intrinsic. */
12091
-/* { dg-do run } */
12092
-/* { dg-require-effective-target arm_neon_ok } */
12093
/* { dg-options "-save-temps -O3 -fno-inline" } */
12094
/* { dg-add-options arm_neon } */
12096
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
12097
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
12099
/* Test the `vextQu16' ARM Neon intrinsic. */
12101
-/* { dg-do run } */
12102
-/* { dg-require-effective-target arm_neon_ok } */
12103
/* { dg-options "-save-temps -O3 -fno-inline" } */
12104
/* { dg-add-options arm_neon } */
12106
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
12107
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
12109
/* Test the `vextQu32' ARM Neon intrinsic. */
12111
-/* { dg-do run } */
12112
-/* { dg-require-effective-target arm_neon_ok } */
12113
/* { dg-options "-save-temps -O3 -fno-inline" } */
12114
/* { dg-add-options arm_neon } */
12116
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
12117
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
12119
/* Test the `vextQu64' ARM Neon intrinsic. */
12121
-/* { dg-do run } */
12122
-/* { dg-require-effective-target arm_neon_ok } */
12123
/* { dg-options "-save-temps -O3 -fno-inline" } */
12124
/* { dg-add-options arm_neon } */
12126
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
12127
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
12129
/* Test the `vextQu8' ARM Neon intrinsic. */
12131
-/* { dg-do run } */
12132
-/* { dg-require-effective-target arm_neon_ok } */
12133
/* { dg-options "-save-temps -O3 -fno-inline" } */
12134
/* { dg-add-options arm_neon } */
12136
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
12137
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
12139
/* Test the `vextf32' ARM Neon intrinsic. */
12141
-/* { dg-do run } */
12142
-/* { dg-require-effective-target arm_neon_ok } */
12143
/* { dg-options "-save-temps -O3 -fno-inline" } */
12144
/* { dg-add-options arm_neon } */
12146
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
12147
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
12149
/* Test the `vextp16' ARM Neon intrinsic. */
12151
-/* { dg-do run } */
12152
-/* { dg-require-effective-target arm_neon_ok } */
12153
/* { dg-options "-save-temps -O3 -fno-inline" } */
12154
/* { dg-add-options arm_neon } */
12156
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
12157
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
12159
/* Test the `vextp64' ARM Neon intrinsic. */
12161
-/* { dg-do run } */
12162
/* { dg-require-effective-target arm_crypto_ok } */
12163
/* { dg-options "-save-temps -O3 -fno-inline" } */
12164
/* { dg-add-options arm_crypto } */
12165
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
12166
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
12168
/* Test the `vextp8' ARM Neon intrinsic. */
12170
-/* { dg-do run } */
12171
-/* { dg-require-effective-target arm_neon_ok } */
12172
/* { dg-options "-save-temps -O3 -fno-inline" } */
12173
/* { dg-add-options arm_neon } */
12175
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
12176
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
12178
/* Test the `vexts16' ARM Neon intrinsic. */
12180
-/* { dg-do run } */
12181
-/* { dg-require-effective-target arm_neon_ok } */
12182
/* { dg-options "-save-temps -O3 -fno-inline" } */
12183
/* { dg-add-options arm_neon } */
12185
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
12186
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
12188
/* Test the `vexts32' ARM Neon intrinsic. */
12190
-/* { dg-do run } */
12191
-/* { dg-require-effective-target arm_neon_ok } */
12192
/* { dg-options "-save-temps -O3 -fno-inline" } */
12193
/* { dg-add-options arm_neon } */
12195
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
12196
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
12198
/* Test the `vexts64' ARM Neon intrinsic. */
12200
-/* { dg-do run } */
12201
-/* { dg-require-effective-target arm_neon_ok } */
12202
/* { dg-options "-save-temps -O3 -fno-inline" } */
12203
/* { dg-add-options arm_neon } */
12205
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
12206
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
12208
/* Test the `vexts8' ARM Neon intrinsic. */
12210
-/* { dg-do run } */
12211
-/* { dg-require-effective-target arm_neon_ok } */
12212
/* { dg-options "-save-temps -O3 -fno-inline" } */
12213
/* { dg-add-options arm_neon } */
12215
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
12216
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
12218
/* Test the `vextu16' ARM Neon intrinsic. */
12220
-/* { dg-do run } */
12221
-/* { dg-require-effective-target arm_neon_ok } */
12222
/* { dg-options "-save-temps -O3 -fno-inline" } */
12223
/* { dg-add-options arm_neon } */
12225
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
12226
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
12228
/* Test the `vextu32' ARM Neon intrinsic. */
12230
-/* { dg-do run } */
12231
-/* { dg-require-effective-target arm_neon_ok } */
12232
/* { dg-options "-save-temps -O3 -fno-inline" } */
12233
/* { dg-add-options arm_neon } */
12235
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
12236
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
12238
/* Test the `vextu64' ARM Neon intrinsic. */
12240
-/* { dg-do run } */
12241
-/* { dg-require-effective-target arm_neon_ok } */
12242
/* { dg-options "-save-temps -O3 -fno-inline" } */
12243
/* { dg-add-options arm_neon } */
12245
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
12246
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
12248
/* Test the `vextu8' ARM Neon intrinsic. */
12250
-/* { dg-do run } */
12251
-/* { dg-require-effective-target arm_neon_ok } */
12252
/* { dg-options "-save-temps -O3 -fno-inline" } */
12253
/* { dg-add-options arm_neon } */
12255
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
12256
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
12258
/* Test the `vrev16p8' ARM Neon intrinsic. */
12260
-/* { dg-do run } */
12261
-/* { dg-require-effective-target arm_neon_ok } */
12262
/* { dg-options "-save-temps -fno-inline" } */
12263
/* { dg-add-options arm_neon } */
12265
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
12266
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
12268
/* Test the `vrev16q_p8' ARM Neon intrinsic. */
12270
-/* { dg-do run } */
12271
-/* { dg-require-effective-target arm_neon_ok } */
12272
/* { dg-options "-save-temps -fno-inline" } */
12273
/* { dg-add-options arm_neon } */
12275
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
12276
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
12278
/* Test the `vrev16q_s8' ARM Neon intrinsic. */
12280
-/* { dg-do run } */
12281
-/* { dg-require-effective-target arm_neon_ok } */
12282
/* { dg-options "-save-temps -fno-inline" } */
12283
/* { dg-add-options arm_neon } */
12285
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
12286
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
12288
/* Test the `vrev16q_u8' ARM Neon intrinsic. */
12290
-/* { dg-do run } */
12291
-/* { dg-require-effective-target arm_neon_ok } */
12292
/* { dg-options "-save-temps -fno-inline" } */
12293
/* { dg-add-options arm_neon } */
12295
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
12296
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
12298
/* Test the `vrev16s8' ARM Neon intrinsic. */
12300
-/* { dg-do run } */
12301
-/* { dg-require-effective-target arm_neon_ok } */
12302
/* { dg-options "-save-temps -fno-inline" } */
12303
/* { dg-add-options arm_neon } */
12305
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
12306
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
12308
/* Test the `vrev16u8' ARM Neon intrinsic. */
12310
-/* { dg-do run } */
12311
-/* { dg-require-effective-target arm_neon_ok } */
12312
/* { dg-options "-save-temps -fno-inline" } */
12313
/* { dg-add-options arm_neon } */
12315
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
12316
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
12318
/* Test the `vrev32p16' ARM Neon intrinsic. */
12320
-/* { dg-do run } */
12321
-/* { dg-require-effective-target arm_neon_ok } */
12322
/* { dg-options "-save-temps -fno-inline" } */
12323
/* { dg-add-options arm_neon } */
12325
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
12326
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
12328
/* Test the `vrev32p8' ARM Neon intrinsic. */
12330
-/* { dg-do run } */
12331
-/* { dg-require-effective-target arm_neon_ok } */
12332
/* { dg-options "-save-temps -fno-inline" } */
12333
/* { dg-add-options arm_neon } */
12335
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
12336
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
12338
/* Test the `vrev32q_p16' ARM Neon intrinsic. */
12340
-/* { dg-do run } */
12341
-/* { dg-require-effective-target arm_neon_ok } */
12342
/* { dg-options "-save-temps -fno-inline" } */
12343
/* { dg-add-options arm_neon } */
12345
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
12346
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
12348
/* Test the `vrev32q_p8' ARM Neon intrinsic. */
12350
-/* { dg-do run } */
12351
-/* { dg-require-effective-target arm_neon_ok } */
12352
/* { dg-options "-save-temps -fno-inline" } */
12353
/* { dg-add-options arm_neon } */
12355
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
12356
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
12358
/* Test the `vrev32q_s16' ARM Neon intrinsic. */
12360
-/* { dg-do run } */
12361
-/* { dg-require-effective-target arm_neon_ok } */
12362
/* { dg-options "-save-temps -fno-inline" } */
12363
/* { dg-add-options arm_neon } */
12365
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
12366
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
12368
/* Test the `vrev32q_s8' ARM Neon intrinsic. */
12370
-/* { dg-do run } */
12371
-/* { dg-require-effective-target arm_neon_ok } */
12372
/* { dg-options "-save-temps -fno-inline" } */
12373
/* { dg-add-options arm_neon } */
12375
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
12376
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
12378
/* Test the `vrev32q_u16' ARM Neon intrinsic. */
12380
-/* { dg-do run } */
12381
-/* { dg-require-effective-target arm_neon_ok } */
12382
/* { dg-options "-save-temps -fno-inline" } */
12383
/* { dg-add-options arm_neon } */
12385
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
12386
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
12388
/* Test the `vrev32q_u8' ARM Neon intrinsic. */
12390
-/* { dg-do run } */
12391
-/* { dg-require-effective-target arm_neon_ok } */
12392
/* { dg-options "-save-temps -fno-inline" } */
12393
/* { dg-add-options arm_neon } */
12395
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
12396
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
12398
/* Test the `vrev32s16' ARM Neon intrinsic. */
12400
-/* { dg-do run } */
12401
-/* { dg-require-effective-target arm_neon_ok } */
12402
/* { dg-options "-save-temps -fno-inline" } */
12403
/* { dg-add-options arm_neon } */
12405
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
12406
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
12408
/* Test the `vrev32s8' ARM Neon intrinsic. */
12410
-/* { dg-do run } */
12411
-/* { dg-require-effective-target arm_neon_ok } */
12412
/* { dg-options "-save-temps -fno-inline" } */
12413
/* { dg-add-options arm_neon } */
12415
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
12416
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
12418
/* Test the `vrev32u16' ARM Neon intrinsic. */
12420
-/* { dg-do run } */
12421
-/* { dg-require-effective-target arm_neon_ok } */
12422
/* { dg-options "-save-temps -fno-inline" } */
12423
/* { dg-add-options arm_neon } */
12425
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
12426
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
12428
/* Test the `vrev32u8' ARM Neon intrinsic. */
12430
-/* { dg-do run } */
12431
-/* { dg-require-effective-target arm_neon_ok } */
12432
/* { dg-options "-save-temps -fno-inline" } */
12433
/* { dg-add-options arm_neon } */
12435
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
12436
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
12438
/* Test the `vrev64f32' ARM Neon intrinsic. */
12440
-/* { dg-do run } */
12441
-/* { dg-require-effective-target arm_neon_ok } */
12442
/* { dg-options "-save-temps -fno-inline" } */
12443
/* { dg-add-options arm_neon } */
12445
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
12446
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
12448
/* Test the `vrev64p16' ARM Neon intrinsic. */
12450
-/* { dg-do run } */
12451
-/* { dg-require-effective-target arm_neon_ok } */
12452
/* { dg-options "-save-temps -fno-inline" } */
12453
/* { dg-add-options arm_neon } */
12455
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
12456
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
12458
/* Test the `vrev64p8' ARM Neon intrinsic. */
12460
-/* { dg-do run } */
12461
-/* { dg-require-effective-target arm_neon_ok } */
12462
/* { dg-options "-save-temps -fno-inline" } */
12463
/* { dg-add-options arm_neon } */
12465
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
12466
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
12468
/* Test the `vrev64q_f32' ARM Neon intrinsic. */
12470
-/* { dg-do run } */
12471
-/* { dg-require-effective-target arm_neon_ok } */
12472
/* { dg-options "-save-temps -fno-inline" } */
12473
/* { dg-add-options arm_neon } */
12475
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
12476
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
12478
/* Test the `vrev64q_p16' ARM Neon intrinsic. */
12480
-/* { dg-do run } */
12481
-/* { dg-require-effective-target arm_neon_ok } */
12482
/* { dg-options "-save-temps -fno-inline" } */
12483
/* { dg-add-options arm_neon } */
12485
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
12486
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
12488
/* Test the `vrev64q_p8' ARM Neon intrinsic. */
12490
-/* { dg-do run } */
12491
-/* { dg-require-effective-target arm_neon_ok } */
12492
/* { dg-options "-save-temps -fno-inline" } */
12493
/* { dg-add-options arm_neon } */
12495
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
12496
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
12498
/* Test the `vrev64q_s16' ARM Neon intrinsic. */
12500
-/* { dg-do run } */
12501
-/* { dg-require-effective-target arm_neon_ok } */
12502
/* { dg-options "-save-temps -fno-inline" } */
12503
/* { dg-add-options arm_neon } */
12505
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
12506
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
12508
/* Test the `vrev64q_s32' ARM Neon intrinsic. */
12510
-/* { dg-do run } */
12511
-/* { dg-require-effective-target arm_neon_ok } */
12512
/* { dg-options "-save-temps -fno-inline" } */
12513
/* { dg-add-options arm_neon } */
12515
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
12516
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
12518
/* Test the `vrev64q_s8' ARM Neon intrinsic. */
12520
-/* { dg-do run } */
12521
-/* { dg-require-effective-target arm_neon_ok } */
12522
/* { dg-options "-save-temps -fno-inline" } */
12523
/* { dg-add-options arm_neon } */
12525
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
12526
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
12528
/* Test the `vrev64q_u16' ARM Neon intrinsic. */
12530
-/* { dg-do run } */
12531
-/* { dg-require-effective-target arm_neon_ok } */
12532
/* { dg-options "-save-temps -fno-inline" } */
12533
/* { dg-add-options arm_neon } */
12535
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
12536
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
12538
/* Test the `vrev64q_u32' ARM Neon intrinsic. */
12540
-/* { dg-do run } */
12541
-/* { dg-require-effective-target arm_neon_ok } */
12542
/* { dg-options "-save-temps -fno-inline" } */
12543
/* { dg-add-options arm_neon } */
12545
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
12546
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
12548
/* Test the `vrev64q_u8' ARM Neon intrinsic. */
12550
-/* { dg-do run } */
12551
-/* { dg-require-effective-target arm_neon_ok } */
12552
/* { dg-options "-save-temps -fno-inline" } */
12553
/* { dg-add-options arm_neon } */
12555
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
12556
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
12558
/* Test the `vrev64s16' ARM Neon intrinsic. */
12560
-/* { dg-do run } */
12561
-/* { dg-require-effective-target arm_neon_ok } */
12562
/* { dg-options "-save-temps -fno-inline" } */
12563
/* { dg-add-options arm_neon } */
12565
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
12566
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
12568
/* Test the `vrev64s32' ARM Neon intrinsic. */
12570
-/* { dg-do run } */
12571
-/* { dg-require-effective-target arm_neon_ok } */
12572
/* { dg-options "-save-temps -fno-inline" } */
12573
/* { dg-add-options arm_neon } */
12575
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
12576
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
12578
/* Test the `vrev64s8' ARM Neon intrinsic. */
12580
-/* { dg-do run } */
12581
-/* { dg-require-effective-target arm_neon_ok } */
12582
/* { dg-options "-save-temps -fno-inline" } */
12583
/* { dg-add-options arm_neon } */
12585
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
12586
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
12588
/* Test the `vrev64u16' ARM Neon intrinsic. */
12590
-/* { dg-do run } */
12591
-/* { dg-require-effective-target arm_neon_ok } */
12592
/* { dg-options "-save-temps -fno-inline" } */
12593
/* { dg-add-options arm_neon } */
12595
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
12596
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
12598
/* Test the `vrev64u32' ARM Neon intrinsic. */
12600
-/* { dg-do run } */
12601
-/* { dg-require-effective-target arm_neon_ok } */
12602
/* { dg-options "-save-temps -fno-inline" } */
12603
/* { dg-add-options arm_neon } */
12605
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
12606
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
12608
/* Test the `vrev64u8' ARM Neon intrinsic. */
12610
-/* { dg-do run } */
12611
-/* { dg-require-effective-target arm_neon_ok } */
12612
/* { dg-options "-save-temps -fno-inline" } */
12613
/* { dg-add-options arm_neon } */
12615
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
12616
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
12618
/* Test the `vtrnf32' ARM Neon intrinsic. */
12620
-/* { dg-do run } */
12621
-/* { dg-require-effective-target arm_neon_ok } */
12622
/* { dg-options "-save-temps -O1 -fno-inline" } */
12623
/* { dg-add-options arm_neon } */
12625
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
12626
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
12628
/* Test the `vtrnp16' ARM Neon intrinsic. */
12630
-/* { dg-do run } */
12631
-/* { dg-require-effective-target arm_neon_ok } */
12632
/* { dg-options "-save-temps -O1 -fno-inline" } */
12633
/* { dg-add-options arm_neon } */
12635
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
12636
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
12638
/* Test the `vtrnp8' ARM Neon intrinsic. */
12640
-/* { dg-do run } */
12641
-/* { dg-require-effective-target arm_neon_ok } */
12642
/* { dg-options "-save-temps -O1 -fno-inline" } */
12643
/* { dg-add-options arm_neon } */
12645
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
12646
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
12648
/* Test the `vtrnQf32' ARM Neon intrinsic. */
12650
-/* { dg-do run } */
12651
-/* { dg-require-effective-target arm_neon_ok } */
12652
/* { dg-options "-save-temps -O1 -fno-inline" } */
12653
/* { dg-add-options arm_neon } */
12655
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
12656
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
12658
/* Test the `vtrnQp16' ARM Neon intrinsic. */
12660
-/* { dg-do run } */
12661
-/* { dg-require-effective-target arm_neon_ok } */
12662
/* { dg-options "-save-temps -O1 -fno-inline" } */
12663
/* { dg-add-options arm_neon } */
12665
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
12666
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
12668
/* Test the `vtrnQp8' ARM Neon intrinsic. */
12670
-/* { dg-do run } */
12671
-/* { dg-require-effective-target arm_neon_ok } */
12672
/* { dg-options "-save-temps -O1 -fno-inline" } */
12673
/* { dg-add-options arm_neon } */
12675
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
12676
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
12678
/* Test the `vtrnQs16' ARM Neon intrinsic. */
12680
-/* { dg-do run } */
12681
-/* { dg-require-effective-target arm_neon_ok } */
12682
/* { dg-options "-save-temps -O1 -fno-inline" } */
12683
/* { dg-add-options arm_neon } */
12685
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
12686
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
12688
/* Test the `vtrnQs32' ARM Neon intrinsic. */
12690
-/* { dg-do run } */
12691
-/* { dg-require-effective-target arm_neon_ok } */
12692
/* { dg-options "-save-temps -O1 -fno-inline" } */
12693
/* { dg-add-options arm_neon } */
12695
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
12696
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
12698
/* Test the `vtrnQs8' ARM Neon intrinsic. */
12700
-/* { dg-do run } */
12701
-/* { dg-require-effective-target arm_neon_ok } */
12702
/* { dg-options "-save-temps -O1 -fno-inline" } */
12703
/* { dg-add-options arm_neon } */
12705
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
12706
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
12708
/* Test the `vtrnQu16' ARM Neon intrinsic. */
12710
-/* { dg-do run } */
12711
-/* { dg-require-effective-target arm_neon_ok } */
12712
/* { dg-options "-save-temps -O1 -fno-inline" } */
12713
/* { dg-add-options arm_neon } */
12715
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
12716
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
12718
/* Test the `vtrnQu32' ARM Neon intrinsic. */
12720
-/* { dg-do run } */
12721
-/* { dg-require-effective-target arm_neon_ok } */
12722
/* { dg-options "-save-temps -O1 -fno-inline" } */
12723
/* { dg-add-options arm_neon } */
12725
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
12726
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
12728
/* Test the `vtrnQu8' ARM Neon intrinsic. */
12730
-/* { dg-do run } */
12731
-/* { dg-require-effective-target arm_neon_ok } */
12732
/* { dg-options "-save-temps -O1 -fno-inline" } */
12733
/* { dg-add-options arm_neon } */
12735
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
12736
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
12738
/* Test the `vtrns16' ARM Neon intrinsic. */
12740
-/* { dg-do run } */
12741
-/* { dg-require-effective-target arm_neon_ok } */
12742
/* { dg-options "-save-temps -O1 -fno-inline" } */
12743
/* { dg-add-options arm_neon } */
12745
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
12746
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
12748
/* Test the `vtrns32' ARM Neon intrinsic. */
12750
-/* { dg-do run } */
12751
-/* { dg-require-effective-target arm_neon_ok } */
12752
/* { dg-options "-save-temps -O1 -fno-inline" } */
12753
/* { dg-add-options arm_neon } */
12755
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
12756
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
12758
/* Test the `vtrns8' ARM Neon intrinsic. */
12760
-/* { dg-do run } */
12761
-/* { dg-require-effective-target arm_neon_ok } */
12762
/* { dg-options "-save-temps -O1 -fno-inline" } */
12763
/* { dg-add-options arm_neon } */
12765
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
12766
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
12768
/* Test the `vtrnu16' ARM Neon intrinsic. */
12770
-/* { dg-do run } */
12771
-/* { dg-require-effective-target arm_neon_ok } */
12772
/* { dg-options "-save-temps -O1 -fno-inline" } */
12773
/* { dg-add-options arm_neon } */
12775
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
12776
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
12778
/* Test the `vtrnu32' ARM Neon intrinsic. */
12780
-/* { dg-do run } */
12781
-/* { dg-require-effective-target arm_neon_ok } */
12782
/* { dg-options "-save-temps -O1 -fno-inline" } */
12783
/* { dg-add-options arm_neon } */
12785
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
12786
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
12788
/* Test the `vtrnu8' ARM Neon intrinsic. */
12790
-/* { dg-do run } */
12791
-/* { dg-require-effective-target arm_neon_ok } */
12792
/* { dg-options "-save-temps -O1 -fno-inline" } */
12793
/* { dg-add-options arm_neon } */
12795
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
12796
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
12798
/* Test the `vuzpf32' ARM Neon intrinsic. */
12800
-/* { dg-do run } */
12801
-/* { dg-require-effective-target arm_neon_ok } */
12802
/* { dg-options "-save-temps -O1 -fno-inline" } */
12803
/* { dg-add-options arm_neon } */
12805
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
12806
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
12808
/* Test the `vuzpp16' ARM Neon intrinsic. */
12810
-/* { dg-do run } */
12811
-/* { dg-require-effective-target arm_neon_ok } */
12812
/* { dg-options "-save-temps -O1 -fno-inline" } */
12813
/* { dg-add-options arm_neon } */
12815
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
12816
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
12818
/* Test the `vuzpp8' ARM Neon intrinsic. */
12820
-/* { dg-do run } */
12821
-/* { dg-require-effective-target arm_neon_ok } */
12822
/* { dg-options "-save-temps -O1 -fno-inline" } */
12823
/* { dg-add-options arm_neon } */
12825
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
12826
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
12828
/* Test the `vuzpQf32' ARM Neon intrinsic. */
12830
-/* { dg-do run } */
12831
-/* { dg-require-effective-target arm_neon_ok } */
12832
/* { dg-options "-save-temps -O1 -fno-inline" } */
12833
/* { dg-add-options arm_neon } */
12835
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
12836
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
12838
/* Test the `vuzpQp16' ARM Neon intrinsic. */
12840
-/* { dg-do run } */
12841
-/* { dg-require-effective-target arm_neon_ok } */
12842
/* { dg-options "-save-temps -O1 -fno-inline" } */
12843
/* { dg-add-options arm_neon } */
12845
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
12846
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
12848
/* Test the `vuzpQp8' ARM Neon intrinsic. */
12850
-/* { dg-do run } */
12851
-/* { dg-require-effective-target arm_neon_ok } */
12852
/* { dg-options "-save-temps -O1 -fno-inline" } */
12853
/* { dg-add-options arm_neon } */
12855
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
12856
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
12858
/* Test the `vuzpQs16' ARM Neon intrinsic. */
12860
-/* { dg-do run } */
12861
-/* { dg-require-effective-target arm_neon_ok } */
12862
/* { dg-options "-save-temps -O1 -fno-inline" } */
12863
/* { dg-add-options arm_neon } */
12865
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
12866
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
12868
/* Test the `vuzpQs32' ARM Neon intrinsic. */
12870
-/* { dg-do run } */
12871
-/* { dg-require-effective-target arm_neon_ok } */
12872
/* { dg-options "-save-temps -O1 -fno-inline" } */
12873
/* { dg-add-options arm_neon } */
12875
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
12876
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
12878
/* Test the `vuzpQs8' ARM Neon intrinsic. */
12880
-/* { dg-do run } */
12881
-/* { dg-require-effective-target arm_neon_ok } */
12882
/* { dg-options "-save-temps -O1 -fno-inline" } */
12883
/* { dg-add-options arm_neon } */
12885
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
12886
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
12888
/* Test the `vuzpQu16' ARM Neon intrinsic. */
12890
-/* { dg-do run } */
12891
-/* { dg-require-effective-target arm_neon_ok } */
12892
/* { dg-options "-save-temps -O1 -fno-inline" } */
12893
/* { dg-add-options arm_neon } */
12895
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
12896
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
12898
/* Test the `vuzpQu32' ARM Neon intrinsic. */
12900
-/* { dg-do run } */
12901
-/* { dg-require-effective-target arm_neon_ok } */
12902
/* { dg-options "-save-temps -O1 -fno-inline" } */
12903
/* { dg-add-options arm_neon } */
12905
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
12906
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
12908
/* Test the `vuzpQu8' ARM Neon intrinsic. */
12910
-/* { dg-do run } */
12911
-/* { dg-require-effective-target arm_neon_ok } */
12912
/* { dg-options "-save-temps -O1 -fno-inline" } */
12913
/* { dg-add-options arm_neon } */
12915
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
12916
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
12918
/* Test the `vuzps16' ARM Neon intrinsic. */
12920
-/* { dg-do run } */
12921
-/* { dg-require-effective-target arm_neon_ok } */
12922
/* { dg-options "-save-temps -O1 -fno-inline" } */
12923
/* { dg-add-options arm_neon } */
12925
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
12926
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
12928
/* Test the `vuzps32' ARM Neon intrinsic. */
12930
-/* { dg-do run } */
12931
-/* { dg-require-effective-target arm_neon_ok } */
12932
/* { dg-options "-save-temps -O1 -fno-inline" } */
12933
/* { dg-add-options arm_neon } */
12935
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
12936
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
12938
/* Test the `vuzps8' ARM Neon intrinsic. */
12940
-/* { dg-do run } */
12941
-/* { dg-require-effective-target arm_neon_ok } */
12942
/* { dg-options "-save-temps -O1 -fno-inline" } */
12943
/* { dg-add-options arm_neon } */
12945
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
12946
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
12948
/* Test the `vuzpu16' ARM Neon intrinsic. */
12950
-/* { dg-do run } */
12951
-/* { dg-require-effective-target arm_neon_ok } */
12952
/* { dg-options "-save-temps -O1 -fno-inline" } */
12953
/* { dg-add-options arm_neon } */
12955
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
12956
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
12958
/* Test the `vuzpu32' ARM Neon intrinsic. */
12960
-/* { dg-do run } */
12961
-/* { dg-require-effective-target arm_neon_ok } */
12962
/* { dg-options "-save-temps -O1 -fno-inline" } */
12963
/* { dg-add-options arm_neon } */
12965
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
12966
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
12968
/* Test the `vuzpu8' ARM Neon intrinsic. */
12970
-/* { dg-do run } */
12971
-/* { dg-require-effective-target arm_neon_ok } */
12972
/* { dg-options "-save-temps -O1 -fno-inline" } */
12973
/* { dg-add-options arm_neon } */
12975
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
12976
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
12978
/* Test the `vzipf32' ARM Neon intrinsic. */
12980
-/* { dg-do run } */
12981
-/* { dg-require-effective-target arm_neon_ok } */
12982
/* { dg-options "-save-temps -O1 -fno-inline" } */
12983
/* { dg-add-options arm_neon } */
12985
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
12986
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
12988
/* Test the `vzipp16' ARM Neon intrinsic. */
12990
-/* { dg-do run } */
12991
-/* { dg-require-effective-target arm_neon_ok } */
12992
/* { dg-options "-save-temps -O1 -fno-inline" } */
12993
/* { dg-add-options arm_neon } */
12995
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
12996
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
12998
/* Test the `vzipp8' ARM Neon intrinsic. */
13000
-/* { dg-do run } */
13001
-/* { dg-require-effective-target arm_neon_ok } */
13002
/* { dg-options "-save-temps -O1 -fno-inline" } */
13003
/* { dg-add-options arm_neon } */
13005
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
13006
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
13008
/* Test the `vzipQf32' ARM Neon intrinsic. */
13010
-/* { dg-do run } */
13011
-/* { dg-require-effective-target arm_neon_ok } */
13012
/* { dg-options "-save-temps -O1 -fno-inline" } */
13013
/* { dg-add-options arm_neon } */
13015
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
13016
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
13018
/* Test the `vzipQp16' ARM Neon intrinsic. */
13020
-/* { dg-do run } */
13021
-/* { dg-require-effective-target arm_neon_ok } */
13022
/* { dg-options "-save-temps -O1 -fno-inline" } */
13023
/* { dg-add-options arm_neon } */
13025
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
13026
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
13028
/* Test the `vzipQp8' ARM Neon intrinsic. */
13030
-/* { dg-do run } */
13031
-/* { dg-require-effective-target arm_neon_ok } */
13032
/* { dg-options "-save-temps -O1 -fno-inline" } */
13033
/* { dg-add-options arm_neon } */
13035
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
13036
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
13038
/* Test the `vzipQs16' ARM Neon intrinsic. */
13040
-/* { dg-do run } */
13041
-/* { dg-require-effective-target arm_neon_ok } */
13042
/* { dg-options "-save-temps -O1 -fno-inline" } */
13043
/* { dg-add-options arm_neon } */
13045
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
13046
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
13048
/* Test the `vzipQs32' ARM Neon intrinsic. */
13050
-/* { dg-do run } */
13051
-/* { dg-require-effective-target arm_neon_ok } */
13052
/* { dg-options "-save-temps -O1 -fno-inline" } */
13053
/* { dg-add-options arm_neon } */
13055
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
13056
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
13058
/* Test the `vzipQs8' ARM Neon intrinsic. */
13060
-/* { dg-do run } */
13061
-/* { dg-require-effective-target arm_neon_ok } */
13062
/* { dg-options "-save-temps -O1 -fno-inline" } */
13063
/* { dg-add-options arm_neon } */
13065
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
13066
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
13068
/* Test the `vzipQu16' ARM Neon intrinsic. */
13070
-/* { dg-do run } */
13071
-/* { dg-require-effective-target arm_neon_ok } */
13072
/* { dg-options "-save-temps -O1 -fno-inline" } */
13073
/* { dg-add-options arm_neon } */
13075
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
13076
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
13078
/* Test the `vzipQu32' ARM Neon intrinsic. */
13080
-/* { dg-do run } */
13081
-/* { dg-require-effective-target arm_neon_ok } */
13082
/* { dg-options "-save-temps -O1 -fno-inline" } */
13083
/* { dg-add-options arm_neon } */
13085
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
13086
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
13088
/* Test the `vzipQu8' ARM Neon intrinsic. */
13090
-/* { dg-do run } */
13091
-/* { dg-require-effective-target arm_neon_ok } */
13092
/* { dg-options "-save-temps -O1 -fno-inline" } */
13093
/* { dg-add-options arm_neon } */
13095
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
13096
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
13098
/* Test the `vzips16' ARM Neon intrinsic. */
13100
-/* { dg-do run } */
13101
-/* { dg-require-effective-target arm_neon_ok } */
13102
/* { dg-options "-save-temps -O1 -fno-inline" } */
13103
/* { dg-add-options arm_neon } */
13105
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
13106
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
13108
/* Test the `vzips32' ARM Neon intrinsic. */
13110
-/* { dg-do run } */
13111
-/* { dg-require-effective-target arm_neon_ok } */
13112
/* { dg-options "-save-temps -O1 -fno-inline" } */
13113
/* { dg-add-options arm_neon } */
13115
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
13116
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
13118
/* Test the `vzips8' ARM Neon intrinsic. */
13120
-/* { dg-do run } */
13121
-/* { dg-require-effective-target arm_neon_ok } */
13122
/* { dg-options "-save-temps -O1 -fno-inline" } */
13123
/* { dg-add-options arm_neon } */
13125
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
13126
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
13128
/* Test the `vzipu16' ARM Neon intrinsic. */
13130
-/* { dg-do run } */
13131
-/* { dg-require-effective-target arm_neon_ok } */
13132
/* { dg-options "-save-temps -O1 -fno-inline" } */
13133
/* { dg-add-options arm_neon } */
13135
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
13136
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
13138
/* Test the `vzipu32' ARM Neon intrinsic. */
13140
-/* { dg-do run } */
13141
-/* { dg-require-effective-target arm_neon_ok } */
13142
/* { dg-options "-save-temps -O1 -fno-inline" } */
13143
/* { dg-add-options arm_neon } */
13145
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
13146
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
13148
/* Test the `vzipu8' ARM Neon intrinsic. */
13150
-/* { dg-do run } */
13151
-/* { dg-require-effective-target arm_neon_ok } */
13152
/* { dg-options "-save-temps -O1 -fno-inline" } */
13153
/* { dg-add-options arm_neon } */
13155
--- a/src//dev/null
13156
+++ b/src/gcc/testsuite/gcc.target/arm/unsigned-float.c
13158
+/* { dg-do compile } */
13159
+/* { dg-require-effective-target arm_vfp_ok } */
13160
+/* { dg-options "-march=armv7-a -O1 -mfloat-abi=softfp" } */
13161
+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
13163
+#include <stdint.h>
13168
+ return (double)(float)x;
13174
+ return (float)(double)x;
13177
+/* { dg-final { scan-assembler-not "vcvt.(f32.f64|f64.f32)" } } */
13178
--- a/src/gcc/tree-ssa-loop-ivopts.c
13179
+++ b/src/gcc/tree-ssa-loop-ivopts.c
13180
@@ -226,6 +226,7 @@ struct cost_pair
13183
unsigned id; /* The id of the use. */
13184
+ unsigned sub_id; /* The id of the sub use. */
13185
enum use_type type; /* Type of the use. */
13186
struct iv *iv; /* The induction variable it is based on. */
13187
gimple stmt; /* Statement in that it occurs. */
13188
@@ -239,6 +240,11 @@ struct iv_use
13190
struct iv_cand *selected;
13191
/* The selected candidate. */
13193
+ struct iv_use *next; /* The next sub use. */
13194
+ tree addr_base; /* Base address with const offset stripped. */
13195
+ unsigned HOST_WIDE_INT addr_offset;
13196
+ /* Const offset stripped from base address. */
13199
/* The position where the iv is computed. */
13200
@@ -555,7 +561,11 @@ dump_iv (FILE *file, struct iv *iv)
13202
dump_use (FILE *file, struct iv_use *use)
13204
- fprintf (file, "use %d\n", use->id);
13205
+ fprintf (file, "use %d", use->id);
13207
+ fprintf (file, ".%d", use->sub_id);
13209
+ fprintf (file, "\n");
13213
@@ -604,8 +614,12 @@ dump_uses (FILE *file, struct ivopts_data *data)
13214
for (i = 0; i < n_iv_uses (data); i++)
13216
use = iv_use (data, i);
13218
- dump_use (file, use);
13221
+ dump_use (file, use);
13225
fprintf (file, "\n");
13228
@@ -1326,33 +1340,84 @@ find_induction_variables (struct ivopts_data *data)
13232
-/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
13233
+/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
13234
+ For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
13235
+ is the const offset stripped from IV base. For uses of other types,
13236
+ ADDR_BASE and ADDR_OFFSET are zero by default. */
13238
static struct iv_use *
13239
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
13240
- gimple stmt, enum use_type use_type)
13241
+ gimple stmt, enum use_type use_type, tree addr_base = NULL,
13242
+ unsigned HOST_WIDE_INT addr_offset = 0)
13244
struct iv_use *use = XCNEW (struct iv_use);
13246
use->id = n_iv_uses (data);
13248
use->type = use_type;
13252
use->related_cands = BITMAP_ALLOC (NULL);
13253
+ use->next = NULL;
13254
+ use->addr_base = addr_base;
13255
+ use->addr_offset = addr_offset;
13257
/* To avoid showing ssa name in the dumps, if it was not reset by the
13259
iv->ssa_name = NULL_TREE;
13261
- if (dump_file && (dump_flags & TDF_DETAILS))
13262
- dump_use (dump_file, use);
13264
data->iv_uses.safe_push (use);
13269
+/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
13270
+ The sub use is recorded under the one whose use id is ID_GROUP. */
13272
+static struct iv_use *
13273
+record_sub_use (struct ivopts_data *data, tree *use_p,
13274
+ struct iv *iv, gimple stmt, enum use_type use_type,
13275
+ tree addr_base, unsigned HOST_WIDE_INT addr_offset,
13276
+ unsigned int id_group)
13278
+ struct iv_use *use = XCNEW (struct iv_use);
13279
+ struct iv_use *group = iv_use (data, id_group);
13281
+ use->id = group->id;
13283
+ use->type = use_type;
13285
+ use->stmt = stmt;
13286
+ use->op_p = use_p;
13287
+ use->related_cands = NULL;
13288
+ use->addr_base = addr_base;
13289
+ use->addr_offset = addr_offset;
13291
+ /* Sub use list is maintained in offset ascending order. */
13292
+ if (addr_offset <= group->addr_offset)
13294
+ use->related_cands = group->related_cands;
13295
+ group->related_cands = NULL;
13296
+ use->next = group;
13297
+ data->iv_uses[id_group] = use;
13301
+ struct iv_use *pre;
13305
+ group = group->next;
13307
+ while (group && addr_offset > group->addr_offset);
13308
+ use->next = pre->next;
13315
/* Checks whether OP is a loop-level invariant and if so, records it.
13316
NONLINEAR_USE is true if the invariant is used in a way we do not
13317
handle specially. */
13318
@@ -1837,6 +1902,50 @@ may_be_nonaddressable_p (tree expr)
13323
+strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
13325
+/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
13326
+ If there is an existing use which has same stripped iv base and step,
13327
+ this function records this one as a sub use to that; otherwise records
13328
+ it as a normal one. */
13330
+static struct iv_use *
13331
+record_group_use (struct ivopts_data *data, tree *use_p,
13332
+ struct iv *iv, gimple stmt, enum use_type use_type)
13335
+ struct iv_use *use;
13337
+ unsigned HOST_WIDE_INT addr_offset;
13339
+ /* Only support sub use for address type uses, that is, with base
13341
+ if (!iv->base_object)
13342
+ return record_use (data, use_p, iv, stmt, use_type);
13344
+ addr_base = strip_offset (iv->base, &addr_offset);
13345
+ for (i = 0; i < n_iv_uses (data); i++)
13347
+ use = iv_use (data, i);
13348
+ if (use->type != USE_ADDRESS || !use->iv->base_object)
13351
+ /* Check if it has the same stripped base and step. */
13352
+ if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
13353
+ && operand_equal_p (iv->step, use->iv->step, 0)
13354
+ && operand_equal_p (addr_base, use->addr_base, 0))
13358
+ if (i == n_iv_uses (data))
13359
+ return record_use (data, use_p, iv, stmt,
13360
+ use_type, addr_base, addr_offset);
13362
+ return record_sub_use (data, use_p, iv, stmt,
13363
+ use_type, addr_base, addr_offset, i);
13366
/* Finds addresses in *OP_P inside STMT. */
13369
@@ -1947,7 +2056,7 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p
13372
civ = alloc_iv (base, step);
13373
- record_use (data, op_p, civ, stmt, USE_ADDRESS);
13374
+ record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
13378
@@ -2133,6 +2242,172 @@ find_interesting_uses (struct ivopts_data *data)
13382
+/* Compute maximum offset of [base + offset] addressing mode
13383
+ for memory reference represented by USE. */
13385
+static HOST_WIDE_INT
13386
+compute_max_addr_offset (struct iv_use *use)
13390
+ HOST_WIDE_INT i, off;
13391
+ unsigned list_index, num;
13393
+ machine_mode mem_mode, addr_mode;
13394
+ static vec<HOST_WIDE_INT> max_offset_list;
13396
+ as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
13397
+ mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
13399
+ num = max_offset_list.length ();
13400
+ list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
13401
+ if (list_index >= num)
13403
+ max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
13404
+ for (; num < max_offset_list.length (); num++)
13405
+ max_offset_list[num] = -1;
13408
+ off = max_offset_list[list_index];
13412
+ addr_mode = targetm.addr_space.address_mode (as);
13413
+ reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
13414
+ addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
13416
+ width = GET_MODE_BITSIZE (addr_mode) - 1;
13417
+ if (width > (HOST_BITS_PER_WIDE_INT - 1))
13418
+ width = HOST_BITS_PER_WIDE_INT - 1;
13420
+ for (i = width; i > 0; i--)
13422
+ off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
13423
+ XEXP (addr, 1) = gen_int_mode (off, addr_mode);
13424
+ if (memory_address_addr_space_p (mem_mode, addr, as))
13427
+ /* For some strict-alignment targets, the offset must be naturally
13428
+ aligned. Try an aligned offset if mem_mode is not QImode. */
13429
+ off = ((unsigned HOST_WIDE_INT) 1 << i);
13430
+ if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
13432
+ off -= GET_MODE_SIZE (mem_mode);
13433
+ XEXP (addr, 1) = gen_int_mode (off, addr_mode);
13434
+ if (memory_address_addr_space_p (mem_mode, addr, as))
13441
+ max_offset_list[list_index] = off;
13445
+/* Check if all small groups should be split. Return true if and
13448
+ 1) At least one groups contain two uses with different offsets.
13449
+ 2) No group contains more than two uses with different offsets.
13451
+ Return false otherwise. We want to split such groups because:
13453
+ 1) Small groups don't have much benefit and may interfer with
13454
+ general candidate selection.
13455
+ 2) Size for problem with only small groups is usually small and
13456
+ general algorithm can handle it well.
13458
+ TODO -- Above claim may not hold when auto increment is supported. */
13461
+split_all_small_groups (struct ivopts_data *data)
13463
+ bool split_p = false;
13464
+ unsigned int i, n, distinct;
13465
+ struct iv_use *pre, *use;
13467
+ n = n_iv_uses (data);
13468
+ for (i = 0; i < n; i++)
13470
+ use = iv_use (data, i);
13475
+ gcc_assert (use->type == USE_ADDRESS);
13476
+ for (pre = use, use = use->next; use; pre = use, use = use->next)
13478
+ if (pre->addr_offset != use->addr_offset)
13481
+ if (distinct > 2)
13484
+ if (distinct == 2)
13491
+/* For each group of address type uses, this function further groups
13492
+ these uses according to the maximum offset supported by target's
13493
+ [base + offset] addressing mode. */
13496
+group_address_uses (struct ivopts_data *data)
13498
+ HOST_WIDE_INT max_offset = -1;
13499
+ unsigned int i, n, sub_id;
13500
+ struct iv_use *pre, *use;
13501
+ unsigned HOST_WIDE_INT addr_offset_first;
13503
+ /* Reset max offset to split all small groups. */
13504
+ if (split_all_small_groups (data))
13507
+ n = n_iv_uses (data);
13508
+ for (i = 0; i < n; i++)
13510
+ use = iv_use (data, i);
13514
+ gcc_assert (use->type == USE_ADDRESS);
13515
+ if (max_offset != 0)
13516
+ max_offset = compute_max_addr_offset (use);
13521
+ addr_offset_first = use->addr_offset;
13522
+ /* Only uses with offset that can fit in offset part against
13523
+ the first use can be grouped together. */
13524
+ for (pre = use, use = use->next;
13525
+ use && (use->addr_offset - addr_offset_first
13526
+ <= (unsigned HOST_WIDE_INT) max_offset);
13527
+ pre = use, use = use->next)
13529
+ use->id = pre->id;
13530
+ use->sub_id = ++sub_id;
13533
+ /* Break the list and create new group. */
13536
+ pre->next = NULL;
13537
+ use->id = n_iv_uses (data);
13538
+ use->related_cands = BITMAP_ALLOC (NULL);
13539
+ data->iv_uses.safe_push (use);
13544
+ if (dump_file && (dump_flags & TDF_DETAILS))
13545
+ dump_uses (dump_file, data);
13548
/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
13549
is true, assume we are inside an address. If TOP_COMPREF is true, assume
13550
we are at the top-level of the processed address. */
13551
@@ -2556,6 +2831,8 @@ static void
13552
add_candidate (struct ivopts_data *data,
13553
tree base, tree step, bool important, struct iv_use *use)
13555
+ gcc_assert (use == NULL || use->sub_id == 0);
13557
if (ip_normal_pos (data->current_loop))
13558
add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
13559
if (ip_end_pos (data->current_loop)
13560
@@ -2785,11 +3062,22 @@ new_cost (unsigned runtime, unsigned complexity)
13564
+/* Returns true if COST is infinite. */
13567
+infinite_cost_p (comp_cost cost)
13569
+ return cost.cost == INFTY;
13572
/* Adds costs COST1 and COST2. */
13575
add_costs (comp_cost cost1, comp_cost cost2)
13577
+ if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
13578
+ return infinite_cost;
13580
cost1.cost += cost2.cost;
13581
cost1.complexity += cost2.complexity;
13583
@@ -2818,14 +3106,6 @@ compare_costs (comp_cost cost1, comp_cost cost2)
13584
return cost1.cost - cost2.cost;
13587
-/* Returns true if COST is infinite. */
13590
-infinite_cost_p (comp_cost cost)
13592
- return cost.cost == INFTY;
13595
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
13596
on invariants DEPENDS_ON and that the value used in expressing it
13597
is VALUE, and in case of iv elimination the comparison operator is COMP. */
13598
@@ -4300,7 +4580,15 @@ get_computation_cost_at (struct ivopts_data *data,
13599
cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
13603
+ /* Set of invariants depended on by sub use has already been computed
13604
+ for the first use in the group. */
13608
+ if (depends_on && *depends_on)
13609
+ bitmap_clear (*depends_on);
13611
+ else if (inv_expr_id)
13614
get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
13615
@@ -4429,6 +4717,8 @@ determine_use_iv_cost_address (struct ivopts_data *data,
13618
int inv_expr_id = -1;
13619
+ struct iv_use *sub_use;
13620
+ comp_cost sub_cost;
13621
comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
13622
&can_autoinc, &inv_expr_id);
13624
@@ -4442,6 +4732,15 @@ determine_use_iv_cost_address (struct ivopts_data *data,
13625
else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
13626
cost = infinite_cost;
13628
+ for (sub_use = use->next;
13629
+ sub_use && !infinite_cost_p (cost);
13630
+ sub_use = sub_use->next)
13632
+ sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
13633
+ &can_autoinc, &inv_expr_id);
13634
+ cost = add_costs (cost, sub_cost);
13637
set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
13640
@@ -6588,8 +6887,8 @@ adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
13641
/* Rewrites USE (address that is an iv) using candidate CAND. */
13644
-rewrite_use_address (struct ivopts_data *data,
13645
- struct iv_use *use, struct iv_cand *cand)
13646
+rewrite_use_address_1 (struct ivopts_data *data,
13647
+ struct iv_use *use, struct iv_cand *cand)
13650
gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
13651
@@ -6624,6 +6923,28 @@ rewrite_use_address (struct ivopts_data *data,
13655
+/* Rewrites USE (address that is an iv) using candidate CAND. If it's the
13656
+ first use of a group, rewrites sub uses in the group too. */
13659
+rewrite_use_address (struct ivopts_data *data,
13660
+ struct iv_use *use, struct iv_cand *cand)
13662
+ struct iv_use *next;
13664
+ gcc_assert (use->sub_id == 0);
13665
+ rewrite_use_address_1 (data, use, cand);
13666
+ update_stmt (use->stmt);
13668
+ for (next = use->next; next != NULL; next = next->next)
13670
+ rewrite_use_address_1 (data, next, cand);
13671
+ update_stmt (next->stmt);
13677
/* Rewrites USE (the condition such that one of the arguments is an iv) using
13680
@@ -6899,6 +7220,18 @@ free_loop_data (struct ivopts_data *data)
13681
for (i = 0; i < n_iv_uses (data); i++)
13683
struct iv_use *use = iv_use (data, i);
13684
+ struct iv_use *pre = use, *sub = use->next;
13688
+ gcc_assert (sub->related_cands == NULL);
13689
+ gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
13698
BITMAP_FREE (use->related_cands);
13699
@@ -7025,6 +7358,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
13701
/* Finds interesting uses (item 1). */
13702
find_interesting_uses (data);
13703
+ group_address_uses (data);
13704
if (n_iv_uses (data) > MAX_CONSIDERED_USES)
13707
--- a/src/gcc/tree-ssa-math-opts.c
13708
+++ b/src/gcc/tree-ssa-math-opts.c
13709
@@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see
13710
#include "target.h"
13711
#include "gimple-pretty-print.h"
13712
#include "builtins.h"
13713
+#include "params.h"
13715
/* FIXME: RTL headers have to be included here for optabs. */
13716
#include "rtl.h" /* Because optabs.h wants enum rtx_code. */
13717
@@ -1148,6 +1149,357 @@ build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
13721
+struct pow_synth_sqrt_info
13724
+ unsigned int deepest;
13725
+ unsigned int num_mults;
13728
+/* Return true iff the real value C can be represented as a
13729
+ sum of powers of 0.5 up to N. That is:
13730
+ C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
13731
+ Record in INFO the various parameters of the synthesis algorithm such
13732
+ as the factors a[i], the maximum 0.5 power and the number of
13733
+ multiplications that will be required. */
13736
+representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
13737
+ struct pow_synth_sqrt_info *info)
13739
+ REAL_VALUE_TYPE factor = dconsthalf;
13740
+ REAL_VALUE_TYPE remainder = c;
13742
+ info->deepest = 0;
13743
+ info->num_mults = 0;
13744
+ memset (info->factors, 0, n * sizeof (bool));
13746
+ for (unsigned i = 0; i < n; i++)
13748
+ REAL_VALUE_TYPE res;
13750
+ /* If something inexact happened bail out now. */
13751
+ if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
13754
+ /* We have hit zero. The number is representable as a sum
13755
+ of powers of 0.5. */
13756
+ if (REAL_VALUES_EQUAL (res, dconst0))
13758
+ info->factors[i] = true;
13759
+ info->deepest = i + 1;
13762
+ else if (!REAL_VALUE_NEGATIVE (res))
13765
+ info->factors[i] = true;
13766
+ info->num_mults++;
13769
+ info->factors[i] = false;
13771
+ REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
13776
+/* Return the tree corresponding to FN being applied
13777
+ to ARG N times at GSI and LOC.
13778
+ Look up previous results from CACHE if need be.
13779
+ cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
13782
+get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
13783
+ tree fn, location_t loc, tree *cache)
13785
+ tree res = cache[n];
13788
+ tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
13789
+ res = build_and_insert_call (gsi, loc, fn, prev);
13796
+/* Print to STREAM the repeated application of function FNAME to ARG
13797
+ N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
13798
+ "foo (foo (x))". */
13801
+print_nested_fn (FILE* stream, const char *fname, const char* arg,
13805
+ fprintf (stream, "%s", arg);
13808
+ fprintf (stream, "%s (", fname);
13809
+ print_nested_fn (stream, fname, arg, n - 1);
13810
+ fprintf (stream, ")");
13814
+/* Print to STREAM the fractional sequence of sqrt chains
13815
+ applied to ARG, described by INFO. Used for the dump file. */
13818
+dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
13819
+ struct pow_synth_sqrt_info *info)
13821
+ for (unsigned int i = 0; i < info->deepest; i++)
13823
+ bool is_set = info->factors[i];
13826
+ print_nested_fn (stream, "sqrt", arg, i + 1);
13827
+ if (i != info->deepest - 1)
13828
+ fprintf (stream, " * ");
13833
+/* Print to STREAM a representation of raising ARG to an integer
13834
+ power N. Used for the dump file. */
13837
+dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
13840
+ fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
13842
+ fprintf (stream, "%s", arg);
13845
+/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
13846
+ square roots. Place at GSI and LOC. Limit the maximum depth
13847
+ of the sqrt chains to MAX_DEPTH. Return the tree holding the
13848
+ result of the expanded sequence or NULL_TREE if the expansion failed.
13850
+ This routine assumes that ARG1 is a real number with a fractional part
13851
+ (the integer exponent case will have been handled earlier in
13852
+ gimple_expand_builtin_pow).
13855
+ * For ARG1 composed of a whole part WHOLE_PART and a fractional part
13856
+ FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
13857
+ FRAC_PART == ARG1 - WHOLE_PART:
13858
+ Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
13859
+ POW (ARG0, FRAC_PART) is expanded as a product of square root chains
13860
+ if it can be expressed as such, that is if FRAC_PART satisfies:
13861
+ FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
13862
+ where integer a[i] is either 0 or 1.
13865
+ POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
13866
+ --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
13868
+ For ARG1 < 0.0 there are two approaches:
13869
+ * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
13870
+ is calculated as above.
13873
+ POW (x, -5.625) == 1.0 / POW (x, 5.625)
13874
+ --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
13876
+ * (B) : WHOLE_PART := - ceil (abs (ARG1))
13877
+ FRAC_PART := ARG1 - WHOLE_PART
13878
+ and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
13880
+ POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
13881
+ --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
13883
+ For ARG1 < 0.0 we choose between (A) and (B) depending on
13884
+ how many multiplications we'd have to do.
13885
+ So, for the example in (B): POW (x, -5.875), if we were to
13886
+ follow algorithm (A) we would produce:
13887
+ 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
13888
+ which contains more multiplications than approach (B).
13890
+ Hopefully, this approach will eliminate potentially expensive POW library
13891
+ calls when unsafe floating point math is enabled and allow the compiler to
13892
+ further optimise the multiplies, square roots and divides produced by this
13896
+expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
13897
+ tree arg0, tree arg1, HOST_WIDE_INT max_depth)
13899
+ tree type = TREE_TYPE (arg0);
13900
+ machine_mode mode = TYPE_MODE (type);
13901
+ tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
13902
+ bool one_over = true;
13905
+ return NULL_TREE;
13907
+ if (TREE_CODE (arg1) != REAL_CST)
13908
+ return NULL_TREE;
13910
+ REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
13912
+ gcc_assert (max_depth > 0);
13913
+ tree *cache = XALLOCAVEC (tree, max_depth + 1);
13915
+ struct pow_synth_sqrt_info synth_info;
13916
+ synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
13917
+ synth_info.deepest = 0;
13918
+ synth_info.num_mults = 0;
13920
+ bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
13921
+ REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
13923
+ /* The whole and fractional parts of exp. */
13924
+ REAL_VALUE_TYPE whole_part;
13925
+ REAL_VALUE_TYPE frac_part;
13927
+ real_floor (&whole_part, mode, &exp);
13928
+ REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
13931
+ REAL_VALUE_TYPE ceil_whole = dconst0;
13932
+ REAL_VALUE_TYPE ceil_fract = dconst0;
13936
+ real_ceil (&ceil_whole, mode, &exp);
13937
+ REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
13940
+ if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
13941
+ return NULL_TREE;
13943
+ /* Check whether it's more profitable to not use 1.0 / ... */
13946
+ struct pow_synth_sqrt_info alt_synth_info;
13947
+ alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
13948
+ alt_synth_info.deepest = 0;
13949
+ alt_synth_info.num_mults = 0;
13951
+ if (representable_as_half_series_p (ceil_fract, max_depth,
13953
+ && alt_synth_info.deepest <= synth_info.deepest
13954
+ && alt_synth_info.num_mults < synth_info.num_mults)
13956
+ whole_part = ceil_whole;
13957
+ frac_part = ceil_fract;
13958
+ synth_info.deepest = alt_synth_info.deepest;
13959
+ synth_info.num_mults = alt_synth_info.num_mults;
13960
+ memcpy (synth_info.factors, alt_synth_info.factors,
13961
+ (max_depth + 1) * sizeof (bool));
13962
+ one_over = false;
13966
+ HOST_WIDE_INT n = real_to_integer (&whole_part);
13967
+ REAL_VALUE_TYPE cint;
13968
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
13970
+ if (!real_identical (&whole_part, &cint))
13971
+ return NULL_TREE;
13973
+ if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
13974
+ return NULL_TREE;
13976
+ memset (cache, 0, (max_depth + 1) * sizeof (tree));
13978
+ tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
13980
+ /* Calculate the integer part of the exponent. */
13983
+ integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
13984
+ if (!integer_res)
13985
+ return NULL_TREE;
13992
+ real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
13993
+ fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
13999
+ fprintf (dump_file, "1.0 / (");
14000
+ dump_integer_part (dump_file, "x", n);
14002
+ fprintf (dump_file, " * ");
14003
+ dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
14004
+ fprintf (dump_file, ")");
14008
+ dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
14009
+ fprintf (dump_file, " / (");
14010
+ dump_integer_part (dump_file, "x", n);
14011
+ fprintf (dump_file, ")");
14016
+ dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
14018
+ fprintf (dump_file, " * ");
14019
+ dump_integer_part (dump_file, "x", n);
14022
+ fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
14026
+ tree fract_res = NULL_TREE;
14029
+ /* Calculate the fractional part of the exponent. */
14030
+ for (unsigned i = 0; i < synth_info.deepest; i++)
14032
+ if (synth_info.factors[i])
14034
+ tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
14037
+ fract_res = sqrt_chain;
14040
+ fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14041
+ fract_res, sqrt_chain);
14045
+ tree res = NULL_TREE;
14052
+ res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14053
+ fract_res, integer_res);
14057
+ res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
14058
+ build_real (type, dconst1), res);
14062
+ res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
14063
+ fract_res, integer_res);
14067
+ res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14068
+ fract_res, integer_res);
14072
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
14073
with location info LOC. If possible, create an equivalent and
14074
less expensive sequence of statements prior to GSI, and return an
14075
@@ -1157,13 +1509,17 @@ static tree
14076
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14077
tree arg0, tree arg1)
14079
- REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
14080
+ REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
14081
REAL_VALUE_TYPE c2, dconst3;
14083
- tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
14084
+ tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
14086
+ bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
14087
bool hw_sqrt_exists, c_is_int, c2_is_int;
14089
+ dconst1_4 = dconst1;
14090
+ SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
14092
/* If the exponent isn't a constant, there's nothing of interest
14094
if (TREE_CODE (arg1) != REAL_CST)
14095
@@ -1179,7 +1535,7 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14097
&& ((n >= -1 && n <= 2)
14098
|| (flag_unsafe_math_optimizations
14099
- && optimize_bb_for_speed_p (gsi_bb (*gsi))
14101
&& powi_cost (n) <= POWI_MAX_MULTS)))
14102
return gimple_expand_builtin_powi (gsi, loc, arg0, n);
14104
@@ -1196,49 +1552,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14105
&& !HONOR_SIGNED_ZEROS (mode))
14106
return build_and_insert_call (gsi, loc, sqrtfn, arg0);
14108
- /* Optimize pow(x,0.25) = sqrt(sqrt(x)). Assume on most machines that
14109
- a builtin sqrt instruction is smaller than a call to pow with 0.25,
14110
- so do this optimization even if -Os. Don't do this optimization
14111
- if we don't have a hardware sqrt insn. */
14112
- dconst1_4 = dconst1;
14113
- SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
14114
hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
14116
- if (flag_unsafe_math_optimizations
14118
- && REAL_VALUES_EQUAL (c, dconst1_4)
14119
- && hw_sqrt_exists)
14122
- sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
14124
- /* sqrt(sqrt(x)) */
14125
- return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
14128
- /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
14129
- optimizing for space. Don't do this optimization if we don't have
14130
- a hardware sqrt insn. */
14131
- real_from_integer (&dconst3_4, VOIDmode, 3, SIGNED);
14132
- SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
14134
- if (flag_unsafe_math_optimizations
14136
- && optimize_function_for_speed_p (cfun)
14137
- && REAL_VALUES_EQUAL (c, dconst3_4)
14138
- && hw_sqrt_exists)
14141
- sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
14143
- /* sqrt(sqrt(x)) */
14144
- sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
14146
- /* sqrt(x) * sqrt(sqrt(x)) */
14147
- return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14148
- sqrt_arg0, sqrt_sqrt);
14151
/* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
14152
optimizations since 1./3. is not exactly representable. If x
14153
is negative and finite, the correct value of pow(x,1./3.) is
14154
@@ -1263,7 +1578,7 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14157
&& (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
14158
- && optimize_function_for_speed_p (cfun)
14161
&& REAL_VALUES_EQUAL (c, dconst1_6))
14163
@@ -1274,54 +1589,31 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14164
return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
14167
- /* Optimize pow(x,c), where n = 2c for some nonzero integer n
14168
- and c not an integer, into
14170
- sqrt(x) * powi(x, n/2), n > 0;
14171
- 1.0 / (sqrt(x) * powi(x, abs(n/2))), n < 0.
14173
- Do not calculate the powi factor when n/2 = 0. */
14174
- real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
14175
- n = real_to_integer (&c2);
14176
- real_from_integer (&cint, VOIDmode, n, SIGNED);
14177
- c2_is_int = real_identical (&c2, &cint);
14179
+ /* Attempt to expand the POW as a product of square root chains.
14180
+ Expand the 0.25 case even when optimising for size. */
14181
if (flag_unsafe_math_optimizations
14185
- && optimize_function_for_speed_p (cfun))
14186
+ && hw_sqrt_exists
14187
+ && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
14188
+ && !HONOR_SIGNED_ZEROS (mode))
14190
- tree powi_x_ndiv2 = NULL_TREE;
14192
- /* Attempt to fold powi(arg0, abs(n/2)) into multiplies. If not
14193
- possible or profitable, give up. Skip the degenerate case when
14194
- n is 1 or -1, where the result is always 1. */
14195
- if (absu_hwi (n) != 1)
14197
- powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
14198
- abs_hwi (n / 2));
14199
- if (!powi_x_ndiv2)
14200
- return NULL_TREE;
14202
+ unsigned int max_depth = speed_p
14203
+ ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
14206
- /* Calculate sqrt(x). When n is not 1 or -1, multiply it by the
14207
- result of the optimal multiply sequence just calculated. */
14208
- sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
14209
+ tree expand_with_sqrts
14210
+ = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
14212
- if (absu_hwi (n) == 1)
14213
- result = sqrt_arg0;
14215
- result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14216
- sqrt_arg0, powi_x_ndiv2);
14218
- /* If n is negative, reciprocate the result. */
14220
- result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
14221
- build_real (type, dconst1), result);
14223
+ if (expand_with_sqrts)
14224
+ return expand_with_sqrts;
14227
+ real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
14228
+ n = real_to_integer (&c2);
14229
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
14230
+ c2_is_int = real_identical (&c2, &cint);
14232
/* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
14234
powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
14235
--- a/src/libgcc/config.host
14236
+++ b/src/libgcc/config.host
14237
@@ -377,14 +377,15 @@ arm*-*-netbsdelf*)
14238
tmake_file="$tmake_file arm/t-arm arm/t-netbsd t-slibgcc-gld-nover"
14240
arm*-*-linux*) # ARM GNU/Linux with ELF
14241
- tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix"
14242
+ tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix t-crtfm"
14243
tmake_file="${tmake_file} arm/t-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc"
14244
tm_file="$tm_file arm/bpabi-lib.h"
14245
unwind_header=config/arm/unwind-arm.h
14246
tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
14247
+ extra_parts="$extra_parts crtfastmath.o"
14249
arm*-*-uclinux*) # ARM ucLinux
14250
- tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
14251
+ tmake_file="${tmake_file} t-fixedpoint-gnu-prefix t-crtfm"
14252
tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
14253
tmake_file="${tmake_file} arm/t-bpabi"
14254
tm_file="$tm_file arm/bpabi-lib.h"
14255
@@ -396,7 +397,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*)
14256
tm_file="$tm_file arm/bpabi-lib.h"
14258
arm*-*-eabi* | arm*-*-rtems*)
14259
- tmake_file="${tmake_file} arm/t-bpabi"
14260
+ tmake_file="${tmake_file} arm/t-bpabi t-crtfm"
14261
extra_parts="crtbegin.o crtend.o crti.o crtn.o"
14263
arm*-*-symbianelf*)
14264
--- a/src//dev/null
14265
+++ b/src/libgcc/config/arm/crtfastmath.c
14268
+ * Copyright (C) 2014 Free Software Foundation, Inc.
14270
+ * This file is free software; you can redistribute it and/or modify it
14271
+ * under the terms of the GNU General Public License as published by the
14272
+ * Free Software Foundation; either version 3, or (at your option) any
14275
+ * This file is distributed in the hope that it will be useful, but
14276
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
14277
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14278
+ * General Public License for more details.
14280
+ * Under Section 7 of GPL version 3, you are granted additional
14281
+ * permissions described in the GCC Runtime Library Exception, version
14282
+ * 3.1, as published by the Free Software Foundation.
14284
+ * You should have received a copy of the GNU General Public License and
14285
+ * a copy of the GCC Runtime Library Exception along with this program;
14286
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
14287
+ * <http://www.gnu.org/licenses/>.
14290
+/* Enable flush-to-zero support for -ffast-math on VFP targets. */
14291
+#ifndef __SOFTFP__
14293
+#define FPSCR_FZ (1 << 24)
14295
+static void __attribute__((constructor))
14296
+__arm_set_fast_math (void)
14298
+ unsigned int fpscr_save;
14300
+ /* Set the FZ (flush-to-zero) bit in FPSCR. */
14301
+ __asm__("vmrs %0, fpscr" : "=r" (fpscr_save));
14302
+ fpscr_save |= FPSCR_FZ;
14303
+ __asm__("vmsr fpscr, %0" : : "r" (fpscr_save));
14306
+#endif /* __SOFTFP__ */
14307
--- a/src/libgcc/config/arm/ieee754-df.S
14308
+++ b/src/libgcc/config/arm/ieee754-df.S
14310
* Only the default rounding mode is intended for best performances.
14311
* Exceptions aren't supported yet, but that can be added quite easily
14312
* if necessary without impacting performances.
14314
+ * In the CFI related comments, 'previousOffset' refers to the previous offset
14315
+ * from sp used to compute the CFA.
14318
+ .cfi_sections .debug_frame
14322
@@ -53,11 +57,13 @@
14324
ARM_FUNC_START negdf2
14325
ARM_FUNC_ALIAS aeabi_dneg negdf2
14326
+ CFI_START_FUNCTION
14329
eor xh, xh, #0x80000000
14333
FUNC_END aeabi_dneg
14336
@@ -66,6 +72,7 @@ ARM_FUNC_ALIAS aeabi_dneg negdf2
14337
#ifdef L_arm_addsubdf3
14339
ARM_FUNC_START aeabi_drsub
14340
+ CFI_START_FUNCTION
14342
eor xh, xh, #0x80000000 @ flip sign bit of first arg
14344
@@ -81,7 +88,11 @@ ARM_FUNC_ALIAS aeabi_dsub subdf3
14345
ARM_FUNC_START adddf3
14346
ARM_FUNC_ALIAS aeabi_dadd adddf3
14348
-1: do_push {r4, r5, lr}
14349
+1: do_push {r4, r5, lr} @ sp -= 12
14350
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14351
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp to sp + 8
14352
+ .cfi_rel_offset r5, 4
14353
+ .cfi_rel_offset lr, 8
14355
@ Look for zeroes, equal values, INF, or NAN.
14356
shift1 lsl, r4, xh, #1
14357
@@ -148,6 +159,11 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
14358
@ Since this is not common case, rescale them off line.
14362
+@ CFI note: we're lucky that the branches to Lad_* that appear after this function
14363
+@ have a CFI state that's exactly the same as the one we're in at this
14364
+@ point. Otherwise the CFI would change to a different state after the branch,
14365
+@ which would be disastrous for backtracing.
14368
@ Compensate for the exponent overlapping the mantissa MSB added later
14369
@@ -413,6 +429,7 @@ LSYM(Lad_i):
14370
orrne xh, xh, #0x00080000 @ quiet NAN
14374
FUNC_END aeabi_dsub
14376
FUNC_END aeabi_dadd
14377
@@ -420,12 +437,19 @@ LSYM(Lad_i):
14379
ARM_FUNC_START floatunsidf
14380
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
14381
+ CFI_START_FUNCTION
14387
- do_push {r4, r5, lr}
14389
+ do_push {r4, r5, lr} @ sp -= 12
14390
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14391
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8.
14392
+ .cfi_rel_offset r5, 4
14393
+ .cfi_rel_offset lr, 8
14395
mov r4, #0x400 @ initial exponent
14396
add r4, r4, #(52-1 - 1)
14397
mov r5, #0 @ sign bit is 0
14398
@@ -435,17 +459,25 @@ ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
14403
FUNC_END aeabi_ui2d
14404
FUNC_END floatunsidf
14406
ARM_FUNC_START floatsidf
14407
ARM_FUNC_ALIAS aeabi_i2d floatsidf
14408
+ CFI_START_FUNCTION
14414
- do_push {r4, r5, lr}
14416
+ do_push {r4, r5, lr} @ sp -= 12
14417
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14418
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8.
14419
+ .cfi_rel_offset r5, 4
14420
+ .cfi_rel_offset lr, 8
14422
mov r4, #0x400 @ initial exponent
14423
add r4, r4, #(52-1 - 1)
14424
ands r5, r0, #0x80000000 @ sign bit in r5
14425
@@ -457,11 +489,13 @@ ARM_FUNC_ALIAS aeabi_i2d floatsidf
14433
ARM_FUNC_START extendsfdf2
14434
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
14435
+ CFI_START_FUNCTION
14437
movs r2, r0, lsl #1 @ toss sign bit
14438
mov xh, r2, asr #3 @ stretch exponent
14439
@@ -480,34 +514,54 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
14441
@ value was denormalized. We can normalize it now.
14442
do_push {r4, r5, lr}
14443
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14444
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8.
14445
+ .cfi_rel_offset r5, 4
14446
+ .cfi_rel_offset lr, 8
14448
mov r4, #0x380 @ setup corresponding exponent
14449
and r5, xh, #0x80000000 @ move sign bit in r5
14450
bic xh, xh, #0x80000000
14455
FUNC_END extendsfdf2
14457
ARM_FUNC_START floatundidf
14458
ARM_FUNC_ALIAS aeabi_ul2d floatundidf
14459
+ CFI_START_FUNCTION
14460
+ .cfi_remember_state @ Save the current CFA state.
14466
- do_push {r4, r5, lr}
14467
+ do_push {r4, r5, lr} @ sp -= 12
14468
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14469
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8
14470
+ .cfi_rel_offset r5, 4
14471
+ .cfi_rel_offset lr, 8
14476
ARM_FUNC_START floatdidf
14477
ARM_FUNC_ALIAS aeabi_l2d floatdidf
14478
+ .cfi_restore_state
14479
+ @ Restore the CFI state we saved above. If we didn't do this then the
14480
+ @ following instructions would have the CFI state that was set by the
14481
+ @ offset adjustments made in floatundidf.
14487
- do_push {r4, r5, lr}
14488
+ do_push {r4, r5, lr} @ sp -= 12
14489
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14490
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp to sp + 8
14491
+ .cfi_rel_offset r5, 4
14492
+ .cfi_rel_offset lr, 8
14494
ands r5, ah, #0x80000000 @ sign bit in r5
14496
@@ -550,6 +604,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
14503
FUNC_END floatundidf
14504
@@ -561,7 +616,14 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
14506
ARM_FUNC_START muldf3
14507
ARM_FUNC_ALIAS aeabi_dmul muldf3
14508
- do_push {r4, r5, r6, lr}
14509
+ CFI_START_FUNCTION
14511
+ do_push {r4, r5, r6, lr} @ sp -= 16
14512
+ .cfi_adjust_cfa_offset 16 @ CFA is now sp + previousOffset + 16
14513
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp to sp + 12.
14514
+ .cfi_rel_offset r5, 4
14515
+ .cfi_rel_offset r6, 8
14516
+ .cfi_rel_offset lr, 12
14518
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14520
@@ -596,7 +658,16 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
14521
and r6, r6, #0x80000000
14523
@ Well, no way to make it shorter without the umull instruction.
14524
- stmfd sp!, {r6, r7, r8, r9, sl, fp}
14525
+ stmfd sp!, {r6, r7, r8, r9, sl, fp} @ sp -= 24
14526
+ .cfi_remember_state @ Save the current CFI state.
14527
+ .cfi_adjust_cfa_offset 24 @ CFA is now sp + previousOffset + 24.
14528
+ .cfi_rel_offset r6, 0 @ Registers are saved from sp to sp + 20.
14529
+ .cfi_rel_offset r7, 4
14530
+ .cfi_rel_offset r8, 8
14531
+ .cfi_rel_offset r9, 12
14532
+ .cfi_rel_offset sl, 16
14533
+ .cfi_rel_offset fp, 20
14535
mov r7, xl, lsr #16
14536
mov r8, yl, lsr #16
14537
mov r9, xh, lsr #16
14538
@@ -648,8 +719,8 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
14542
- ldmfd sp!, {yl, r7, r8, r9, sl, fp}
14544
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp} @ sp += 24
14545
+ .cfi_restore_state @ Restore the previous CFI state.
14548
@ Here is the actual multiplication.
14549
@@ -715,7 +786,6 @@ LSYM(Lml_1):
14550
orr xh, xh, #0x00100000
14557
@@ -863,13 +933,20 @@ LSYM(Lml_n):
14558
orr xh, xh, #0x00f80000
14559
RETLDM "r4, r5, r6"
14562
FUNC_END aeabi_dmul
14565
ARM_FUNC_START divdf3
14566
ARM_FUNC_ALIAS aeabi_ddiv divdf3
14567
+ CFI_START_FUNCTION
14569
do_push {r4, r5, r6, lr}
14570
+ .cfi_adjust_cfa_offset 16
14571
+ .cfi_rel_offset r4, 0
14572
+ .cfi_rel_offset r5, 4
14573
+ .cfi_rel_offset r6, 8
14574
+ .cfi_rel_offset lr, 12
14576
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14578
@@ -1052,6 +1129,7 @@ LSYM(Ldv_s):
14579
bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
14580
b LSYM(Lml_n) @ 0 / 0 -> NAN
14583
FUNC_END aeabi_ddiv
14586
@@ -1063,6 +1141,7 @@ LSYM(Ldv_s):
14588
ARM_FUNC_START gtdf2
14589
ARM_FUNC_ALIAS gedf2 gtdf2
14590
+ CFI_START_FUNCTION
14594
@@ -1077,6 +1156,10 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14595
mov ip, #1 @ how should we specify unordered here?
14597
1: str ip, [sp, #-4]!
14598
+ .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
14599
+ @ We're not adding CFI for ip as it's pushed into the stack
14600
+ @ only because it may be popped off later as a return value
14601
+ @ (i.e. we're not preserving it anyways).
14603
@ Trap any INF/NAN first.
14605
@@ -1085,10 +1168,18 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14607
COND(mvn,s,ne) ip, ip, asr #21
14610
- @ Test for equality.
14611
- @ Note that 0.0 is equal to -0.0.
14612
+ .cfi_remember_state
14613
+ @ Save the current CFI state. This is done because the branch
14614
+ @ is conditional, and if we don't take it we'll issue a
14615
+ @ .cfi_adjust_cfa_offset and return. If we do take it,
14616
+ @ however, the .cfi_adjust_cfa_offset from the non-branch code
14617
+ @ will affect the branch code as well. To avoid this we'll
14618
+ @ restore the current state before executing the branch code.
14620
+ @ Test for equality. Note that 0.0 is equal to -0.0.
14622
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
14624
orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
14626
COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
14627
@@ -1117,8 +1208,13 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14631
- @ Look for a NAN.
14632
-3: mov ip, xh, lsl #1
14633
+3: @ Look for a NAN.
14635
+ @ Restore the previous CFI state (i.e. keep the CFI state as it was
14636
+ @ before the branch).
14637
+ .cfi_restore_state
14639
+ mov ip, xh, lsl #1
14640
mvns ip, ip, asr #21
14642
orrs ip, xl, xh, lsl #12
14643
@@ -1128,9 +1224,13 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14645
orrs ip, yl, yh, lsl #12
14646
beq 2b @ y is not NAN
14648
5: ldr r0, [sp], #4 @ unordered return code
14649
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
14657
@@ -1140,6 +1240,7 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14660
ARM_FUNC_START aeabi_cdrcmple
14661
+ CFI_START_FUNCTION
14665
@@ -1148,13 +1249,17 @@ ARM_FUNC_START aeabi_cdrcmple
14671
ARM_FUNC_START aeabi_cdcmpeq
14672
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
14674
@ The status-returning routines are required to preserve all
14675
@ registers except ip, lr, and cpsr.
14676
6: do_push {r0, lr}
14677
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8.
14678
+ .cfi_rel_offset r0, 0 @ Previous r0 is saved at sp.
14679
+ .cfi_rel_offset lr, 4 @ Previous lr is saved at sp + 4.
14682
@ Set the Z flag correctly, and the C flag unconditionally.
14684
@@ -1162,59 +1267,86 @@ ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
14685
@ that the first operand was smaller than the second.
14692
FUNC_END aeabi_cdcmple
14693
FUNC_END aeabi_cdcmpeq
14694
FUNC_END aeabi_cdrcmple
14696
ARM_FUNC_START aeabi_dcmpeq
14697
+ CFI_START_FUNCTION
14699
+ str lr, [sp, #-8]! @ sp -= 8
14700
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14701
+ .cfi_rel_offset lr, 0 @ lr is at sp
14703
- str lr, [sp, #-8]!
14704
ARM_CALL aeabi_cdcmple
14706
moveq r0, #1 @ Equal to.
14707
movne r0, #0 @ Less than, greater than, or unordered.
14712
FUNC_END aeabi_dcmpeq
14714
ARM_FUNC_START aeabi_dcmplt
14715
+ CFI_START_FUNCTION
14717
+ str lr, [sp, #-8]! @ sp -= 8
14718
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14719
+ .cfi_rel_offset lr, 0 @ lr is at sp
14721
- str lr, [sp, #-8]!
14722
ARM_CALL aeabi_cdcmple
14724
movcc r0, #1 @ Less than.
14725
movcs r0, #0 @ Equal to, greater than, or unordered.
14729
FUNC_END aeabi_dcmplt
14731
ARM_FUNC_START aeabi_dcmple
14732
+ CFI_START_FUNCTION
14734
+ str lr, [sp, #-8]! @ sp -= 8
14735
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14736
+ .cfi_rel_offset lr, 0 @ lr is at sp
14738
- str lr, [sp, #-8]!
14739
ARM_CALL aeabi_cdcmple
14741
movls r0, #1 @ Less than or equal to.
14742
movhi r0, #0 @ Greater than or unordered.
14746
FUNC_END aeabi_dcmple
14748
ARM_FUNC_START aeabi_dcmpge
14749
+ CFI_START_FUNCTION
14751
+ str lr, [sp, #-8]! @ sp -= 8
14752
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14753
+ .cfi_rel_offset lr, 0 @ lr is at sp
14755
- str lr, [sp, #-8]!
14756
ARM_CALL aeabi_cdrcmple
14758
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
14759
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
14763
FUNC_END aeabi_dcmpge
14765
ARM_FUNC_START aeabi_dcmpgt
14766
+ CFI_START_FUNCTION
14768
+ str lr, [sp, #-8]! @ sp -= 8
14769
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14770
+ .cfi_rel_offset lr, 0 @ lr is at sp
14772
- str lr, [sp, #-8]!
14773
ARM_CALL aeabi_cdrcmple
14775
movcc r0, #1 @ Operand 2 is less than operand 1.
14776
@@ -1222,6 +1354,7 @@ ARM_FUNC_START aeabi_dcmpgt
14777
@ or they are unordered.
14781
FUNC_END aeabi_dcmpgt
14783
#endif /* L_cmpdf2 */
14784
@@ -1230,6 +1363,7 @@ ARM_FUNC_START aeabi_dcmpgt
14786
ARM_FUNC_START unorddf2
14787
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
14791
mvns ip, ip, asr #21
14792
@@ -1247,6 +1381,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
14793
3: mov r0, #1 @ arguments are unordered.
14797
FUNC_END aeabi_dcmpun
14800
@@ -1256,6 +1391,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
14802
ARM_FUNC_START fixdfsi
14803
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
14804
+ CFI_START_FUNCTION
14806
@ check exponent range.
14808
@@ -1289,6 +1425,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
14809
4: mov r0, #0 @ How should we convert NAN?
14813
FUNC_END aeabi_d2iz
14816
@@ -1298,6 +1435,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
14818
ARM_FUNC_START fixunsdfsi
14819
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
14820
+ CFI_START_FUNCTION
14822
@ check exponent range.
14823
movs r2, xh, lsl #1
14824
@@ -1327,6 +1465,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
14825
4: mov r0, #0 @ How should we convert NAN?
14829
FUNC_END aeabi_d2uiz
14830
FUNC_END fixunsdfsi
14832
@@ -1336,6 +1475,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
14834
ARM_FUNC_START truncdfsf2
14835
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
14836
+ CFI_START_FUNCTION
14838
@ check exponent range.
14840
@@ -1400,6 +1540,7 @@ ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
14841
orr r0, r0, #0x00800000
14846
FUNC_END truncdfsf2
14848
--- a/src/libgcc/config/arm/ieee754-sf.S
14849
+++ b/src/libgcc/config/arm/ieee754-sf.S
14850
@@ -31,16 +31,21 @@
14851
* Only the default rounding mode is intended for best performances.
14852
* Exceptions aren't supported yet, but that can be added quite easily
14853
* if necessary without impacting performances.
14855
+ * In the CFI related comments, 'previousOffset' refers to the previous offset
14856
+ * from sp used to compute the CFA.
14859
#ifdef L_arm_negsf2
14861
ARM_FUNC_START negsf2
14862
ARM_FUNC_ALIAS aeabi_fneg negsf2
14863
+ CFI_START_FUNCTION
14865
eor r0, r0, #0x80000000 @ flip sign bit
14869
FUNC_END aeabi_fneg
14872
@@ -49,6 +54,7 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2
14873
#ifdef L_arm_addsubsf3
14875
ARM_FUNC_START aeabi_frsub
14876
+ CFI_START_FUNCTION
14878
eor r0, r0, #0x80000000 @ flip sign bit of first arg
14880
@@ -284,6 +290,7 @@ LSYM(Lad_i):
14881
orrne r0, r0, #0x00400000 @ quiet NAN
14885
FUNC_END aeabi_frsub
14886
FUNC_END aeabi_fadd
14888
@@ -292,6 +299,7 @@ LSYM(Lad_i):
14890
ARM_FUNC_START floatunsisf
14891
ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
14892
+ CFI_START_FUNCTION
14896
@@ -316,6 +324,7 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
14903
FUNC_END aeabi_ui2f
14904
@@ -323,6 +332,7 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
14906
ARM_FUNC_START floatundisf
14907
ARM_FUNC_ALIAS aeabi_ul2f floatundisf
14908
+ CFI_START_FUNCTION
14912
@@ -409,6 +419,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
14913
biceq r0, r0, ip, lsr #31
14919
FUNC_END floatundisf
14920
@@ -420,6 +431,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
14922
ARM_FUNC_START mulsf3
14923
ARM_FUNC_ALIAS aeabi_fmul mulsf3
14924
+ CFI_START_FUNCTION
14926
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14928
@@ -454,7 +466,13 @@ LSYM(Lml_x):
14929
and r3, ip, #0x80000000
14931
@ Well, no way to make it shorter without the umull instruction.
14932
- do_push {r3, r4, r5}
14933
+ do_push {r3, r4, r5} @ sp -= 12
14934
+ .cfi_remember_state @ Save the current CFI state
14935
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14936
+ .cfi_rel_offset r3, 0 @ Registers are saved from sp to sp + 8
14937
+ .cfi_rel_offset r4, 4
14938
+ .cfi_rel_offset r5, 8
14940
mov r4, r0, lsr #16
14941
mov r5, r1, lsr #16
14942
bic r0, r0, r4, lsl #16
14943
@@ -465,7 +483,8 @@ LSYM(Lml_x):
14945
adds r3, r3, r0, lsl #16
14946
adc r1, ip, r0, lsr #16
14947
- do_pop {r0, r4, r5}
14948
+ do_pop {r0, r4, r5} @ sp += 12
14949
+ .cfi_restore_state @ Restore the previous CFI state
14953
@@ -618,11 +637,13 @@ LSYM(Lml_n):
14954
orr r0, r0, #0x00c00000
14958
FUNC_END aeabi_fmul
14961
ARM_FUNC_START divsf3
14962
ARM_FUNC_ALIAS aeabi_fdiv divsf3
14963
+ CFI_START_FUNCTION
14965
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14967
@@ -758,6 +779,7 @@ LSYM(Ldv_s):
14968
bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
14969
b LSYM(Lml_n) @ 0 / 0 -> NAN
14972
FUNC_END aeabi_fdiv
14975
@@ -782,6 +804,7 @@ LSYM(Ldv_s):
14977
ARM_FUNC_START gtsf2
14978
ARM_FUNC_ALIAS gesf2 gtsf2
14979
+ CFI_START_FUNCTION
14983
@@ -796,6 +819,10 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
14984
mov ip, #1 @ how should we specify unordered here?
14986
1: str ip, [sp, #-4]!
14987
+ .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
14988
+ @ We're not adding CFI for ip as it's pushed into the stack only because
14989
+ @ it may be popped off later as a return value (i.e. we're not preserving
14992
@ Trap any INF/NAN first.
14994
@@ -804,10 +831,18 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
14996
COND(mvn,s,ne) ip, r3, asr #24
14998
+ .cfi_remember_state
14999
+ @ Save the current CFI state. This is done because the branch is conditional,
15000
+ @ and if we don't take it we'll issue a .cfi_adjust_cfa_offset and return.
15001
+ @ If we do take it, however, the .cfi_adjust_cfa_offset from the non-branch
15002
+ @ code will affect the branch code as well. To avoid this we'll restore
15003
+ @ the current state before executing the branch code.
15006
@ Note that 0.0 is equal to -0.0.
15008
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
15010
orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
15012
teqne r0, r1 @ if not 0 compare sign
15013
@@ -823,8 +858,13 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
15017
- @ Look for a NAN.
15018
-3: mvns ip, r2, asr #24
15019
+3: @ Look for a NAN.
15021
+ @ Restore the previous CFI state (i.e. keep the CFI state as it was
15022
+ @ before the branch).
15023
+ .cfi_restore_state
15025
+ mvns ip, r2, asr #24
15027
movs ip, r0, lsl #9
15029
@@ -832,9 +872,12 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
15031
movs ip, r1, lsl #9
15032
beq 2b @ r1 is not NAN
15034
5: ldr r0, [sp], #4 @ return unordered code.
15035
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
15042
@@ -844,6 +887,7 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
15045
ARM_FUNC_START aeabi_cfrcmple
15046
+ CFI_START_FUNCTION
15050
@@ -856,6 +900,13 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
15051
@ The status-returning routines are required to preserve all
15052
@ registers except ip, lr, and cpsr.
15053
6: do_push {r0, r1, r2, r3, lr}
15054
+ .cfi_adjust_cfa_offset 20 @ CFA is at sp + previousOffset + 20
15055
+ .cfi_rel_offset r0, 0 @ Registers are saved from sp to sp + 16
15056
+ .cfi_rel_offset r1, 4
15057
+ .cfi_rel_offset r2, 8
15058
+ .cfi_rel_offset r3, 12
15059
+ .cfi_rel_offset lr, 16
15062
@ Set the Z flag correctly, and the C flag unconditionally.
15064
@@ -865,57 +916,82 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
15066
RETLDM "r0, r1, r2, r3"
15069
FUNC_END aeabi_cfcmple
15070
FUNC_END aeabi_cfcmpeq
15071
FUNC_END aeabi_cfrcmple
15073
ARM_FUNC_START aeabi_fcmpeq
15074
+ CFI_START_FUNCTION
15076
+ str lr, [sp, #-8]! @ sp -= 8
15077
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15078
+ .cfi_rel_offset lr, 0 @ lr is at sp
15080
- str lr, [sp, #-8]!
15081
ARM_CALL aeabi_cfcmple
15083
moveq r0, #1 @ Equal to.
15084
movne r0, #0 @ Less than, greater than, or unordered.
15088
FUNC_END aeabi_fcmpeq
15090
ARM_FUNC_START aeabi_fcmplt
15091
+ CFI_START_FUNCTION
15093
+ str lr, [sp, #-8]! @ sp -= 8
15094
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15095
+ .cfi_rel_offset lr, 0 @ lr is at sp
15097
- str lr, [sp, #-8]!
15098
ARM_CALL aeabi_cfcmple
15100
movcc r0, #1 @ Less than.
15101
movcs r0, #0 @ Equal to, greater than, or unordered.
15105
FUNC_END aeabi_fcmplt
15107
ARM_FUNC_START aeabi_fcmple
15108
+ CFI_START_FUNCTION
15110
+ str lr, [sp, #-8]! @ sp -= 8
15111
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15112
+ .cfi_rel_offset lr, 0 @ lr is at sp
15114
- str lr, [sp, #-8]!
15115
ARM_CALL aeabi_cfcmple
15117
movls r0, #1 @ Less than or equal to.
15118
movhi r0, #0 @ Greater than or unordered.
15122
FUNC_END aeabi_fcmple
15124
ARM_FUNC_START aeabi_fcmpge
15125
+ CFI_START_FUNCTION
15127
+ str lr, [sp, #-8]! @ sp -= 8
15128
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15129
+ .cfi_rel_offset lr, 0 @ lr is at sp
15131
- str lr, [sp, #-8]!
15132
ARM_CALL aeabi_cfrcmple
15134
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
15135
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
15139
FUNC_END aeabi_fcmpge
15141
ARM_FUNC_START aeabi_fcmpgt
15142
+ CFI_START_FUNCTION
15144
+ str lr, [sp, #-8]! @ sp -= 8
15145
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15146
+ .cfi_rel_offset lr, 0 @ lr is at sp
15148
- str lr, [sp, #-8]!
15149
ARM_CALL aeabi_cfrcmple
15151
movcc r0, #1 @ Operand 2 is less than operand 1.
15152
@@ -923,6 +999,7 @@ ARM_FUNC_START aeabi_fcmpgt
15153
@ or they are unordered.
15157
FUNC_END aeabi_fcmpgt
15159
#endif /* L_cmpsf2 */
15160
@@ -931,6 +1008,7 @@ ARM_FUNC_START aeabi_fcmpgt
15162
ARM_FUNC_START unordsf2
15163
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
15164
+ CFI_START_FUNCTION
15168
@@ -947,6 +1025,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
15169
3: mov r0, #1 @ arguments are unordered.
15173
FUNC_END aeabi_fcmpun
15176
@@ -956,6 +1035,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
15178
ARM_FUNC_START fixsfsi
15179
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
15180
+ CFI_START_FUNCTION
15182
@ check exponent range.
15184
@@ -989,6 +1069,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
15185
4: mov r0, #0 @ What should we convert NAN to?
15189
FUNC_END aeabi_f2iz
15192
@@ -998,6 +1079,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
15194
ARM_FUNC_START fixunssfsi
15195
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
15196
+ CFI_START_FUNCTION
15198
@ check exponent range.
15199
movs r2, r0, lsl #1
15200
@@ -1027,6 +1109,7 @@ ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
15201
4: mov r0, #0 @ What should we convert NAN to?
15205
FUNC_END aeabi_f2uiz
15206
FUNC_END fixunssfsi
15208
--- a/src/libgcc/config/arm/lib1funcs.S
15209
+++ b/src/libgcc/config/arm/lib1funcs.S
15210
@@ -1965,6 +1965,16 @@ LSYM(Lchange_\register):
15212
#endif /* Arch supports thumb. */
15214
+.macro CFI_START_FUNCTION
15216
+ .cfi_remember_state
15219
+.macro CFI_END_FUNCTION
15220
+ .cfi_restore_state
15224
#ifndef __symbian__
15225
#ifndef __ARM_ARCH_6M__
15226
#include "ieee754-df.S"
15227
--- a/src/libgcc/unwind-dw2-fde-dip.c
15228
+++ b/src/libgcc/unwind-dw2-fde-dip.c
15231
#if !defined(inhibit_libc) && defined(HAVE_LD_EH_FRAME_HDR) \
15232
&& defined(TARGET_DL_ITERATE_PHDR) \
15233
+ && defined(__linux__)
15234
+# define USE_PT_GNU_EH_FRAME
15237
+#if !defined(inhibit_libc) && defined(HAVE_LD_EH_FRAME_HDR) \
15238
+ && defined(TARGET_DL_ITERATE_PHDR) \
15239
&& (defined(__DragonFly__) || defined(__FreeBSD__))
15240
# define ElfW __ElfN
15241
# define USE_PT_GNU_EH_FRAME
15242
--- a/src/libgfortran/acinclude.m4
15243
+++ b/src/libgfortran/acinclude.m4
15244
@@ -100,7 +100,7 @@ void foo (void);
15245
[Define to 1 if the target supports #pragma weak])
15248
- *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* )
15249
+ *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* | *-*-musl* )
15250
AC_DEFINE(GTHREAD_USE_WEAK, 0,
15251
[Define to 0 if the target shouldn't use #pragma weak])
15253
--- a/src/libgfortran/configure
15254
+++ b/src/libgfortran/configure
15255
@@ -26456,7 +26456,7 @@ $as_echo "#define SUPPORTS_WEAK 1" >>confdefs.h
15259
- *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* )
15260
+ *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* | *-*-musl* )
15262
$as_echo "#define GTHREAD_USE_WEAK 0" >>confdefs.h
15264
--- a/src/libitm/config/arm/hwcap.cc
15265
+++ b/src/libitm/config/arm/hwcap.cc
15266
@@ -40,7 +40,7 @@ int GTM_hwcap HIDDEN = 0
15269
#include <unistd.h>
15270
-#include <sys/fcntl.h>
15271
+#include <fcntl.h>
15274
static void __attribute__((constructor))
15275
--- a/src/libitm/config/linux/x86/tls.h
15276
+++ b/src/libitm/config/linux/x86/tls.h
15277
@@ -25,16 +25,19 @@
15278
#ifndef LIBITM_X86_TLS_H
15279
#define LIBITM_X86_TLS_H 1
15281
-#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 10)
15282
+#if defined(__GLIBC_PREREQ)
15283
+#if __GLIBC_PREREQ(2, 10)
15284
/* Use slots in the TCB head rather than __thread lookups.
15285
GLIBC has reserved words 10 through 13 for TM. */
15286
#define HAVE_ARCH_GTM_THREAD 1
15287
#define HAVE_ARCH_GTM_THREAD_DISP 1
15291
#include "config/generic/tls.h"
15293
-#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 10)
15294
+#if defined(__GLIBC_PREREQ)
15295
+#if __GLIBC_PREREQ(2, 10)
15296
namespace GTM HIDDEN {
15299
@@ -101,5 +104,6 @@ static inline void set_abi_disp(struct abi_dispatch *x)
15302
#endif /* >= GLIBC 2.10 */
15305
#endif // LIBITM_X86_TLS_H
15306
--- a/src//dev/null
15307
+++ b/src/libstdc++-v3/config/cpu/arm/cpu_defines.h
15309
+// Specific definitions for generic platforms -*- C++ -*-
15311
+// Copyright (C) 2015 Free Software Foundation, Inc.
15313
+// This file is part of the GNU ISO C++ Library. This library is free
15314
+// software; you can redistribute it and/or modify it under the
15315
+// terms of the GNU General Public License as published by the
15316
+// Free Software Foundation; either version 3, or (at your option)
15317
+// any later version.
15319
+// This library is distributed in the hope that it will be useful,
15320
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
15321
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15322
+// GNU General Public License for more details.
15324
+// Under Section 7 of GPL version 3, you are granted additional
15325
+// permissions described in the GCC Runtime Library Exception, version
15326
+// 3.1, as published by the Free Software Foundation.
15328
+// You should have received a copy of the GNU General Public License and
15329
+// a copy of the GCC Runtime Library Exception along with this program;
15330
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
15331
+// <http://www.gnu.org/licenses/>.
15333
+/** @file bits/cpu_defines.h
15334
+ * This is an internal header file, included by other library headers.
15335
+ * Do not attempt to use it directly. @headername{iosfwd}
15338
+#ifndef _GLIBCXX_CPU_DEFINES
15339
+#define _GLIBCXX_CPU_DEFINES 1
15341
+// Integer divide instructions don't trap on ARM.
15342
+#ifdef __ARM_ARCH_EXT_IDIV__
15343
+#define __glibcxx_integral_traps false
15345
+#define __glibcxx_integral_traps true
15349
--- a/src/libstdc++-v3/config/os/generic/os_defines.h
15350
+++ b/src/libstdc++-v3/config/os/generic/os_defines.h
15352
// System-specific #define, typedefs, corrections, etc, go here. This
15353
// file will come before all others.
15355
+// Disable the weak reference logic in gthr.h for os/generic because it
15356
+// is broken on every platform unless there is implementation specific
15357
+// workaround in gthr-posix.h and at link-time for static linking.
15358
+#define _GLIBCXX_GTHREAD_USE_WEAK 0
15361
--- a/src/libstdc++-v3/configure.host
15362
+++ b/src/libstdc++-v3/configure.host
15363
@@ -143,6 +143,9 @@ cpu_include_dir=cpu/${try_cpu}
15364
# Set specific CPU overrides for cpu_defines_dir. Most can just use generic.
15365
# THIS TABLE IS SORTED. KEEP IT THAT WAY.
15366
case "${host_cpu}" in
15368
+ cpu_defines_dir=cpu/arm
15371
cpu_defines_dir=cpu/powerpc
15373
@@ -273,6 +276,9 @@ case "${host_os}" in
15375
os_include_dir="os/bsd/freebsd"
15378
+ os_include_dir="os/generic"
15380
gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu)
15381
if [ "$uclibc" = "yes" ]; then
15382
os_include_dir="os/uclibc"